0%

LSANet代码分析

前言

论文 LSANet 已经看得差不多了,下面对其代码进行分析,看看是怎么实现论文中的模型的。

1
2
论文地址:https://arxiv.org/abs/1905.05442
代码地址:https://github.com/LinZhuoChen/LSANet

SFE模块

20201028152630
pointnet_util.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def sample_and_group(npoint, radius, nsample, xyz, points, bn, is_training, bn_decay, mlp, knn=False, use_xyz=True, xyz_feature=None, end=False, use_edge_feature=False):
    '''
    Sample-and-group with the optional SFE (Spatial Feature Extraction) branch of LSANet.

    Input:
        npoint: int32 -- number of points kept by farthest point sampling
        radius: float32 -- ball-query search radius
        nsample: int32 -- number of neighbours gathered per local region
        xyz: (batch_size, ndataset, 3) TF tensor -- point coordinates
        points: (batch_size, ndataset, channel) TF tensor -- point features;
            if None, xyz alone is used as the feature
        bn, is_training, bn_decay: batch-norm settings forwarded to tf_util.conv2d
        mlp: list of int32 -- output channels of the SFE edge-feature MLP
        knn: bool -- if True use kNN instead of ball query
        use_xyz: bool -- if True concat XYZ with local point features,
            otherwise use point features only
        xyz_feature: Feature_out of the previous SFE stage (None at the first stage)
        end: bool -- if True, skip producing the next stage's Feature_out
        use_edge_feature: bool -- if False, behave exactly like PointNet++'s
            sample_and_group (4-tuple return); if True, additionally return the
            SFE outputs (6-tuple return)
    Output:
        new_xyz: (batch_size, npoint, 3) TF tensor -- sampled seed points
        new_points: (batch_size, npoint, nsample, 3+channel) TF tensor
        idx: (batch_size, npoint, nsample) TF tensor -- indices of grouped points
        output_feature: Feature_sfe in the paper's figure (only when use_edge_feature)
        xyz_feature: Feature_out in the paper's figure (only when use_edge_feature)
        grouped_xyz: (batch_size, npoint, nsample, 3) TF tensor -- local XYZ,
            normalized by subtracting the seed point
    '''
    new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz))  # (batch_size, npoint, 3)
    # Neighbourhood construction: kNN or ball query.
    if knn:
        _, idx = knn_point(nsample, xyz, new_xyz)
    else:
        idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)
    grouped_xyz = group_point(xyz, idx)  # (batch_size, npoint, nsample, 3)
    # Translation normalization: express neighbours relative to their seed point.
    grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), [1, 1, nsample, 1])

    if points is not None:
        grouped_points = group_point(points, idx)  # (batch_size, npoint, nsample, channel)
        if use_xyz:
            # (batch_size, npoint, nsample, 3+channel)
            new_points = tf.concat([grouped_xyz, grouped_points], axis=-1)
        else:
            new_points = grouped_points
    else:
        new_points = grouped_xyz

    # ---- SFE branch: the part added on top of PointNet++ ----
    if not use_edge_feature:
        return new_xyz, new_points, idx, grouped_xyz
    # FIX: was `xyz_feature == None`; `==` on a tf.Tensor does not reliably
    # test for None -- always use `is None` for None checks.
    if xyz_feature is None:
        xyz_feature = xyz

    # Group the previous stage's Feature_out; at the first stage (xyz_feature
    # was None) this is the same grouping as grouped_xyz before normalization.
    xyz_feature = group_point(xyz_feature, idx)
    edge_feature = grouped_xyz  # the green (batch_size, npoint, nsample, 3) block in the figure
    for i, num_out_channel in enumerate(mlp):
        edge_feature = tf_util.conv2d(edge_feature, num_out_channel, [1, 1],
                                      padding='VALID', stride=[1, 1],
                                      bn=bn, is_training=is_training,
                                      scope='xyz_feature_%d' % (i), bn_decay=bn_decay)
    # Concatenate grouped features with edge features -> Feature_sfe.
    output_feature = tf.concat([xyz_feature, edge_feature], axis=-1)
    if not end:
        # Produce Feature_out for the next stage: MLP then max-pool over neighbours.
        xyz_feature = tf_util.conv2d(output_feature, mlp[-1], [1, 1],
                                     padding='VALID', stride=[1, 1],
                                     bn=bn, is_training=is_training,
                                     scope='xyz_feature2', bn_decay=bn_decay)
        # The authors note sum/mean pooling could be tried here instead of max.
        xyz_feature = tf.reduce_max(xyz_feature, axis=[2], keep_dims=True, name='maxpool')
        xyz_feature = tf.squeeze(xyz_feature, [2])

    # output_feature is Feature_sfe, xyz_feature is Feature_out.
    return new_xyz, new_points, idx, output_feature, xyz_feature, grouped_xyz

LSA layer

20201028163403
20201028163416

1

SDWs模块

整体结构

LSA layer:在下面代码函数中,会调用SFE的代码即sample_and_group()函数,随之调用LSA layer模块,也就是说LSA_layer()函数包括了SFE和LSA layer两个操作,在model的代码中通过定义多个层数来进行图示的多个layer的操作
20201028152601

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def LSA_layer(xyz, points, npoint, radius, nsample, mlp, mlp2, group_all, is_training, bn_decay,
              scope, xyz_feature=None, bn=True, pooling='max', knn=False, use_xyz=True, use_nchw=False,
              end=False):
    ''' LSA layer: runs SFE (sample_and_group) followed by the Local Spatial
    Aware encoding with SDWs (Spatial Distribution Weights) attention.

    Input:
        xyz: (batch_size, ndataset, 3) TF tensor -- point coordinates
        points: (batch_size, ndataset, channel) TF tensor -- point features;
            if None, xyz is used instead
        npoint: int32 -- number of points sampled in farthest point sampling
        radius: float32 -- search radius for grouping
        nsample: int32 -- number of points in each local region
        mlp: list of int32 -- output sizes of the per-point MLP
        mlp2: list of int32 -- output sizes of the per-region (SFE) MLP
        group_all: bool -- if True, group all points into one cloud,
            OVERRIDING npoint, radius and nsample
        is_training: bool -- whether this LSA layer is in training mode
        bn_decay: float32 -- batch norm decay
        scope: variable scope name in tensorflow
        xyz_feature: float32 -- Feature_out coming from the previous SFE stage
        use_xyz: bool -- if True concat XYZ with local point features
        use_nchw: bool -- if True use NCHW data format for conv2d
            (usually faster than NHWC)
    Return:
        new_xyz: (batch_size, npoint, 3) TF tensor -- new seed coordinates
        new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor
        idx: (batch_size, npoint, nsample) int32 -- indices of local regions
        xyz_feature: this layer's Feature_out (fed to the next LSA layer)
    '''
    data_format = 'NCHW' if use_nchw else 'NHWC'
    with tf.variable_scope(scope) as sc:
        # --- Sampling and grouping (SFE) ---
        if group_all:
            nsample = xyz.get_shape()[1].value
            new_xyz, new_points, idx, output_feature, xyz_feature, grouped_xyz = sample_and_group_all(xyz, points, bn, is_training, bn_decay, mlp2, use_xyz, xyz_feature, end, use_edge_feature=True)
        else:
            new_xyz, new_points, idx, output_feature, xyz_feature, grouped_xyz = sample_and_group(npoint, radius, nsample, xyz, points, bn, is_training, bn_decay, mlp2, knn, use_xyz, xyz_feature, end, use_edge_feature=True)
        # --- XYZ feature embedding ---
        # Concatenate Feature_sfe with the grouped point features: the "C"
        # (concat) operation in the paper's overall architecture diagram.
        new_points = tf.concat([new_points, output_feature], axis=-1)

        # --- SDWs module ---
        channel = new_points.get_shape()[-1].value
        # Point Spatial Encoder: per-neighbour spatial code S_pi.
        attention_xyz_1 = tf_util.conv2d(grouped_xyz, 64, [1, 1],
                                         padding='VALID', stride=[1, 1],
                                         bn=bn, is_training=is_training,
                                         scope='xyz_attention_1', bn_decay=bn_decay,
                                         data_format=data_format)  # Spi
        # Region Spatial Encoder: mean-pooled then tiled region code S_g.
        attention_xyz_2 = tf_util.conv2d(grouped_xyz, 64, [1, 1],
                                         padding='VALID', stride=[1, 1],
                                         bn=bn, is_training=is_training,
                                         scope='xyz_attention_2', bn_decay=bn_decay,
                                         data_format=data_format)
        attention_xyz_2 = tf.reduce_mean(attention_xyz_2, axis=[2], keep_dims=True, name='meanpool')
        attention_xyz_2 = tf.tile(attention_xyz_2, [1, 1, nsample, 1])  # Sg
        attention_xyz = tf.concat([attention_xyz_1, attention_xyz_2], axis=-1)  # Si
        # --- Shared MLP + shared MLP' (run in lockstep per stage i) ---
        for i, num_out_channel in enumerate(mlp):
            # Shared MLP on the point features (LSA hierarchical encoding).
            new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1],
                                        padding='VALID', stride=[1, 1],
                                        bn=bn, is_training=is_training,
                                        scope='conv%d' % (i), bn_decay=bn_decay,
                                        data_format=data_format)
            # Shared MLP on the spatial code (SDWs); sigmoid activation turns
            # the output into per-channel attention weights in (0, 1).
            attention_xyz = tf_util.conv2d(attention_xyz, num_out_channel, [1, 1],
                                           padding='VALID', stride=[1, 1],
                                           bn=bn, is_training=is_training,
                                           scope='xyz_attention%d' % (i), bn_decay=bn_decay,
                                           data_format=data_format, activation_fn=tf.sigmoid)
            # The "X" operation in the LSA layer diagram: element-wise multiply
            # the features by the spatial attention weights.
            new_points = tf.multiply(new_points, attention_xyz)
        # LSA pooling: max over the nsample neighbour axis.
        new_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, name='maxpool2')

        new_points = tf.squeeze(new_points, [2])  # (batch_size, npoints, mlp2[-1]) -- drop the size-1 axis
        # new_points = tf.concat([new_points, xyz_feature], axis=-1)
        return new_xyz, new_points, idx, xyz_feature

细节分析