赞
踩
分割任务
分割网络分为三个模块:Set Abstraction layers(特征提取)、Feature Propagation layers(特征传递)和 FC layers(全连接)。整体是一个类似 U-Net 的结构,整个分割网络的代码如下:
# Set Abstraction layers
# Three chained pointnet_sa_module calls: each one consumes the xyz/points of
# the previous level and returns a new (xyz, points, indices) triple.
# Levels 1 and 2 pass npoint=512 / 128 with radius=0.2 / 0.4 and nsample=64;
# level 3 passes group_all=True with npoint, radius and nsample set to None.
l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=512, radius=0.2, nsample=64, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=128, radius=0.4, nsample=64, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=None, radius=None, nsample=None, mlp=[256,512,1024], mlp2=None, group_all=True, is_training=is_training, bn_decay=bn_decay, scope='layer3')
# Feature Propagation layers
# Walk back from level 3 to level 0. Each call receives the xyz/points of two
# adjacent levels and overwrites the points of the lower-numbered level
# (l2_points, then l1_points, then l0_points) with the module's output.
# The last call feeds tf.concat([l0_xyz, l0_points]) as the level-0 features.
l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer1')
l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer2')
l0_points = pointnet_fp_module(l0_xyz, l1_xyz, tf.concat([l0_xyz,l0_points],axis=-1), l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer3')
# FC layers
# conv1d (128 outputs, with batch norm) -> dropout (keep_prob=0.5) -> conv1d
# (50 outputs, activation_fn=None). The output of the first conv1d is also
# stored in end_points['feats'].
# NOTE(review): the third positional argument (1) is presumably the kernel
# size, and 50 presumably the number of part classes -- confirm against
# tf_util.conv1d and the dataset definition.
net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
end_points['feats'] = net
net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
net = tf_util.conv1d(net, 50, 1, padding='VALID', activation_fn=None, scope='fc2')
Feature Propagation 采用反距离加权插值(把距离的倒数归一化后作为 weight)。这种插值输入 (N, D),输出 (N', D):只改变点的个数,保证特征维度 D 不变。
白色代表输入,绿色代表输出。
代码如下:
def pointnet_fp_module(xyz1, xyz2, points1, points2, mlp, is_training, bn_decay, scope, bn=True):
    """PointNet Feature Propagation (FP) module.

    Interpolates the features of the sparser point set (xyz2 / points2) onto
    the denser point set (xyz1) using inverse-distance weights over the three
    neighbours returned by ``three_nn``, optionally concatenates the result
    with ``points1``, and runs the result through a stack of 1x1 convolutions.

    Args:
        xyz1: (batch_size, ndataset1, 3) TF tensor.
        xyz2: (batch_size, ndataset2, 3) TF tensor, sparser than xyz1.
        points1: (batch_size, ndataset1, nchannel1) TF tensor, or None to
            skip the concatenation.
        points2: (batch_size, ndataset2, nchannel2) TF tensor.
        mlp: list of int32 -- output size of each 1x1 convolution.
        is_training: forwarded to ``tf_util.conv2d``.
        bn_decay: forwarded to ``tf_util.conv2d``.
        scope: name of the variable scope wrapping the module.
        bn: forwarded to ``tf_util.conv2d`` as its ``bn`` argument.

    Returns:
        new_points: (batch_size, ndataset1, mlp[-1]) TF tensor.
    """
    with tf.variable_scope(scope):
        dist, idx = three_nn(xyz1, xyz2)
        # Clamp so that a zero distance cannot produce a division by zero.
        dist = tf.maximum(dist, 1e-10)

        # Weight of each neighbour = its inverse distance, normalised so the
        # three weights of a point sum to one.
        inv_dist = 1.0 / dist
        inv_dist_sum = tf.reduce_sum(inv_dist, axis=2, keep_dims=True)
        inv_dist_sum = tf.tile(inv_dist_sum, [1, 1, 3])
        weight = inv_dist / inv_dist_sum

        interpolated_points = three_interpolate(points2, idx, weight)

        if points1 is None:
            features = interpolated_points
        else:
            # B, ndataset1, nchannel1 + nchannel2
            features = tf.concat(axis=2, values=[interpolated_points, points1])

        # conv2d works on 4-D input: add a dummy axis, convolve, drop it again.
        features = tf.expand_dims(features, 2)
        for i, num_out_channel in enumerate(mlp):
            features = tf_util.conv2d(
                features, num_out_channel, [1, 1],
                padding='VALID', stride=[1, 1],
                bn=bn, is_training=is_training,
                scope='conv_%d' % (i), bn_decay=bn_decay)
        features = tf.squeeze(features, [2])  # B, ndataset1, mlp[-1]
        return features
segmentation部分示意图
class PointNetFeaturePropagation(nn.Module):
    """Decoder stage: upsample sparse features to a denser point set.

    Features of the sparser set are interpolated onto the denser set with
    inverse-distance weights over the (up to) three nearest sparse points,
    concatenated with the denser set's own features, fused by a
    ``ConvBNReLU1D`` and refined by a ``PosExtraction`` block.
    """

    def __init__(self, in_channel, out_channel, blocks=1, groups=1, res_expansion=1.0, bias=True, activation='relu'):
        super(PointNetFeaturePropagation, self).__init__()
        self.fuse = ConvBNReLU1D(in_channel, out_channel, 1, bias=bias)
        self.extraction = PosExtraction(out_channel, blocks, groups=groups,
                                        res_expansion=res_expansion, bias=bias, activation=activation)

    def forward(self, xyz1, xyz2, points1, points2):
        """Propagate ``points2`` onto the positions ``xyz1``.

        Input:
            xyz1: input points position data, [B, N, 3]
            xyz2: sampled input points position data, [B, S, 3]
            points1: input points data, [B, D', N] (may be None)
            points2: input points data, [B, D'', S]
        Return:
            new_points: upsampled points data, [B, D''', N]
        """
        points2 = points2.permute(0, 2, 1)  # [B, S, D'']
        B, N, _ = xyz1.shape
        _, S, _ = xyz2.shape

        if S == 1:
            # A single source point: every target point copies its features.
            interpolated_points = points2.repeat(1, N, 1)
        else:
            # Use the 3 nearest source points, or all of them when S < 3
            # (the fixed view(B, N, 3, 1) used to crash for S == 2).
            k = min(3, S)
            dists = square_distance(xyz1, xyz2)
            dists, idx = dists.sort(dim=-1)
            dists, idx = dists[:, :, :k], idx[:, :, :k]  # [B, N, k]

            # Normalised inverse-distance weights; 1e-8 avoids division by zero.
            dist_recip = 1.0 / (dists + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_points = torch.sum(index_points(points2, idx) * weight.view(B, N, k, 1), dim=2)

        if points1 is not None:
            points1 = points1.permute(0, 2, 1)  # [B, N, D']
            new_points = torch.cat([points1, interpolated_points], dim=-1)
        else:
            new_points = interpolated_points

        new_points = new_points.permute(0, 2, 1)  # back to channels-first
        new_points = self.fuse(new_points)
        new_points = self.extraction(new_points)
        return new_points


class PointMLP(nn.Module):
    """PointMLP encoder/decoder network for part segmentation.

    The encoder is a stack of (LocalGrouper, PreExtraction, PosExtraction)
    stages; the decoder is a stack of ``PointNetFeaturePropagation`` modules.
    The per-point decoder output is concatenated with a global context vector
    and an embedding of the object-class label before the final classifier.

    NOTE: the list-valued defaults are kept for interface compatibility; they
    are only read, never mutated, inside this class.
    """

    def __init__(self, num_classes=50,points=2048, embed_dim=64, groups=1, res_expansion=1.0,
                 activation="relu", bias=True, use_xyz=True, normalize="anchor",
                 dim_expansion=[2, 2, 2, 2], pre_blocks=[2, 2, 2, 2], pos_blocks=[2, 2, 2, 2],
                 k_neighbors=[32, 32, 32, 32], reducers=[4, 4, 4, 4],
                 de_dims=[512, 256, 128, 128], de_blocks=[2,2,2,2],
                 gmp_dim=64,cls_dim=64, **kwargs):
        super(PointMLP, self).__init__()
        self.stages = len(pre_blocks)
        self.class_num = num_classes
        self.points = points
        # The embedding takes 6 input channels (forward() concatenates x and norm_plt).
        self.embedding = ConvBNReLU1D(6, embed_dim, bias=bias, activation=activation)
        assert len(pre_blocks) == len(k_neighbors) == len(reducers) == len(pos_blocks) == len(dim_expansion), \
            "Please check stage number consistent for pre_blocks, pos_blocks k_neighbors, reducers."
        self.local_grouper_list = nn.ModuleList()
        self.pre_blocks_list = nn.ModuleList()
        self.pos_blocks_list = nn.ModuleList()
        last_channel = embed_dim
        anchor_points = self.points
        # Extra bookkeeping compared with the classification model: the
        # channel width of every encoder level, starting with the embedding.
        en_dims = [last_channel]

        ### Building Encoder #####
        for i in range(len(pre_blocks)):
            # With the defaults the width doubles per stage: 64 -> 128 -> 256 -> 512 -> 1024.
            out_channel = last_channel * dim_expansion[i]
            pre_block_num = pre_blocks[i]
            pos_block_num = pos_blocks[i]
            kneighbor = k_neighbors[i]
            reduce = reducers[i]
            # With the defaults (2048 points, reducers of 4): 512, 128, 32, 8.
            anchor_points = anchor_points // reduce

            # append local_grouper_list
            local_grouper = LocalGrouper(last_channel, anchor_points, kneighbor, use_xyz, normalize)  # [b,g,k,d]
            self.local_grouper_list.append(local_grouper)
            # append pre_block_list
            pre_block_module = PreExtraction(last_channel, out_channel, pre_block_num, groups=groups,
                                             res_expansion=res_expansion,
                                             bias=bias, activation=activation, use_xyz=use_xyz)
            self.pre_blocks_list.append(pre_block_module)
            # append pos_block_list
            pos_block_module = PosExtraction(out_channel, pos_block_num, groups=groups,
                                             res_expansion=res_expansion, bias=bias, activation=activation)
            self.pos_blocks_list.append(pos_block_module)

            last_channel = out_channel
            en_dims.append(last_channel)  # defaults: [64, 128, 256, 512, 1024]

        ### Building Decoder #####
        self.decode_list = nn.ModuleList()
        en_dims.reverse()  # defaults: [1024, 512, 256, 128, 64]
        # Build a NEW list instead of de_dims.insert(0, ...): inserting in
        # place mutated the shared default argument, so constructing a second
        # PointMLP() failed the length assertion below.
        de_dims = self._decoder_dims(en_dims, de_dims)  # defaults: [1024, 512, 256, 128, 128]
        assert len(en_dims) ==len(de_dims) == len(de_blocks)+1
        for i in range(len(en_dims)-1):
            # Input width = previous decoder output + skip connection.
            # With the defaults:
            #   stage 1: 1024 + 512 -> 512
            #   stage 2:  512 + 256 -> 256
            #   stage 3:  256 + 128 -> 128
            #   stage 4:  128 +  64 -> 128
            self.decode_list.append(
                PointNetFeaturePropagation(de_dims[i]+en_dims[i+1], de_dims[i+1],
                                           blocks=de_blocks[i], groups=groups, res_expansion=res_expansion,
                                           bias=bias, activation=activation)
            )

        self.act = get_activation(activation)

        # class label mapping: 16 input channels -> cls_dim
        # NOTE(review): 16 presumably is the number of object categories
        # (one-hot cls_label) -- confirm against the data loader.
        self.cls_map = nn.Sequential(
            ConvBNReLU1D(16, cls_dim, bias=bias, activation=activation),
            ConvBNReLU1D(cls_dim, cls_dim, bias=bias, activation=activation)
        )
        # global max pooling mapping: one projection to gmp_dim per level
        self.gmp_map_list = nn.ModuleList()
        for en_dim in en_dims:
            self.gmp_map_list.append(ConvBNReLU1D(en_dim, gmp_dim, bias=bias, activation=activation))
        self.gmp_map_end = ConvBNReLU1D(gmp_dim*len(en_dims), gmp_dim, bias=bias, activation=activation)

        # classifier: with the defaults 64 + 64 + 128 = 256 input channels -> 128 -> num_classes
        self.classifier = nn.Sequential(
            nn.Conv1d(gmp_dim+cls_dim+de_dims[-1], 128, 1, bias=bias),
            nn.BatchNorm1d(128),
            nn.Dropout(),
            nn.Conv1d(128, num_classes, 1, bias=bias)
        )
        self.en_dims = en_dims

    @staticmethod
    def _decoder_dims(en_dims, de_dims):
        """Return decoder widths prefixed with en_dims[0], without mutating de_dims."""
        return [en_dims[0]] + list(de_dims)

    def forward(self, x, norm_plt, cls_label):
        """Run the segmentation network.

        Args:
            x: point tensor, channels-first; it is permuted to [B, N, 3] for
                the coordinate list, so presumably [B, 3, N] -- confirm.
            norm_plt: concatenated with ``x`` along dim 1 to form the
                6-channel embedding input; presumably per-point normals.
            cls_label: object-class label, unsqueezed on the last dim before
                ``cls_map``; presumably one-hot of shape [B, 16] -- confirm.

        Returns:
            Log-softmax scores (softmax over the class dimension), permuted
            to [B, N, num_classes].
        """
        xyz = x.permute(0, 2, 1)
        x = torch.cat([x,norm_plt],dim=1)  # 6 channels
        x = self.embedding(x)  # B,D,N  (6 -> embed_dim)

        xyz_list = [xyz]  # [B, N, 3]
        x_list = [x]  # [B, D, N]

        # here is the encoder
        # With the defaults the feature width per stage goes
        # 64 -> 128 -> 256 -> 512 -> 1024.
        for i in range(self.stages):
            # Give xyz[b, p, 3] and fea[b, p, d], return new_xyz[b, g, 3] and new_fea[b, g, k, d]
            xyz, x = self.local_grouper_list[i](xyz, x.permute(0, 2, 1))  # [b,g,3]  [b,g,k,d]
            x = self.pre_blocks_list[i](x)  # [b,d,g]
            x = self.pos_blocks_list[i](x)  # [b,d,g]
            xyz_list.append(xyz)  # keep the coordinates of every level
            x_list.append(x)  # keep the features of every level

        # here is the decoder
        xyz_list.reverse()
        x_list.reverse()  # deepest (widest) level first
        x = x_list[0]
        for i, decoder in enumerate(self.decode_list):
            x = decoder(xyz_list[i+1], xyz_list[i], x_list[i+1], x)

        # here is the global context
        gmp_list = [
            F.adaptive_max_pool1d(gmp_map(level_feat), 1)
            for gmp_map, level_feat in zip(self.gmp_map_list, x_list)
        ]
        global_context = self.gmp_map_end(torch.cat(gmp_list, dim=1))  # [b, gmp_dim, 1]

        # here is the cls_token
        cls_token = self.cls_map(cls_label.unsqueeze(dim=-1))  # [b, cls_dim, 1]
        # Broadcast the two per-object vectors to every point and concatenate.
        x = torch.cat([x, global_context.repeat([1, 1, x.shape[-1]]), cls_token.repeat([1, 1, x.shape[-1]])], dim=1)
        x = self.classifier(x)
        x = F.log_softmax(x, dim=1)
        x = x.permute(0, 2, 1)
        return x
class PAConv(nn.Module):
    """DGCNN-style part-segmentation network with PAConv layers.

    The first layer is a plain 1x1 convolution on graph features; layers 2-5
    each transform features with a learned kernel bank (``matriceN``),
    assemble them with scores from a ``ScoreNet`` and apply BN + ReLU. The
    five per-layer features are concatenated, combined with a global feature
    and an embedding of the object-class label, and classified per point.
    """

    def __init__(self, args, num_part):
        """
        Args:
            args: dict-like config; reads 'k_neighbors' (default 30),
                'calc_scores' (default 'softmax'), 'hidden', 'num_matrices'
                and 'dropout' (default 0.4).
            num_part: number of output channels of the final convolution.
        """
        super(PAConv, self).__init__()
        # baseline args:
        self.args = args
        self.num_part = num_part
        # PAConv args:
        self.k = args.get('k_neighbors', 30)
        self.calc_scores = args.get('calc_scores', 'softmax')
        self.hidden = args.get('hidden', [[16], [16], [16], [16]])  # the hidden layers of ScoreNet

        self.m2, self.m3, self.m4, self.m5 = args.get('num_matrices', [8, 8, 8, 8])
        self.scorenet2 = ScoreNet(10, self.m2, hidden_unit=self.hidden[0])
        self.scorenet3 = ScoreNet(10, self.m3, hidden_unit=self.hidden[1])
        self.scorenet4 = ScoreNet(10, self.m4, hidden_unit=self.hidden[2])
        self.scorenet5 = ScoreNet(10, self.m5, hidden_unit=self.hidden[3])

        i2 = 64  # channel dim of input_2nd
        o2 = i3 = 64  # channel dim of output_2nd and input_3rd
        o3 = i4 = 64  # channel dim of output_3rd and input_4th
        o4 = i5 = 64  # channel dim of output_4th and input_5th
        o5 = 64  # channel dim of output_5th

        # Kernel banks are created in layer order (2, 3, 4, 5) so the random
        # initialisation consumes the RNG exactly as before. The 5th kernel
        # now uses i5 for its view; it used i4 by copy-paste, which only
        # worked because both are 64.
        self.matrice2 = nn.Parameter(self._init_kernel(self.m2, i2, o2), requires_grad=True)
        self.matrice3 = nn.Parameter(self._init_kernel(self.m3, i3, o3), requires_grad=True)
        self.matrice4 = nn.Parameter(self._init_kernel(self.m4, i4, o4), requires_grad=True)
        self.matrice5 = nn.Parameter(self._init_kernel(self.m5, i5, o5), requires_grad=True)

        self.bn2 = nn.BatchNorm1d(64, momentum=0.1)
        self.bn3 = nn.BatchNorm1d(64, momentum=0.1)
        self.bn4 = nn.BatchNorm1d(64, momentum=0.1)
        self.bn5 = nn.BatchNorm1d(64, momentum=0.1)

        self.bnt = nn.BatchNorm1d(1024, momentum=0.1)
        self.bnc = nn.BatchNorm1d(64, momentum=0.1)

        self.bn6 = nn.BatchNorm1d(256, momentum=0.1)
        self.bn7 = nn.BatchNorm1d(256, momentum=0.1)
        self.bn8 = nn.BatchNorm1d(128, momentum=0.1)

        self.conv1 = nn.Sequential(nn.Conv2d(6, 64, kernel_size=1, bias=True),
                                   nn.BatchNorm2d(64, momentum=0.1))

        # 64*5 concatenated layer features -> 1024-d global feature
        self.convt = nn.Sequential(nn.Conv1d(64*5, 1024, kernel_size=1, bias=False),
                                   self.bnt)
        # 16-channel class label -> 64
        self.convc = nn.Sequential(nn.Conv1d(16, 64, kernel_size=1, bias=False),
                                   self.bnc)
        # (1024 + 64) global/label channels + 64*5 per-point channels
        self.conv6 = nn.Sequential(nn.Conv1d(1088+64*5, 256, kernel_size=1, bias=False),
                                   self.bn6)
        self.dp1 = nn.Dropout(p=args.get('dropout', 0.4))
        self.conv7 = nn.Sequential(nn.Conv1d(256, 256, kernel_size=1, bias=False),
                                   self.bn7)
        self.dp2 = nn.Dropout(p=args.get('dropout', 0.4))
        self.conv8 = nn.Sequential(nn.Conv1d(256, 128, kernel_size=1, bias=False),
                                   self.bn8)
        self.conv9 = nn.Conv1d(128, num_part, kernel_size=1, bias=True)

    @staticmethod
    def _init_kernel(num_matrices, in_dim, out_dim):
        """Return a Kaiming-initialised kernel bank of shape (2*in_dim, num_matrices*out_dim)."""
        bank = nn.init.kaiming_normal_(torch.empty(num_matrices, in_dim * 2, out_dim), nonlinearity='relu')
        return bank.permute(1, 0, 2).contiguous().view(in_dim * 2, num_matrices * out_dim)

    def forward(self, x, norm_plt, cls_label, gt=None):
        """Compute per-point log-probabilities and, optionally, the NLL loss.

        Args:
            x: input tensor of size (B, C, N).
            norm_plt: accepted for interface compatibility; not used here.
            cls_label: object-class label, viewed as (B, 16, 1).
            gt: optional ground-truth labels; when given the loss is returned.

        Returns:
            ``x`` of shape (B, N, num_part) with log-softmax scores, or the
            tuple ``(x, nll_loss)`` when ``gt`` is not None.
        """
        B, _, N = x.size()
        idx, _ = knn(x, k=self.k)  # different with DGCNN, the knn search is only in 3D space
        xyz = get_scorenet_input(x, k=self.k, idx=idx)  # ScoreNet input

        #################
        # use MLP at the 1st layer, same with DGCNN
        x = get_graph_feature(x, k=self.k, idx=idx)
        x = x.permute(0, 3, 1, 2)  # b,2cin,n,k
        x = F.relu(self.conv1(x))
        x1 = x.max(dim=-1, keepdim=False)[0]

        #################
        # replace the last 4 DGCNN-EdgeConv with PAConv
        # (CUDA implementation of PAConv, presented in the supplementary
        # material of the paper): feature transformation, then assembling
        # with the ScoreNet scores, then BN + ReLU.
        layer_feats = [x1]
        paconv_layers = (
            (self.matrice2, self.m2, self.scorenet2, self.bn2),
            (self.matrice3, self.m3, self.scorenet3, self.bn3),
            (self.matrice4, self.m4, self.scorenet4, self.bn4),
            (self.matrice5, self.m5, self.scorenet5, self.bn5),
        )
        for kernel, num_matrices, scorenet, bn in paconv_layers:
            transformed, center = feat_trans_dgcnn(point_input=layer_feats[-1], kernel=kernel, m=num_matrices)
            score = scorenet(xyz, calc_scores=self.calc_scores, bias=0)
            assembled = assemble_dgcnn(score=score, point_input=transformed, center_input=center,
                                       knn_idx=idx, aggregate='sum')
            layer_feats.append(F.relu(bn(assembled)))

        ###############
        xx = torch.cat(layer_feats, dim=1)  # 64*5 channels

        xc = F.relu(self.convt(xx))  # 64*5 -> 1024
        xc = F.adaptive_max_pool1d(xc, 1).view(B, -1)

        cls_label = cls_label.view(B, 16, 1)
        cls_label = F.relu(self.convc(cls_label))  # 16 -> 64
        cls = torch.cat((xc.view(B, 1024, 1), cls_label), dim=1)  # 1024 + 64 = 1088 channels
        cls = cls.repeat(1, 1, N)  # B,1088,N

        x = torch.cat((xx, cls), dim=1)  # 64*5 + 1088 channels, matching conv6
        x = F.relu(self.conv6(x))
        x = self.dp1(x)
        x = F.relu(self.conv7(x))
        x = self.dp2(x)
        x = F.relu(self.conv8(x))
        x = self.conv9(x)
        x = F.log_softmax(x, dim=1)
        x = x.permute(0, 2, 1)  # b,n,num_part

        if gt is not None:
            return x, F.nll_loss(x.contiguous().view(-1, self.num_part), gt.view(-1, 1)[:, 0])
        return x
参考:
【1】论文笔记:PointNet++论文代码讨论
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。