数据集转换
数据集转换的意义在于将原本的 txt 点云文件转换为更方便运算的 npy 点云文件,同时将原本 xyzrgb 这 6 个维度扩展为 xyzrgbc 共 7 个维度,最后一个 c 维度代表该点所属的类别。
# Convert every room's Annotations folder into one "<Area>_<room>.npy" file.
for anno_path in anno_paths:
    print(anno_path)
    try:
        # The paths can mix '\\' and '/' (os.path.join on Windows), so unify
        # the separators before splitting; otherwise elements[-3] would hold
        # the whole drive prefix instead of just the area name.
        elements = anno_path.replace('\\', '/').rstrip('/').split('/')
        out_filename = elements[-3] + '_' + elements[-2] + '.npy'  # Area_1_hallway_1.npy
        collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy')
    except Exception:
        # Best effort: report the failing room and carry on with the next one,
        # while still letting KeyboardInterrupt / SystemExit propagate.
        print(anno_path, 'ERROR!!')
其中,anno_paths
的值如下:
['D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/conferenceRoom_1/Annotations',
'D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/conferenceRoom_2/Annotations',
'D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/copyRoom_1/Annotations',
'D:\\chat\\programs\\point2\\pooint2torch\\data\\s3dis\\Stanford3dDataset_v1.2_Aligned_Version\\Area_1/hallway_1/Annotations', ]
随后,进入 collect_point_label 方法:首先根据每个标注文件的文件名获得其类别,并将其转换为对应的类别 id。
# Read every annotation file of the room and append a class-id column to its points.
for txt_path in glob.glob(os.path.join(anno_path, '*.txt')):
    # The class name is the part of the file name before the first underscore.
    class_name = os.path.basename(txt_path).split('_')[0]
    print(txt_path)
    # note: in some room there is 'staris' class.. -> treat unknown names as clutter
    if class_name not in g_classes:
        class_name = 'clutter'
    instance_points = np.loadtxt(txt_path)
    class_column = g_class2label[class_name] * np.ones((instance_points.shape[0], 1))
    points_list.append(np.concatenate([instance_points, class_column], 1))  # Nx7
以第一个Annotations
中的内容为例:
读取的第一个点云内容如下,其类别是beam
labels = np.ones((points.shape[0],1)) * g_class2label[cls]
生成对应的类别编号
最后将其拼接在一起即可:
最终该点云集下得到转换后的点云列表:
将List
转换为numpy
类型:
data_label = np.concatenate(points_list, 0)
点云集减去最小值:
xyz_min = np.amin(data_label, axis=0)[0:3]
data_label[:, 0:3] -= xyz_min
最终将这些点云保存为npy
类型
np.save(out_filename, data_label)
最终得到转换后的数据集如下:
模型训练
参数设置
点云类型,当我们更换数据集时也要修改
classes = ['ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 'table', 'chair', 'sofa', 'bookcase',
'board', 'clutter']
模型训练这块主要是参数配置,其中比较重要的是模型(model)、batch_size、epoch、log_dir(保存路径)以及 test_area(测试集所用的区域)。
# Command-line options for training.  Each help text reports the real default
# through argparse's %(default)s placeholder, so the two cannot drift apart.
parser = argparse.ArgumentParser('Model')
parser.add_argument('--model', type=str, default='pointnet2_sem_seg_msg', help='model name [default: %(default)s]')
parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: %(default)s]')
parser.add_argument('--epoch', default=1, type=int, help='Epoch to run [default: %(default)s]')
parser.add_argument('--learning_rate', default=0.001, type=float, help='Initial learning rate [default: %(default)s]')
parser.add_argument('--gpu', type=str, default='0', help='GPU to use [default: GPU %(default)s]')
parser.add_argument('--optimizer', type=str, default='Adam', help='Adam or SGD [default: %(default)s]')
parser.add_argument('--log_dir', type=str, default="pointnet2_sem_seg_msg", help='Log path [default: %(default)s]')
parser.add_argument('--decay_rate', type=float, default=1e-4, help='weight decay [default: %(default)s]')
parser.add_argument('--npoint', type=int, default=4096, help='Point Number [default: %(default)s]')
parser.add_argument('--step_size', type=int, default=10, help='Decay step for lr decay [default: every %(default)s epochs]')
parser.add_argument('--lr_decay', type=float, default=0.7, help='Decay rate for lr decay [default: %(default)s]')
parser.add_argument('--test_area', type=int, default=5, help='Which area to use for test, option: 1-6 [default: %(default)s]')
此外,还需要注意的是,当我们换了数据集后,需要修改这个路径和类别数目
root = 'data/stanford_indoor3d/'
NUM_CLASSES = 13
数据集加载
加载训练集:
TRAIN_DATASET = S3DISDataset(split='train', data_root=root, num_point=NUM_POINT, test_area=args.test_area, block_size=1.0, sample_rate=1.0, transform=None)
训练集加载完成后信息如下,其中 room_idx
代表所属房间的id
。
room_labels
是一个列表,共有204
个场所,每个list
成员代表一个场所,其内为对应点云类别,代表每个场所内不同类型的点云:
使用pytorch框架加载训练集:
trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET, batch_size=BATCH_SIZE, shuffle=True, num_workers=0,
pin_memory=True, drop_last=True,
worker_init_fn=lambda x: np.random.seed(x + int(time.time())))
数据集各类别权重:
PointNet++结构与损失函数
加载模型与损失函数。这里我们可以看到其损失函数使用的是 nll_loss(负对数似然损失),它是一个分类损失;由于模型最后输出的是 log_softmax,二者合起来等价于交叉熵损失:
total_loss = F.nll_loss(pred, target, weight=weight)
MODEL = importlib.import_module(args.model)
shutil.copy('models/%s.py' % args.model, str(experiment_dir))
shutil.copy('models/pointnet2_utils.py', str(experiment_dir))
classifier = MODEL.get_model(NUM_CLASSES).cuda()
criterion = MODEL.get_loss().cuda()
模型结构定义如下:
import torch.nn as nn
import torch.nn.functional as F
from models.pointnet2_utils import PointNetSetAbstractionMsg,PointNetFeaturePropagation
class get_model(nn.Module):
    """PointNet++ segmentation network assembled from multi-scale grouping layers.

    Four ``PointNetSetAbstractionMsg`` stages (``sa1``-``sa4``) are followed by
    four ``PointNetFeaturePropagation`` stages (``fp4``-``fp1``) and a small
    1x1-convolution head that ends in a log-softmax.
    """

    def __init__(self, num_classes):
        """Create all layers; ``num_classes`` is the output width of the last conv."""
        super().__init__()
        # Set-abstraction stages.
        self.sa1 = PointNetSetAbstractionMsg(1024, [0.05, 0.1], [16, 32], 9, [[16, 16, 32], [32, 32, 64]])
        self.sa2 = PointNetSetAbstractionMsg(256, [0.1, 0.2], [16, 32], 32+64, [[64, 64, 128], [64, 96, 128]])
        self.sa3 = PointNetSetAbstractionMsg(64, [0.2, 0.4], [16, 32], 128+128, [[128, 196, 256], [128, 196, 256]])
        self.sa4 = PointNetSetAbstractionMsg(16, [0.4, 0.8], [16, 32], 256+256, [[256, 256, 512], [256, 384, 512]])
        # Feature-propagation stages, applied in the order fp4 -> fp1.
        self.fp4 = PointNetFeaturePropagation(512+512+256+256, [256, 256])
        self.fp3 = PointNetFeaturePropagation(128+128+256, [256, 256])
        self.fp2 = PointNetFeaturePropagation(32+64+256, [256, 128])
        self.fp1 = PointNetFeaturePropagation(128, [128, 128, 128])
        # Classification head.
        self.conv1 = nn.Conv1d(128, 128, 1)
        self.bn1 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(0.5)
        self.conv2 = nn.Conv1d(128, num_classes, 1)

    def forward(self, xyz):
        """Run the network on ``xyz``.

        The first three channels along dimension 1 serve as coordinates and the
        full tensor as the initial features (input presumably shaped
        (batch, channels, num_points) -- confirm against the caller).

        Returns the log-softmax scores (taken over dimension 1, then permuted
        with ``(0, 2, 1)``) and the features produced by ``sa4``.
        """
        coords0, feats0 = xyz[:, :3, :], xyz

        coords1, feats1 = self.sa1(coords0, feats0)
        coords2, feats2 = self.sa2(coords1, feats1)
        coords3, feats3 = self.sa3(coords2, feats2)
        coords4, feats4 = self.sa4(coords3, feats3)

        feats3 = self.fp4(coords3, coords4, feats3, feats4)
        feats2 = self.fp3(coords2, coords3, feats2, feats3)
        feats1 = self.fp2(coords1, coords2, feats1, feats2)
        feats0 = self.fp1(coords0, coords1, None, feats1)

        hidden = F.relu(self.bn1(self.conv1(feats0)))
        logits = self.conv2(self.drop1(hidden))
        log_probs = F.log_softmax(logits, dim=1)
        return log_probs.permute(0, 2, 1), feats4
class get_loss(nn.Module):
    """Weighted negative log-likelihood loss for per-point classification."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target, trans_feat, weight):
        """Return ``F.nll_loss(pred, target, weight=weight)``.

        ``trans_feat`` is accepted for interface compatibility but is not used.
        """
        return F.nll_loss(pred, target, weight=weight)
if __name__ == '__main__':
    # Smoke test: build a 13-class network and push one random batch through it.
    import torch
    net = get_model(13)
    dummy_batch = torch.rand(8, 9, 2048)
    net(dummy_batch)
这里博主在开始时有些困惑:为何传入的 xyz 的格式为 (8, 9, 2048),而不是 (point_num, 6)?事实上,8 代表的是 batch-size,9 为每个点的特征维度,2048 是点的数量。
加载最优模型
加载最优模型,保证是在最优模型基础上进行训练:
# Resume from the best checkpoint when one can be loaded; otherwise fall back
# to freshly initialised weights and start at epoch 0.
try:
    checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
    start_epoch = checkpoint['epoch']
    classifier.load_state_dict(checkpoint['model_state_dict'])
    log_string('Use pretrain model')
except Exception:
    # Any loading failure (missing file, missing key, mismatching weights)
    # means training from scratch; KeyboardInterrupt / SystemExit still propagate.
    log_string('No existing model, starting training from scratch...')
    start_epoch = 0
    classifier = classifier.apply(weights_init)
设置优化器
优化器设置,默认即可
# Build the optimiser named on the command line; anything other than 'Adam'
# falls back to SGD with momentum.
if args.optimizer != 'Adam':
    optimizer = torch.optim.SGD(classifier.parameters(), lr=args.learning_rate, momentum=0.9)
else:
    adam_options = {
        'lr': args.learning_rate,
        'betas': (0.9, 0.999),
        'eps': 1e-08,
        'weight_decay': args.decay_rate,
    }
    optimizer = torch.optim.Adam(classifier.parameters(), **adam_options)
训练开始
开始迭代训练,下面的几个参数用于记录正确分类数量,总损失等,同时将模型开启训练
for epoch in range(start_epoch, args.epoch):
    # Reset the per-epoch statistics and put the model into training mode.
    total_correct, total_seen, loss_sum = 0, 0, 0
    classifier = classifier.train()
下面的代码是训练的核心部分,即完成加载数据集,将数据送入模型,计算损失,反向传播等功能,其中我们着重看一下数据在模型中是如何变化的。
# One optimisation step per batch, plus bookkeeping for accuracy and mean loss.
for i, (points, target) in tqdm(enumerate(trainDataLoader), total=len(trainDataLoader), smoothing=0.9):
    optimizer.zero_grad()  # clear the gradients left over from the previous step
    points = points.data.numpy()  # to NumPy so the rotation helper can be applied
    points[:, :, :3] = provider.rotate_point_cloud_z(points[:, :, :3])
    points = torch.Tensor(points)
    points, target = points.float().cuda(), target.long().cuda()
    points = points.transpose(2, 1)  # swap dimensions 1 and 2 before the forward pass

    seg_pred, trans_feat = classifier(points)
    seg_pred = seg_pred.contiguous().view(-1, NUM_CLASSES)  # one row of scores per point

    batch_label = target.view(-1, 1)[:, 0].cpu().data.numpy()
    target = target.view(-1, 1)[:, 0]
    loss = criterion(seg_pred, target, trans_feat, weights)
    loss.backward()
    optimizer.step()

    pred_choice = seg_pred.cpu().data.max(1)[1].numpy()  # arg-max class per point
    correct = np.sum(pred_choice == batch_label)
    total_correct += correct
    total_seen += (BATCH_SIZE * NUM_POINT)
    # Accumulate a plain Python float: adding the tensor itself would keep a
    # graph-attached CUDA tensor alive for the whole epoch.
    loss_sum += loss.item()
首先,加载的点云(points
)与真值(targets
)如下:
其中,对于 points,16 是 batch-size,4096 是点的数量(我们在前一篇博客中说过,为了使输出的值统一,输入点的数量也要统一,这里设置一个 batch 中每个样本输入的点数为 4096),9 则是每个点的特征维度。
这 9 个维度实际上是 xyzrgb 再加上归一化后的 xyz 坐标,该部分代码在 \data_utils\S3DISDataLoader.py 中:
# Channels 6-8: each coordinate divided by the matching entry of room_coord_max for this room.
current_points[:, 6] = selected_points[:, 0] / self.room_coord_max[room_idx][0]
current_points[:, 7] = selected_points[:, 1] / self.room_coord_max[room_idx][1]
current_points[:, 8] = selected_points[:, 2] / self.room_coord_max[room_idx][2]
# Subtract center[0] / center[1] from x and y; z is left unchanged, i.e. the height is kept.
selected_points[:, 0] = selected_points[:, 0] - center[0]
selected_points[:, 1] = selected_points[:, 1] - center[1]
# Colour channels (columns 3-5) are divided by 255.
selected_points[:, 3:6] /= 255.0
点云数据类型转换:
# Convert the batch to NumPy so the rotation helper can be applied.
points = points.data.numpy()
# Rotation augmentation on the first three channels (as noted by the original author).
points[:, :, :3] = provider.rotate_point_cloud_z(points[:, :, :3])
# Back to a torch tensor, then move points and labels to the GPU.
points = torch.Tensor(points)
points, target = points.float().cuda(), target.long().cuda()
# Swap dimensions 1 and 2 before the tensor is passed on.
points = points.transpose(2, 1)
此时,points的格式如下:
将点云数据送入模型,得到的值如下:
seg_pred, trans_feat = classifier(points)
将预测值转换为(点云数量,类别)的形式
seg_pred = seg_pred.contiguous().view(-1, NUM_CLASSES)
16x4096=65536
损失计算
batch_label = target.view(-1, 1)[:, 0].cpu().data.numpy()
target = target.view(-1, 1)[:, 0]
loss = criterion(seg_pred, target, trans_feat, weights)
这里,虽然传入的值中含有 trans_feat
,但该值并不参与损失计算。
class get_loss(nn.Module):
    """Weighted negative log-likelihood loss for per-point classification."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target, trans_feat, weight):
        """Return ``F.nll_loss(pred, target, weight=weight)``.

        ``trans_feat`` is accepted for interface compatibility but is not used.
        """
        return F.nll_loss(pred, target, weight=weight)
传入的值如下:
随后进行反向传播等操作即可
# Back-propagate, update the weights, then record accuracy and loss statistics.
loss.backward()
optimizer.step()
pred_choice = seg_pred.cpu().data.max(1)[1].numpy()  # arg-max class per point
correct = np.sum(pred_choice == batch_label)
total_correct += correct
total_seen += (BATCH_SIZE * NUM_POINT)
# Accumulate a plain Python float: adding the tensor itself would keep a
# graph-attached CUDA tensor alive for the whole epoch.
loss_sum += loss.item()
模型测试
在完成模型训练后,我们紧接着可以使用训练好的模型进行测试,点云分割所使用的评价指标是mIOU
在测试时,需要指定的参数如下:
# Command-line options for testing.  Each help text reports the real default
# through argparse's %(default)s placeholder, so the two cannot drift apart.
parser = argparse.ArgumentParser('Model')
parser.add_argument('--batch_size', type=int, default=16, help='batch size in testing [default: %(default)s]')
parser.add_argument('--gpu', type=str, default='0', help='specify gpu device')
parser.add_argument('--num_point', type=int, default=4096, help='point number [default: %(default)s]')
parser.add_argument('--log_dir', type=str, default="pointnet2_sem_seg_msg", help='experiment root')
parser.add_argument('--visual', action='store_true', default=False, help='visualize result [default: %(default)s]')
parser.add_argument('--test_area', type=int, default=5, help='area for testing, option: 1-6 [default: %(default)s]')
parser.add_argument('--num_votes', type=int, default=3, help='aggregate segmentation scores with voting [default: %(default)s]')
return parser.parse_args()
这里我们使用的测试集是Area5,其内有67个场所
num_batches = len(TEST_DATASET_WHOLE_SCENE)
total_seen_class = [0 for _ in range(NUM_CLASSES)]
total_correct_class = [0 for _ in range(NUM_CLASSES)]
total_iou_deno_class = [0 for _ in range(NUM_CLASSES)]
log_string('---- EVALUATION WHOLE SCENE----')
for batch_idx in range(num_batches):#开启循环测试
数据维度转换
这块主要是将测试集的点云进行维度转换,即将原本场景的点云变为(16,9,4096)
格式,挺绕的,这里就不详细介绍了。
获取第一个场所的点云数据,1047554
为第一个场景中点云的数量
whole_scene_data = TEST_DATASET_WHOLE_SCENE.scene_points_list[batch_idx]
模型推理
由于测试的时候并不是像训练那样随机采样block,而是需要把整个场景全部输入网络,所以用到了S3DISDataLoader.py中定义的ScannetDatasetWholeScene()来制作数据。具体来说是将一个房间按给定步长网格化,然后有重叠的移动block进行点的采样,和训练的时候一样,block中的点如果不足4096,就重复采样一些点。这样在每个block内部一般都会有数个小的batch,将每个batch输入网络进行预测得到相应的预测分数进行保存,最后计算IOU,并将每个点类别信息和语义标签的颜色信息进行关联,然后一同写入文件。
最终,经过一系列转换,得到输入模型的数据维度依旧为(16,9,4096)
seg_pred, _ = classifier(torch_data)
batch_pred_label = seg_pred.contiguous().cpu().data.max(2)[1].numpy()
点云输出结果转换为对应的类别
组合点云和类别:先前为方便运算,将场景中的点云进行了切分,每个 batch 中的样本含有 4096 个点;在完成对这 4096 个点的分类后,再将各部分的预测结果组合起来,即恢复为原来的场景。
vote_label_pool = add_vote(vote_label_pool, batch_point_index[0:real_batch_size, ...],
batch_pred_label[0:real_batch_size, ...],
batch_smpw[0:real_batch_size, ...])
def add_vote(vote_label_pool, point_idx, pred_label, weight):
    """Add one vote per valid prediction to ``vote_label_pool`` and return it.

    For every position ``(b, n)`` of ``pred_label`` whose ``weight[b, n]`` is
    neither zero nor infinite, the counter at
    ``vote_label_pool[point_idx[b, n], pred_label[b, n]]`` is incremented by 1.
    The pool is modified in place and also returned.

    Args:
        vote_label_pool: 2-D NumPy array of counters indexed by [point, class].
        point_idx: array of point indices, same layout as ``pred_label``.
        pred_label: 2-D array of predicted class ids.
        weight: array of sample weights, same layout as ``pred_label``.
    """
    pred_label = np.asarray(pred_label)
    num_rows, num_cols = pred_label.shape[0], pred_label.shape[1]
    # Only the extent covered by pred_label is consulted, as in an explicit loop.
    weight = np.asarray(weight)[:num_rows, :num_cols]
    point_idx = np.asarray(point_idx)[:num_rows, :num_cols]

    valid = (weight != 0) & ~np.isinf(weight)
    rows = point_idx[valid].astype(int)
    cols = pred_label[valid].astype(int)
    # np.add.at is unbuffered, so a (point, class) pair that occurs several
    # times receives one vote per occurrence.
    np.add.at(vote_label_pool, (rows, cols), 1)
    return vote_label_pool
传入的数据如下:
对每个点取得票数最多的类别作为其预测类别
pred_label = np.argmax(vote_label_pool, 1)
可以看到,第一个场景中每个点的类别已经分配好了,其点云的数量与原本的点云数量一致
在这里就已经完成了对点云的分类,将其与xyz
组合后,根据类别设置对应的颜色,也就完成了点云分割,这部分即推理过程。
这里我们可以举个简单例子,即将原本的点云数据与其预测的类别对应,代码如下:
import open3d as o3d
import numpy as np
# Load the 3-D point cloud and the per-point segmentation prediction
point_cloud = np.load('Area_5_conferenceRoom_1.npy')
segmentation = np.load('Area_5_conferenceRoom_1_c.npy')
# One RGB colour (0-255) per class id
colors = [[0,255,0],
          [0,0,255],
          [0,255,255],
          [255,255,0],
          [255,0,255],
          [100,100,255],
          [200,200,100],
          [170,120,200],
          [255,0,0],
          [200,100,100],
          [10,200,100],
          [200,200,200],
          [50,50,50]]
point_xyz = point_cloud[:, 0:3]
# Flatten so that an (N, 1) label file also yields an (N, 3) colour array.
segmentation = np.array(segmentation, dtype=int).reshape(-1)
# Open3D expects float colours in the range [0, 1], so rescale the 0-255 palette.
point_color = np.array(colors, dtype=np.float64)[segmentation] / 255.0
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_xyz)
pcd.colors = o3d.utility.Vector3dVector(point_color)
o3d.visualization.draw_geometries([pcd])
mIOU计算
# Per-class statistics for the current scene (the *_tmp lists) and running
# totals over all scenes.
for cls_id in range(NUM_CLASSES):
    gt_mask = (whole_scene_label == cls_id)
    pred_mask = (pred_label == cls_id)
    total_seen_class_tmp[cls_id] += np.sum(gt_mask)
    total_correct_class_tmp[cls_id] += np.sum(pred_mask & gt_mask)
    total_iou_deno_class_tmp[cls_id] += np.sum(pred_mask | gt_mask)
    total_seen_class[cls_id] += total_seen_class_tmp[cls_id]
    total_correct_class[cls_id] += total_correct_class_tmp[cls_id]
    total_iou_deno_class[cls_id] += total_iou_deno_class_tmp[cls_id]

# IoU per class = intersection / union; the 1e-6 avoids division by zero.
iou_map = np.array(total_correct_class_tmp) / (np.array(total_iou_deno_class_tmp, dtype=float) + 1e-6)
print(iou_map)
arr = np.array(total_seen_class_tmp)
# Average only over the classes that actually occur in this scene.
tmp_iou = np.mean(iou_map[arr != 0])
log_string('Mean IoU of %s: %.4f' % (scene_id[batch_idx], tmp_iou))
print('----------------------------')

# One predicted class id per line; the with-block closes the file itself.
filename = os.path.join(visual_dir, scene_id[batch_idx] + '.txt')
with open(filename, 'w') as pl_save:
    pl_save.writelines(str(int(label)) + '\n' for label in pred_label)

# The coloured .obj files are only produced when visualisation is requested,
# so the flag is checked once instead of once per point.
if args.visual:
    for i in range(whole_scene_label.shape[0]):
        color = g_label2color[pred_label[i]]
        color_gt = g_label2color[whole_scene_label[i]]
        fout.write('v %f %f %f %d %d %d\n' % (
            whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color[0], color[1],
            color[2]))
        fout_gt.write(
            'v %f %f %f %d %d %d\n' % (
                whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color_gt[0],
                color_gt[1], color_gt[2]))
    fout.close()
    fout_gt.close()
至此,我们完成了PointNet++
模型的流程梳理。
模型推理可视化
在测试时,我们保存了点云预测的类别,我们讲解了如何将其可视化,事实上,在pointnet++
中,可以选择是否进行可视化,将可视化参数设置为True
即可:
# Visualisation is on by default here; the help text reports the real default.
# NOTE: without a type/action, any value passed on the command line arrives as
# a non-empty string and is therefore truthy (even '--visual False').
parser.add_argument('--visual', default=True, help='visualize result [default: %(default)s]')
if args.visual:
fout = open(os.path.join(visual_dir, scene_id[batch_idx] + '_pred.obj'), 'w')
fout_gt = open(os.path.join(visual_dir, scene_id[batch_idx] + '_gt.obj'), 'w')
存放在 pooint2torch\log\sem_seg\pointnet2_sem_seg_msg\visual 路径下:
其中,txt
中只有类别,而obj
文件则根据类别编号设置了对应的颜色,其效果与我们前面自己写的可视化相同。
然而,我们发现生成的 obj 文件中每行第一列都有一个 v 字符(OBJ 格式中的顶点标记),若要把该文件当作纯文本点云读取,这个字符会造成影响,应该将其去掉。
写入文件的代码如下,将格式字符串开头的 v 去掉即可:
if args.visual:
fout.write('v %f %f %f %d %d %d\n' % (
whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color[0], color[1],
color[2]))
fout_gt.write('v %f %f %f %d %d %d\n' % (
whole_scene_data[i, 0], whole_scene_data[i, 1], whole_scene_data[i, 2], color_gt[0],
color_gt[1], color_gt[2]))
生成文件后,发现在使用 open3d 进行可视化时会有问题,大概是文件格式的问题。这里我们可以让其生成 txt 格式的文件,之后再将其转换为 numpy 或其他格式即可。