赞
踩
由于tensorflow编译C++的api比较麻烦,此次部署的pointnet代码的Python版本为Pytorch编写的。
代码:Pointnet_Pointnet2_pytorch
环境配置:win10系统
cuda10.1+cudnn7.5+Python3.6.5+pytorch1.5.0+libtorch1.5.0+VS2017
或者libtorch1.4.0+VS2015
软件下载和配置过程在此不赘述。
2023/04/15更新:新增semantic segmentation部分,统一代码风格等。
2023/05/15更新:由于gpu换为RTX30系,更换环境配置为cuda11.1+cudnn8.0.4+python3.7.16+pytorch1.8.0+libtorch1.8.0+VS2019,以及torchscript转换文件下载更新。
ps:30系gpu只能安装cuda11以上,其他配置pytorch和libtorch的版本对应即可(貌似高版本libtorch兼容低版本pytorch,但有概率会出问题),libtorch1.5.0(需要支持C++14)之后不兼容VS2015。别问LZ怎么知道的,这些都是泪呀。
pytorch训练得到的pth文件转libtorch使用的pt文件脚本(以分10类,gpu版本为例):
torchscript.py
import torch
import pointnet_cls
# Export settings: size of the dummy input and the model configuration.
point_num = 1024
class_num = 10
normal_channel = False
# Single device switch: change 'cuda' to 'cpu' to export the CPU version.
device = torch.device('cuda')

model = pointnet_cls.get_model(class_num, normal_channel)
# map_location remaps the checkpoint tensors onto the chosen device, so a
# checkpoint saved from another device still loads.
checkpoint = torch.load('cls.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Random dummy input of shape (1, channels, point_num) that drives the trace.
channels = 6 if normal_channel else 3
x = torch.rand(1, channels, point_num).to(device)
traced_script_module = torch.jit.trace(model, x)
traced_script_module.save("cls.pt")
python推理代码:
import numpy as np
import torch
import pointnet_cls
# Number of points kept from the input cloud and fed to the network.
point_num = 1024
# Number of classes the classification model is built with.
class_num = 10
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    Args:
        pc: array of shape (N, C); each row is one point.

    Returns:
        A new array of the same shape whose rows have zero mean and whose
        largest row norm is 1. The input array is not modified. When every
        point coincides with the centroid the centered (all-zero) cloud is
        returned as is.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
    if m > 0:
        # Guarded so a degenerate cloud does not turn into NaN/inf.
        pc = pc / m
    return pc
if __name__ == '__main__':
    # Rows are comma separated; only the first three columns (xyz) of the
    # first point_num rows are used.
    data = np.loadtxt('./bed_0610.txt', delimiter=',').astype(np.float32)
    point_set = data[:, 0:3]
    point_set = point_set[0:point_num, :]
    point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])
    points = torch.from_numpy(point_set)
    points = torch.reshape(points, (1, point_num, 3))

    model = pointnet_cls.get_model(class_num, normal_channel=False)
    model = model.cuda()
    checkpoint = torch.load('cls.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.eval()

    with torch.no_grad():
        # Swap the last two axes so the model receives (1, 3, point_num).
        points = points.transpose(2, 1).cuda()
        pred, _ = model(points)
        # Index of the largest score along dimension 1.
        pred_choice = pred.data.max(1)[1]
    pred_list = pred_choice.cpu().numpy().tolist()
    print(pred_list)
C++推理代码:
#include <iostream>
#include <vector>
#include <fstream>
#include <torch/script.h>
// Number of points read from the input file and passed to the network.
const int point_num = 1024;
// Centers a point cloud on its centroid and scales it so that the farthest
// point lies at distance 1 from the origin.
//
// points: flat buffer laid out as x0, y0, z0, x1, y1, z1, ...; modified in
//         place. Every complete xyz triple in the buffer is processed.
// An empty buffer is left untouched, and a cloud whose points all coincide is
// centered but not scaled, so no division by zero can occur.
void pc_normalize(std::vector<float>& points)
{
    const size_t count = points.size() / 3;
    if (count == 0)
        return;

    float mean_x = 0, mean_y = 0, mean_z = 0;
    for (size_t i = 0; i < count; ++i)
    {
        mean_x += points[3 * i];
        mean_y += points[3 * i + 1];
        mean_z += points[3 * i + 2];
    }
    mean_x /= count;
    mean_y /= count;
    mean_z /= count;

    // Track the largest squared norm while centering; a single square root
    // at the end gives the same maximum distance.
    float max_sq = 0;
    for (size_t i = 0; i < count; ++i)
    {
        float& px = points[3 * i];
        float& py = points[3 * i + 1];
        float& pz = points[3 * i + 2];
        px -= mean_x;
        py -= mean_y;
        pz -= mean_z;
        const float sq = px * px + py * py + pz * pz;
        if (sq > max_sq)
            max_sq = sq;
    }
    if (max_sq <= 0)
        return;

    const float m = std::sqrt(max_sq);
    for (size_t i = 0; i < 3 * count; ++i)
        points[i] /= m;
}
void classfier(std::vector<float> & points)
{
torch::Tensor points_tensor = torch::from_blob(points.data(), { 1, point_num, 3 }, torch::kFloat);
points_tensor = points_tensor.to(torch::kCUDA);
points_tensor = points_tensor.permute({ 0, 2, 1 });
torch::jit::script::Module module = torch::jit::load("cls.pt");
module.to(torch::kCUDA);
auto outputs = module.forward({ points_tensor }).toTuple();
torch::Tensor out0 = outputs->elements()[0].toTensor();
std::cout << out0 << std::endl;
auto max_classes = out0.max(1);
auto max_index = std::get<1>(max_classes).item<int>();
std::cout << max_index << std::endl;
}
// Reads the first point_num points of "bed_0610.txt", normalizes them and runs
// the classifier on them.
//
// Each row must hold six numbers separated by single characters (commas in
// the sample file); only the first three (xyz) are kept.
// Returns 0 on success and 1 when the file cannot be opened or does not
// provide point_num well-formed rows.
int main()
{
    std::ifstream infile("bed_0610.txt");
    if (!infile)
    {
        std::cerr << "Failed to open bed_0610.txt" << std::endl;
        return 1;
    }

    std::vector<float> points;
    points.reserve(3 * point_num);
    float x, y, z, nx, ny, nz;
    char ch; // receives the separator between two values
    for (int i = 0; i < point_num; i++)
    {
        // Stop instead of pushing uninitialised values when a row is missing.
        if (!(infile >> x >> ch >> y >> ch >> z >> ch >> nx >> ch >> ny >> ch >> nz))
        {
            std::cerr << "bed_0610.txt: row " << i << " is missing or malformed, "
                      << point_num << " rows are required" << std::endl;
            return 1;
        }
        points.push_back(x);
        points.push_back(y);
        points.push_back(z);
    }
    infile.close();

    pc_normalize(points);
    classfier(points);
    return 0;
}
预测结果:
预测类别为1,在names.txt中对应为bed,结果正确。
C++推理速度稳定在不到0.2s,相比Python推理速度1~2s快了很多。
pytorch训练得到的pth文件转libtorch使用的pt文件脚本(以16类物体分成50部分,gpu版本为例):
torchscript.py
import torch
import pointnet_part_seg
# Number of points in the dummy cloud used for tracing.
point_num = 2048
# Number of object categories; length of the one-hot category vector.
class_num = 16
# Number of part labels the segmentation model is built with.
part_num = 50
# When True the dummy input has 6 channels instead of 3.
normal_channel = False
def to_categorical(y, class_num):
    """One-hot encode an integer label tensor.

    Args:
        y: integer tensor of class indices, any shape.
        class_num: number of classes, i.e. the length of each one-hot vector.

    Returns:
        Float tensor of shape ``y.shape + (class_num,)`` on the same device
        as ``y``.
    """
    # Index an identity matrix created on y's device; this avoids the round
    # trip through a NumPy array on the host.
    return torch.eye(class_num, device=y.device)[y.long()]
# Single device switch: change 'cuda' to 'cpu' to export the CPU version.
device = torch.device('cuda')

model = pointnet_part_seg.get_model(part_num, normal_channel)
# map_location remaps the checkpoint tensors onto the chosen device, so a
# checkpoint saved from another device still loads.
checkpoint = torch.load('part_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Random dummy cloud of shape (1, channels, point_num) that drives the trace.
channels = 6 if normal_channel else 3
x = torch.rand(1, channels, point_num).to(device)
# torch.randint(0, 1, ...) can only produce 0, so the dummy category is 0.
label = torch.randint(0, 1, (1, 1)).to(device)
traced_script_module = torch.jit.trace(model, (x, to_categorical(label, class_num)))
traced_script_module.save("part_seg.pt")
python推理代码:
import torch
import numpy as np
import pointnet_part_seg
# Number of points sampled from the cloud and fed to the network.
point_num = 2048
# Number of object categories; length of the one-hot category vector.
class_num = 16
# Number of part labels the segmentation model is built with.
part_num = 50
def to_categorical(y, class_num):
    """One-hot encode an integer label tensor.

    Args:
        y: integer tensor of class indices, any shape.
        class_num: number of classes, i.e. the length of each one-hot vector.

    Returns:
        Float tensor of shape ``y.shape + (class_num,)`` on the same device
        as ``y``.
    """
    # Index an identity matrix created on y's device; this avoids the round
    # trip through a NumPy array on the host.
    return torch.eye(class_num, device=y.device)[y.long()]
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    Args:
        pc: array of shape (N, C); each row is one point.

    Returns:
        A new array of the same shape whose rows have zero mean and whose
        largest row norm is 1. The input array is not modified. When every
        point coincides with the centroid the centered (all-zero) cloud is
        returned as is.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
    if m > 0:
        # Guarded so a degenerate cloud does not turn into NaN/inf.
        pc = pc / m
    return pc
if __name__ == '__main__':
    data = np.loadtxt('85a15c26a6e9921ae008cc4902bfe3cd.txt').astype(np.float32)
    # Only the first three columns (xyz) are used.
    point_set = data[:, 0:3]
    point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])
    # Draw exactly point_num rows, with replacement, from the normalized cloud.
    choice = np.random.choice(point_set.shape[0], point_num, replace=True)
    point_set = point_set[choice, :][:, 0:3]
    points = torch.from_numpy(point_set)
    points = torch.reshape(points, (1, point_num, 3))
    # Category index 0 is used for the one-hot category vector.
    label = torch.tensor([[0]], dtype=torch.int32)

    model = pointnet_part_seg.get_model(part_num, normal_channel=False)
    model = model.cuda()
    checkpoint = torch.load('part_seg.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.eval()

    with torch.no_grad():
        points, label = points.float().cuda(), label.long().cuda()
        # Keep the sampled coordinates so they can be written next to the labels.
        cloud = points.cpu().data.numpy().reshape(point_num, 3)
        seg_pred, _ = model(points.transpose(2, 1), to_categorical(label, class_num))
        logits = seg_pred.cpu().data.numpy()[0, :, :]

    # One row of scores per point; the arg-max is the predicted part label.
    part_ids = np.argmax(logits, 1).astype(np.int32)
    pts = np.append(cloud, part_ids.reshape(point_num, 1), 1)
    np.savetxt('pred.txt', pts, fmt='%.06f')
C++推理代码:
#include <cmath>
#include <fstream>
#include <iostream>
#include <random>
#include <stdexcept>
#include <vector>
#include <torch/script.h>
// Number of points sampled from the cloud and passed to the network.
const int point_num = 2048;
// Number of object categories; length of the one-hot category vector.
const int class_num = 16;
// Centers a point cloud on its centroid and scales it so that the farthest
// point lies at distance 1 from the origin.
//
// points: flat buffer laid out as x0, y0, z0, x1, y1, z1, ...; modified in
//         place. The number of points is taken from the buffer itself, because
//         main() calls this on the whole cloud before it is resampled to
//         point_num points.
// An empty buffer is left untouched, and a cloud whose points all coincide is
// centered but not scaled, so no division by zero can occur.
void pc_normalize(std::vector<float>& points)
{
    const size_t count = points.size() / 3;
    if (count == 0)
        return;

    float mean_x = 0, mean_y = 0, mean_z = 0;
    for (size_t i = 0; i < count; ++i)
    {
        mean_x += points[3 * i];
        mean_y += points[3 * i + 1];
        mean_z += points[3 * i + 2];
    }
    mean_x /= count;
    mean_y /= count;
    mean_z /= count;

    // Track the largest squared norm while centering; a single square root
    // at the end gives the same maximum distance.
    float max_sq = 0;
    for (size_t i = 0; i < count; ++i)
    {
        float& px = points[3 * i];
        float& py = points[3 * i + 1];
        float& pz = points[3 * i + 2];
        px -= mean_x;
        py -= mean_y;
        pz -= mean_z;
        const float sq = px * px + py * py + pz * pz;
        if (sq > max_sq)
            max_sq = sq;
    }
    if (max_sq <= 0)
        return;

    const float m = std::sqrt(max_sq);
    for (size_t i = 0; i < 3 * count; ++i)
        points[i] /= m;
}
// Replaces the cloud with point_num points drawn uniformly at random, with
// replacement, from the points it currently holds.
//
// points: flat xyz buffer; on return it holds exactly 3 * point_num floats.
// Throws std::invalid_argument when the buffer holds no complete point.
void resample(std::vector<float> & points)
{
    const size_t available = points.size() / 3;
    if (available == 0)
        throw std::invalid_argument("resample: point cloud is empty");

    // rand() is only guaranteed to reach 32767, so with a larger cloud the
    // points past that index could never be drawn; a 32-bit engine with a
    // uniform distribution covers the whole range without modulo bias.
    std::mt19937 engine(std::random_device{}());
    std::uniform_int_distribution<size_t> pick(0, available - 1);

    std::vector<float> sampled(3 * point_num);
    for (int i = 0; i < point_num; i++)
    {
        const size_t src = 3 * pick(engine);
        sampled[3 * i] = points[src];
        sampled[3 * i + 1] = points[src + 1];
        sampled[3 * i + 2] = points[src + 2];
    }
    points.swap(sampled);
}
// Runs the TorchScript part-segmentation model stored in "part_seg.pt" on the
// GPU and returns, for every point, the index of its highest-scoring part.
//
// points: flat xyz buffer, viewed as a { 1, point_num, 3 } float tensor.
// labels: one-hot category vector, viewed as a { 1, 1, class_num } tensor.
at::Tensor classfier(std::vector<float> & points, std::vector<float> & labels)
{
    torch::jit::script::Module net = torch::jit::load("part_seg.pt");
    net.to(torch::kCUDA);

    // Wrap the vectors' memory, move it to the GPU, and swap the last two axes
    // of the cloud so the model receives { 1, 3, point_num }.
    torch::Tensor cloud = torch::from_blob(points.data(), { 1, point_num, 3 }, torch::kFloat)
                              .to(torch::kCUDA)
                              .permute({ 0, 2, 1 });
    torch::Tensor category = torch::from_blob(labels.data(), { 1, 1, class_num }, torch::kFloat)
                                 .to(torch::kCUDA);

    // The model returns a tuple; only its first element is used here.
    torch::Tensor scores = net.forward({ cloud, category }).toTuple()->elements()[0].toTensor();
    scores = torch::squeeze(scores);

    // max(1) yields (values, indices); only the indices are returned.
    return std::get<1>(scores.max(1));
}
// Segments the cloud stored in "85a15c26a6e9921ae008cc4902bfe3cd.txt" and
// writes "x y z part" for each of the point_num sampled points to "pred.txt".
//
// Each input row holds seven whitespace-separated numbers; only the first
// three (xyz) are kept. Category 0 is passed to the network as a one-hot vector.
// Returns 0 on success and 1 when the input is missing or empty or the output
// cannot be created.
int main()
{
    std::ifstream infile("85a15c26a6e9921ae008cc4902bfe3cd.txt");
    if (!infile)
    {
        std::cerr << "Failed to open 85a15c26a6e9921ae008cc4902bfe3cd.txt" << std::endl;
        return 1;
    }

    std::vector<float> points;
    float x, y, z, nx, ny, nz, label;
    while (infile >> x >> y >> z >> nx >> ny >> nz >> label)
    {
        points.push_back(x);
        points.push_back(y);
        points.push_back(z);
    }
    infile.close();
    if (points.empty())
    {
        std::cerr << "No points could be read from the input file" << std::endl;
        return 1;
    }

    // One-hot category vector with entry 0 set.
    std::vector<float> labels(class_num, 0.0f);
    labels[0] = 1.0f;

    pc_normalize(points);
    resample(points);
    // Copy the predictions to the CPU once instead of synchronising with the
    // GPU for every element read below.
    at::Tensor result = classfier(points, labels).to(torch::kCPU);

    // std::ofstream opens for writing by default; the previous
    // std::fstream(..., 'w') passed a character where an open mode is expected.
    std::ofstream outfile("pred.txt");
    if (!outfile)
    {
        std::cerr << "Failed to create pred.txt" << std::endl;
        return 1;
    }
    for (int i = 0; i < point_num; i++)
    {
        outfile << points[3 * i] << " " << points[3 * i + 1] << " " << points[3 * i + 2] << " " << result[i].item<int>() << '\n';
    }
    outfile.close();
    return 0;
}
预测结果:
pytorch训练得到的pth文件转libtorch使用的pt文件脚本(以13类物体,gpu版本为例):
torchscript.py
import torch
import pointnet_sem_seg
# Export settings: size of the dummy input and the number of classes.
point_num = 4096
class_num = 13
# Single device switch: change 'cuda' to 'cpu' to export the CPU version.
device = torch.device('cuda')

model = pointnet_sem_seg.get_model(class_num)
# map_location remaps the checkpoint tensors onto the chosen device, so a
# checkpoint saved from another device still loads.
checkpoint = torch.load('sem_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Random dummy input with 9 channels per point that drives the trace.
x = torch.rand(1, 9, point_num).to(device)
traced_script_module = torch.jit.trace(model, x)
traced_script_module.save("sem_seg.pt")
python推理代码:
import torch
import numpy as np
import pointnet_sem_seg
# Number of points in each group fed to the network.
num_point = 4096
# Number of semantic classes counted per point when voting.
class_num = 13
# Step between neighbouring blocks along x and y, in input coordinate units.
stride = 0.5
# Side length of the square xy block used to cut the room.
block_size = 1.0
if __name__ == '__main__':
    data = np.load('Area_1_conferenceRoom_1.npy')
    # Columns 0-2 are used as xyz; columns 3-5 are divided by 255 below.
    points = data[:, :6]
    coord_min, coord_max = np.amin(points, axis=0)[:3], np.amax(points, axis=0)[:3]
    grid_x = int(np.ceil(float(coord_max[0] - coord_min[0] - block_size) / stride) + 1)
    grid_y = int(np.ceil(float(coord_max[1] - coord_min[1] - block_size) / stride) + 1)

    # Blocks are collected in lists and joined once, instead of re-stacking
    # the growing arrays for every block.
    room_blocks, room_indices = [], []
    for index_y in range(0, grid_y):
        for index_x in range(0, grid_x):
            # Clamp the block end to the room extent, then derive the start
            # from it so every block keeps its full size.
            s_x = coord_min[0] + index_x * stride
            e_x = min(s_x + block_size, coord_max[0])
            s_x = e_x - block_size
            s_y = coord_min[1] + index_y * stride
            e_y = min(s_y + block_size, coord_max[1])
            s_y = e_y - block_size
            point_idxs = np.where((points[:, 0] >= s_x) & (points[:, 0] <= e_x) & (points[:, 1] >= s_y) & (points[:, 1] <= e_y))[0]
            if point_idxs.size == 0:
                continue

            # Pad the block with re-drawn indices up to a multiple of num_point;
            # duplicates are only allowed when there are too few points to avoid them.
            num_batch = int(np.ceil(point_idxs.size / num_point))
            point_size = int(num_batch * num_point)
            replace = False if (point_size - point_idxs.size <= point_idxs.size) else True
            point_idxs_repeat = np.random.choice(point_idxs, point_size - point_idxs.size, replace=replace)
            point_idxs = np.concatenate((point_idxs, point_idxs_repeat))
            np.random.shuffle(point_idxs)

            # Fancy indexing copies, so the edits below leave `points` untouched.
            data_batch = points[point_idxs, :]
            normalized_xyz = np.zeros((point_size, 3))
            normalized_xyz[:, 0] = data_batch[:, 0] / coord_max[0]
            normalized_xyz[:, 1] = data_batch[:, 1] / coord_max[1]
            normalized_xyz[:, 2] = data_batch[:, 2] / coord_max[2]
            # Shift x and y so they are relative to the block centre.
            data_batch[:, 0] = data_batch[:, 0] - (s_x + block_size / 2.0)
            data_batch[:, 1] = data_batch[:, 1] - (s_y + block_size / 2.0)
            data_batch[:, 3:6] /= 255.0
            data_batch = np.concatenate((data_batch, normalized_xyz), axis=1)
            room_blocks.append(data_batch)
            room_indices.append(point_idxs)

    data_room = np.concatenate(room_blocks, axis=0)
    index_room = np.concatenate(room_indices)
    data_room = data_room.reshape((-1, num_point, data_room.shape[1]))
    index_room = index_room.reshape((-1, num_point))

    model = pointnet_sem_seg.get_model(class_num)
    model = model.cuda()
    checkpoint = torch.load('sem_seg.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.eval()

    # vote_label_pool[i, c] counts how often point i was predicted as class c.
    vote_label_pool = np.zeros((points.shape[0], class_num))
    with torch.no_grad():
        for block_data, block_index in zip(data_room, index_room):
            torch_data = torch.Tensor(block_data[np.newaxis, ...])
            torch_data = torch_data.float().cuda()
            torch_data = torch_data.transpose(2, 1)
            seg_pred, _ = model(torch_data)
            pred_label = seg_pred.contiguous().cpu().data.max(2)[1].numpy()[0]
            # np.add.at also accumulates correctly when an index repeats.
            np.add.at(vote_label_pool, (block_index.astype(np.int64), pred_label), 1)

    pred = np.argmax(vote_label_pool, 1)
    with open('pred.txt', 'w') as fout:
        for i in range(points.shape[0]):
            fout.write('%f %f %f %d\n' % (points[i, 0], points[i, 1], points[i, 2], pred[i]))
C++推理代码:
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
#include <ctime>
#include <random>
#include <torch/script.h>
// Number of points in each group passed to the network.
const int point_num = 4096;
// Number of semantic classes counted per point when voting.
const int class_num = 13;
// Step between neighbouring blocks along x and y, in input coordinate units.
const float stride = 0.5;
// Side length of the square xy block used to cut the room.
const float block_size = 1.0;
// One input point with nine float fields: position (m_x, m_y, m_z), the
// r/g/b values and three m_normal_* values. Every field is zero unless a
// constructor argument sets it.
struct point
{
    float m_x = 0;
    float m_y = 0;
    float m_z = 0;
    float m_r = 0;
    float m_g = 0;
    float m_b = 0;
    float m_normal_x = 0;
    float m_normal_y = 0;
    float m_normal_z = 0;

    // All fields zero.
    point() = default;

    // Position and r/g/b given; the m_normal_* fields stay zero.
    point(float x, float y, float z, float r, float g, float b) :
        point(x, y, z, r, g, b, 0, 0, 0) {}

    // Every field given explicitly.
    point(float x, float y, float z, float r, float g, float b, float normal_x, float normal_y, float normal_z) :
        m_x(x), m_y(y), m_z(z), m_r(r), m_g(g), m_b(b), m_normal_x(normal_x), m_normal_y(normal_y), m_normal_z(normal_z) {}
};
int main()
{
std::ifstream infile("Area_1_conferenceRoom_1.txt");
float x, y, z, r, g, b, l;
std::vector<point> pts;
std::vector<float> points_x, points_y, points_z;
int points_num = 0;
while (infile >> x >> y >> z >> r >> g >> b >> l)
{
point pt(x, y, z, r, g, b);
pts.push_back(pt);
points_x.push_back(x);
points_y.push_back(y);
points_z.push_back(z);
points_num++;
}
float x_min = *std::min_element(points_x.begin(), points_x.end());
float y_min = *std::min_element(points_y.begin(), points_y.end());
float z_min = *std::min_element(points_z.begin(), points_z.end());
float x_max = *std::max_element(points_x.begin(), points_x.end());
float y_max = *std::max_element(points_y.begin(), points_y.end());
float z_max = *std::max_element(points_z.begin(), points_z.end());
int grid_x = ceil((x_max - x_min - block_size) / stride) + 1;
int grid_y = ceil((y_max - y_min - block_size) / stride) + 1;
std::vector<point> data_room;
std::vector<int> index_room;
srand((int)time(0));
for (size_t index_y = 0; index_y < grid_y; index_y++)
{
for (size_t index_x = 0; index_x < grid_x; index_x++)
{
float s_x = x_min + index_x * stride;
float e_x = std::min(s_x + block_size, x_max);
s_x = e_x - block_size;
float s_y = y_min + index_y * stride;
float e_y = std::min(s_y + block_size, y_max);
s_y = e_y - block_size;
std::vector<int> point_idxs;
for (size_t i = 0; i < points_num; i++)
{
if (points_x[i] >= s_x && points_x[i] <= e_x && points_y[i] >= s_y && points_y[i] <= e_y)
point_idxs.push_back(i);
}
if (point_idxs.size() == 0)
continue;
int num_batch = ceil(point_idxs.size() * 1.0 / point_num);
int point_size = num_batch * point_num;
bool replace = (point_size - point_idxs.size() <= point_idxs.size() ? false : true);
std::vector<int> point_idxs_repeat;
if (replace)
{
for (size_t i = 0; i < point_size - point_idxs.size(); i++)
{
int id = rand() % point_idxs.size();
point_idxs_repeat.push_back(point_idxs[id]);
}
}
else
{
std::vector<bool> flags(pts.size(), false);
for (size_t i = 0; i < point_size - point_idxs.size(); i++)
{
int id = rand() % point_idxs.size();
while (true)
{
if (flags[id] == false)
{
flags[id] = true;
break;
}
id = rand() % point_idxs.size();
}
point_idxs_repeat.push_back(point_idxs[id]);
}
}
point_idxs.insert(point_idxs.end(), point_idxs_repeat.begin(), point_idxs_repeat.end());
std::random_device rd;
std::mt19937 g(rd()); // 随机数引擎:基于梅森缠绕器算法的随机数生成器
std::shuffle(point_idxs.begin(), point_idxs.end(), g); // 打乱顺序,重新排序(随机序列)
std::vector<point> data_batch;
for (size_t i = 0; i < point_idxs.size(); i++)
{
data_batch.push_back(pts[point_idxs[i]]);
}
for (size_t i = 0; i < point_size; i++)
{
data_batch[i].m_normal_x = data_batch[i].m_x / x_max;
data_batch[i].m_normal_y = data_batch[i].m_y / y_max;
data_batch[i].m_normal_z = data_batch[i].m_z / z_max;
data_batch[i].m_x -= (s_x + block_size / 2.0);
data_batch[i].m_y -= (s_y + block_size / 2.0);
data_batch[i].m_r /= 255.0;
data_batch[i].m_g /= 255.0;
data_batch[i].m_b /= 255.0;
data_room.push_back(data_batch[i]);
index_room.push_back(point_idxs[i]);
}
}
}
int n = point_num, m = index_room.size() / n;
std::vector<std::vector<point>> data_rooms(m, std::vector<point>(n, point()));
std::vector<std::vector<int>> index_rooms(m, std::vector<int>(n, 0));
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
data_rooms[i][j] = data_room[i * n + j];
index_rooms[i][j] = index_room[i * n + j];
}
}
std::vector<std::vector<int>> vote_label_pool(points_num, std::vector<int>(class_num, 0));
int num_blocks = data_rooms.size();
torch::jit::script::Module module = torch::jit::load("sem_seg.pt");
module.to(torch::kCUDA);
for (int sbatch = 0; sbatch < num_blocks; sbatch++)
{
int start_idx = sbatch;
int end_idx = std::min(sbatch + 1, num_blocks);
int real_batch_size = end_idx - start_idx;
std::vector<point> batch_data = data_rooms[start_idx];
std::vector<int> point_idx = index_rooms[start_idx];
std::vector<float> batch(point_num * 9);
for (size_t i = 0; i < point_num; i++)
{
batch[9 * i + 0] = batch_data[i].m_x;
batch[9 * i + 1] = batch_data[i].m_y;
batch[9 * i + 2] = batch_data[i].m_z;
batch[9 * i + 3] = batch_data[i].m_r;
batch[9 * i + 4] = batch_data[i].m_g;
batch[9 * i + 5] = batch_data[i].m_b;
batch[9 * i + 6] = batch_data[i].m_normal_x;
batch[9 * i + 7] = batch_data[i].m_normal_y;
batch[9 * i + 8] = batch_data[i].m_normal_z;
}
torch::Tensor inputs = torch::from_blob(batch.data(), { 1, point_num, 9 }, torch::kFloat);
inputs = inputs.to(torch::kCUDA);
inputs = inputs.permute({ 0, 2, 1 });
auto outputs = module.forward({ inputs }).toTuple();
torch::Tensor out0 = outputs->elements()[0].toTensor();
auto max_index = std::get<1>(torch::max(out0, 2));
max_index = torch::squeeze(max_index).to(torch::kCPU).to(torch::kInt);
std::vector<int> pred_label(max_index.data_ptr<int>(), max_index.data_ptr<int>() + max_index.numel());
for (size_t i = 0; i < pred_label.size(); i++)
{
vote_label_pool[point_idx[i]][pred_label[i]] += 1;
}
}
std::ofstream outfile("pred.txt");
for (size_t i = 0; i < points_num; i++)
{
int max_index = std::max_element(vote_label_pool[i].begin(), vote_label_pool[i].end()) - vote_label_pool[i].begin();
outfile << pts[i].m_x << " " << pts[i].m_y << " " << pts[i].m_z << " " << max_index << std::endl;
}
outfile.close();
return 0;
}
注意,由于C++无法直接读取npy格式文件(可以依赖一些库),这里先使用python脚本将npy文件转换成txt文件。
import numpy as np
# Load the room array and dump it as text with six decimals per value; the
# C++ program reads this text file instead of the .npy file.
room = np.load("Area_1_conferenceRoom_1.npy")
np.savetxt('Area_1_conferenceRoom_1.txt', room, fmt='%0.06f')
预测结果:
参考:Libtorch部署模型
在C++中部署python(libtorch)模型的方法总结,Pytorch+Libtorch,Win10+VS2017
A simple C++ implementation of Charles Qi’s PointNet
模型的下载地址:pointnet模型权重
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。