赞
踩
PointNet点云分类(pointnet_cls):以分10类、gpu版本为例。
先将pytorch训练出的pth权重文件转为onnx文件:
"""Export a trained PointNet classification checkpoint (./cls.pth) to ONNX."""
import torch

import pointnet_cls

point_num = 1024
class_num = 10
normal_channel = False

# Pick the GPU when one is available and fall back to the CPU otherwise, so
# the same script works for both builds without commenting lines out.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_cls.get_model(class_num, normal_channel)
model = model.to(device)
model.eval()

# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
# machine as well.
checkpoint = torch.load('./cls.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Random dummy input used only for tracing: shape (1, 3 or 6, point_num),
# six channels when normal_channel is enabled.
channels = 6 if normal_channel else 3
x = torch.rand(1, channels, point_num).to(device)

export_onnx_file = "./cls.onnx"
torch.onnx.export(model, x, export_onnx_file, opset_version=11)
python推理:
"""Classify one point cloud with the exported cls.onnx model."""
import numpy as np

point_num = 1024


def pc_normalize(pc):
    """Centre a point cloud on its centroid and scale it to unit radius.

    Args:
        pc: 2-D array with one point per row.

    Returns:
        A new array of the same shape whose centroid is the origin and whose
        farthest point is at distance 1. A cloud whose points all coincide is
        returned centred but unscaled, instead of being divided by zero.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
    if m > 0:
        pc = pc / m
    return pc


def prepare_input(data):
    """Turn raw rows into the (1, 3, point_num) float32 network input.

    Args:
        data: 2-D array whose first three columns are x, y, z.

    Returns:
        Contiguous float32 array of shape (1, 3, point_num) built from the
        first ``point_num`` rows after normalisation.

    Raises:
        ValueError: if ``data`` has fewer than ``point_num`` rows or fewer
            than three columns.
    """
    data = np.asarray(data, dtype=np.float32)
    if data.ndim != 2 or data.shape[1] < 3:
        raise ValueError("expected a 2-D array with at least 3 columns")
    if data.shape[0] < point_num:
        raise ValueError(
            "need at least %d points, got %d" % (point_num, data.shape[0]))
    point_set = pc_normalize(data[:point_num, 0:3])
    points = point_set.reshape(1, point_num, 3).swapaxes(2, 1)
    return np.ascontiguousarray(points, dtype=np.float32)


def main():
    """Load ./bed_0610.txt, run cls.onnx and print the predicted class id."""
    # Imported here so the preprocessing helpers above can be imported
    # without onnxruntime being installed.
    import onnxruntime

    path = './bed_0610.txt'
    data = np.loadtxt(path, delimiter=',', ndmin=2).astype(np.float32)
    points = prepare_input(data)

    onnx_session = onnxruntime.InferenceSession(
        "cls.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

    # Every model input is fed the same point tensor.
    inputs = {node.name: points for node in onnx_session.get_inputs()}

    outputs = onnx_session.run(None, inputs)[0]
    print(np.argmax(outputs))


if __name__ == '__main__':
    main()
C++推理:
#include <iostream> #include <vector> #include <fstream> #include <onnxruntime_cxx_api.h> const int point_num = 1024; const int class_num = 10; void pc_normalize(std::vector<float>& points) { float mean_x = 0, mean_y = 0, mean_z = 0; for (size_t i = 0; i < point_num; ++i) { mean_x += points[3 * i]; mean_y += points[3 * i + 1]; mean_z += points[3 * i + 2]; } mean_x /= point_num; mean_y /= point_num; mean_z /= point_num; for (size_t i = 0; i < point_num; ++i) { points[3 * i] -= mean_x; points[3 * i + 1] -= mean_y; points[3 * i + 2] -= mean_z; } float m = 0; for (size_t i = 0; i < point_num; ++i) { if (sqrt(pow(points[3 * i], 2) + pow(points[3 * i + 1], 2) + pow(points[3 * i + 2], 2)) > m) m = sqrt(pow(points[3 * i], 2) + pow(points[3 * i + 1], 2) + pow(points[3 * i + 2], 2)); } for (size_t i = 0; i < point_num; ++i) { points[3 * i] /= m; points[3 * i + 1] /= m; points[3 * i + 2] /= m; } } void classfier(std::vector<float> & points) { Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "cls"); Ort::SessionOptions session_options; session_options.SetIntraOpNumThreads(1); session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); OrtCUDAProviderOptions cuda_option; cuda_option.device_id = 0; cuda_option.arena_extend_strategy = 0; cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; cuda_option.gpu_mem_limit = SIZE_MAX; cuda_option.do_copy_in_default_stream = 1; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); session_options.AppendExecutionProvider_CUDA(cuda_option); const wchar_t* model_path = L"cls.onnx"; Ort::Session session(env, model_path, session_options); Ort::AllocatorWithDefaultOptions allocator; std::vector<const char*> input_node_names; for (size_t i = 0; i < session.GetInputCount(); i++) { input_node_names.push_back(session.GetInputName(i, allocator)); } std::vector<const char*> output_node_names; for (size_t i = 0; i < session.GetOutputCount(); i++) { 
output_node_names.push_back(session.GetOutputName(i, allocator)); } const size_t input_tensor_size = 1 * 3 * point_num ; std::vector<float> input_tensor_values(input_tensor_size); for (size_t i = 0; i < 3; i++) { for (size_t j = 0; j < point_num; j++) { input_tensor_values[point_num * i + j] = points[3 * j + i]; } } std::vector<int64_t> input_node_dims = { 1, 3, point_num }; auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size()); std::vector<Ort::Value> inputs; inputs.push_back(std::move(input_tensor)); std::vector<Ort::Value> outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size()); const float* rawOutput = outputs[0].GetTensorData<float>(); std::vector<int64_t> outputShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); std::vector<float> pred(rawOutput, rawOutput + count); int predict_label = std::max_element(pred.begin(), pred.end()) - pred.begin(); std::cout << predict_label << std::endl; } int main() { std::vector<float> points; float x, y, z, nx, ny, nz; char ch; std::ifstream infile("bed_0610.txt"); for (size_t i = 0; i < point_num; i++) { infile >> x >> ch >> y >> ch >> z >> ch >> nx >> ch >> ny >> ch >> nz; points.push_back(x); points.push_back(y); points.push_back(z); } infile.close(); pc_normalize(points); classfier(points); return 0; }
PointNet部件分割(pointnet_part_seg):以分16类50部分、gpu版本为例。
先将pytorch训练出的pth权重文件转为onnx文件:
"""Export a trained PointNet part-segmentation checkpoint to ONNX."""
import torch

point_num = 2048
class_num = 16
part_num = 50
normal_channel = False


def to_categorical(y, class_num):
    """One-hot encode an integer label tensor.

    Args:
        y: integer tensor of class indices.
        class_num: number of classes, i.e. the length of each one-hot vector.

    Returns:
        Float tensor of shape ``y.shape + (class_num,)`` located on the same
        device as ``y``.
    """
    # Index the identity matrix directly on y's device instead of going
    # through NumPy and moving the result afterwards.
    return torch.eye(class_num, device=y.device)[y.long()]


def main():
    """Load ./part_seg.pth and write ./part_seg.onnx."""
    # Imported here so to_categorical can be imported without the model code.
    import pointnet_part_seg

    # Pick the GPU when one is available and fall back to the CPU otherwise,
    # so the same script works for both builds without commenting lines out.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = pointnet_part_seg.get_model(part_num, normal_channel)
    model = model.to(device)
    model.eval()

    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    checkpoint = torch.load('./part_seg.pth', map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Dummy inputs used only for tracing: a random cloud of shape
    # (1, 3 or 6, point_num) and a (1, 1) label holding class 0.
    channels = 6 if normal_channel else 3
    x = torch.rand(1, channels, point_num).to(device)
    label = torch.zeros((1, 1), dtype=torch.long, device=device)

    export_onnx_file = "./part_seg.onnx"
    torch.onnx.export(model, (x, to_categorical(label, class_num)),
                      export_onnx_file, opset_version=11)


if __name__ == '__main__':
    main()
python推理:
"""Run part segmentation on one point cloud with part_seg.onnx."""
import numpy as np

point_num = 2048
class_num = 16


def to_categorical(y, class_num):
    """One-hot encode integer labels.

    Args:
        y: integer array of class indices.
        class_num: number of classes, i.e. the length of each one-hot vector.

    Returns:
        float32 array of shape ``y.shape + (class_num,)``.
    """
    new_y = np.eye(class_num)[y,]
    return new_y.astype(np.float32)


def pc_normalize(pc):
    """Centre a point cloud on its centroid and scale it to unit radius.

    Args:
        pc: 2-D array with one point per row.

    Returns:
        A new array of the same shape whose centroid is the origin and whose
        farthest point is at distance 1. A cloud whose points all coincide is
        returned centred but unscaled, instead of being divided by zero.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))
    if m > 0:
        pc = pc / m
    return pc


def main():
    """Segment the sample cloud and write "x y z part" rows to pred.txt."""
    # Imported here so the helpers above can be imported without onnxruntime.
    import onnxruntime

    data = np.loadtxt('85a15c26a6e9921ae008cc4902bfe3cd.txt', ndmin=2).astype(np.float32)
    point_set = pc_normalize(data[:, 0:3]).astype(np.float32)

    # Draw exactly point_num points, with replacement, from the whole cloud.
    choice = np.random.choice(point_set.shape[0], point_num, replace=True)
    pts = point_set[choice, :]

    # (point_num, 3) -> (1, 3, point_num), the layout used at export time.
    points = np.ascontiguousarray(
        pts.reshape(1, point_num, 3).swapaxes(2, 1), dtype=np.float32)
    label = np.array([[0]], dtype=np.int32)

    onnx_session = onnxruntime.InferenceSession(
        "part_seg.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    input_name = [node.name for node in onnx_session.get_inputs()]

    inputs = {
        input_name[0]: points,
        input_name[1]: to_categorical(label, class_num),
    }
    outputs = onnx_session.run(None, inputs)[0]

    # outputs[0] has one row of part scores per point; keep the best part.
    cur_pred_val = np.argmax(outputs[0, :, :], 1).astype(np.int32)

    result = np.append(pts.reshape(point_num, 3),
                       cur_pred_val.reshape(point_num, 1), 1)
    np.savetxt('pred.txt', result, fmt='%.06f')


if __name__ == '__main__':
    main()
C++推理:
#include <iostream> #include <vector> #include <fstream> #include <ctime> #include <onnxruntime_cxx_api.h> const int point_num = 2048; const int class_num = 16; const int parts_num = 50; void pc_normalize(std::vector<float>& points) { float mean_x = 0, mean_y = 0, mean_z = 0; for (size_t i = 0; i < point_num; ++i) { mean_x += points[3 * i]; mean_y += points[3 * i + 1]; mean_z += points[3 * i + 2]; } mean_x /= point_num; mean_y /= point_num; mean_z /= point_num; for (size_t i = 0; i < point_num; ++i) { points[3 * i] -= mean_x; points[3 * i + 1] -= mean_y; points[3 * i + 2] -= mean_z; } float m = 0; for (size_t i = 0; i < point_num; ++i) { if (sqrt(pow(points[3 * i], 2) + pow(points[3 * i + 1], 2) + pow(points[3 * i + 2], 2)) > m) m = sqrt(pow(points[3 * i], 2) + pow(points[3 * i + 1], 2) + pow(points[3 * i + 2], 2)); } for (size_t i = 0; i < point_num; ++i) { points[3 * i] /= m; points[3 * i + 1] /= m; points[3 * i + 2] /= m; } } void resample(std::vector<float>& points) { srand((int)time(0)); std::vector<int> choice(point_num); for (size_t i = 0; i < point_num; i++) { choice[i] = rand() % (points.size() / 3); } std::vector<float> temp_points(3 * point_num); for (size_t i = 0; i < point_num; i++) { temp_points[3 * i] = points[3 * choice[i]]; temp_points[3 * i + 1] = points[3 * choice[i] + 1]; temp_points[3 * i + 2] = points[3 * choice[i] + 2]; } points = temp_points; } std::vector<int> classfier(std::vector<float> & points, std::vector<float> & labels) { Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "part_seg"); Ort::SessionOptions session_options; session_options.SetIntraOpNumThreads(1); session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); OrtCUDAProviderOptions cuda_option; cuda_option.device_id = 0; cuda_option.arena_extend_strategy = 0; cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; cuda_option.gpu_mem_limit = SIZE_MAX; cuda_option.do_copy_in_default_stream = 1; 
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); session_options.AppendExecutionProvider_CUDA(cuda_option); const wchar_t* model_path = L"part_seg.onnx"; Ort::Session session(env, model_path, session_options); Ort::AllocatorWithDefaultOptions allocator; std::vector<const char*> input_node_names; for (size_t i = 0; i < session.GetInputCount(); i++) { input_node_names.push_back(session.GetInputName(i, allocator)); } std::vector<const char*> output_node_names; for (size_t i = 0; i < session.GetOutputCount(); i++) { output_node_names.push_back(session.GetOutputName(i, allocator)); } const size_t input_tensor_size0 = 1 * 3 * point_num; std::vector<float> input_tensor_values0(input_tensor_size0); for (size_t i = 0; i < 3; i++) { for (size_t j = 0; j < point_num; j++) { input_tensor_values0[point_num * i + j] = points[3 * j + i]; } } std::vector<int64_t> input_node_dims0 = { 1, 3, point_num }; auto memory_info0 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor0 = Ort::Value::CreateTensor<float>(memory_info0, input_tensor_values0.data(), input_tensor_size0, input_node_dims0.data(), input_node_dims0.size()); const size_t input_tensor_size1 = 1 * 1 * class_num; std::vector<float> input_tensor_values1(input_tensor_size0); for (size_t i = 0; i < class_num; i++) { input_tensor_values1[i] = labels[i]; } std::vector<int64_t> input_node_dims1 = { 1, 1, class_num }; auto memory_info1 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor1 = Ort::Value::CreateTensor<float>(memory_info1, input_tensor_values1.data(), input_tensor_size1, input_node_dims1.data(), input_node_dims1.size()); std::vector<Ort::Value> inputs; inputs.push_back(std::move(input_tensor0)); inputs.push_back(std::move(input_tensor1)); std::vector<Ort::Value> outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), inputs.data(), input_node_names.size(), output_node_names.data(), 
output_node_names.size()); const float* rawOutput = outputs[0].GetTensorData<float>(); std::vector<int64_t> outputShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); std::vector<float> pred(rawOutput, rawOutput + count); std::vector<std::vector<float>> preds(point_num, std::vector<float>(parts_num, 0)); for (size_t i = 0; i < point_num; i++) { for (size_t j = 0; j < parts_num; j++) { preds[i][j] = pred[i * parts_num + j]; } } std::vector<int> max_index(point_num, 0); for (size_t i = 0; i < point_num; i++) { max_index[i]= std::max_element(preds[i].begin(), preds[i].end()) - preds[i].begin(); } return max_index; } int main() { std::vector<float> points, labels; float x, y, z, nx, ny, nz, label; std::ifstream infile("85a15c26a6e9921ae008cc4902bfe3cd.txt"); while (infile >> x >> y >> z >> nx >> ny >> nz >> label) { points.push_back(x); points.push_back(y); points.push_back(z); } for (size_t i = 0; i < class_num; i++) { labels.push_back(0.0); } labels[0] = 1.0; infile.close(); pc_normalize(points); resample(points); std::vector<int> result = classfier(points, labels); std::fstream outfile("pred.txt", 'w'); for (size_t i = 0; i < point_num; i++) { outfile << points[3 * i] << " " << points[3 * i + 1] << " " << points[3 * i + 2] << " " << result[i]<< std::endl; } outfile.close(); return 0; }
PointNet语义分割(pointnet_sem_seg):以分13类、gpu版本为例。
先将pytorch训练出的pth权重文件转为onnx文件:
"""Export a trained PointNet semantic-segmentation checkpoint to ONNX."""
import torch

import pointnet_sem_seg

point_num = 4096
class_num = 13

# Pick the GPU when one is available and fall back to the CPU otherwise, so
# the same script works for both builds without commenting lines out.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_sem_seg.get_model(class_num)
model = model.to(device)
model.eval()

# map_location lets a checkpoint that was saved on a GPU load on a CPU-only
# machine as well.
checkpoint = torch.load('sem_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Random dummy input used only for tracing: shape (1, 9, point_num).
x = torch.rand(1, 9, point_num).to(device)

export_onnx_file = "./sem_seg.onnx"
torch.onnx.export(model, x, export_onnx_file, opset_version=11)
python推理代码:
"""Semantic segmentation of one room with sem_seg.onnx and block voting."""
import numpy as np

point_num = 4096
class_num = 13
stride = 0.5
block_size = 1.0


def split_room_into_blocks(points):
    """Cut a room into overlapping XY blocks of ``point_num`` points each.

    A ``block_size`` window is slid over the XY extent in steps of ``stride``.
    The points of every non-empty window are padded by random re-sampling to
    a multiple of ``point_num``, shuffled, and expanded to nine features:
    x and y relative to the block centre, the original z, colour divided by
    255, and x, y, z divided by the room's maximum coordinate.

    Args:
        points: 2-D float array with columns x, y, z, r, g, b. It is not
            modified.

    Returns:
        ``(data_room, index_room)``: a ``(blocks, point_num, 9)`` feature
        array and a ``(blocks, point_num)`` integer array holding, for each
        sample, its row index in ``points``.

    Raises:
        ValueError: if no point falls inside any block.
    """
    coord_min = np.amin(points, axis=0)[:3]
    coord_max = np.amax(points, axis=0)[:3]
    # A room narrower than one block would give zero (or negative) grid
    # cells; clamp to one so a single block still covers it.
    grid_x = max(int(np.ceil(float(coord_max[0] - coord_min[0] - block_size) / stride) + 1), 1)
    grid_y = max(int(np.ceil(float(coord_max[1] - coord_min[1] - block_size) / stride) + 1), 1)

    # Collect per-block arrays and concatenate once at the end instead of
    # re-copying the growing result on every iteration.
    data_blocks, index_blocks = [], []
    for index_y in range(grid_y):
        for index_x in range(grid_x):
            # Windows that would run past the far wall are shifted back so
            # they end exactly on it.
            s_x = coord_min[0] + index_x * stride
            e_x = min(s_x + block_size, coord_max[0])
            s_x = e_x - block_size
            s_y = coord_min[1] + index_y * stride
            e_y = min(s_y + block_size, coord_max[1])
            s_y = e_y - block_size

            point_idxs = np.where(
                (points[:, 0] >= s_x) & (points[:, 0] <= e_x)
                & (points[:, 1] >= s_y) & (points[:, 1] <= e_y))[0]
            if point_idxs.size == 0:
                continue

            num_batch = int(np.ceil(point_idxs.size / point_num))
            point_size = int(num_batch * point_num)
            pad = point_size - point_idxs.size
            # Sample the padding without replacement when there are enough
            # distinct points to do so.
            replace = pad > point_idxs.size
            point_idxs_repeat = np.random.choice(point_idxs, pad, replace=replace)
            point_idxs = np.concatenate((point_idxs, point_idxs_repeat))
            np.random.shuffle(point_idxs)

            data_batch = points[point_idxs, :]  # fancy indexing copies
            # Room-normalised coordinates must be taken before x and y are
            # shifted to the block centre.
            normlized_xyz = np.zeros((point_size, 3))
            normlized_xyz[:, 0] = data_batch[:, 0] / coord_max[0]
            normlized_xyz[:, 1] = data_batch[:, 1] / coord_max[1]
            normlized_xyz[:, 2] = data_batch[:, 2] / coord_max[2]
            data_batch[:, 0] = data_batch[:, 0] - (s_x + block_size / 2.0)
            data_batch[:, 1] = data_batch[:, 1] - (s_y + block_size / 2.0)
            data_batch[:, 3:6] /= 255.0

            data_blocks.append(np.concatenate((data_batch, normlized_xyz), axis=1))
            index_blocks.append(point_idxs)

    if not data_blocks:
        raise ValueError("no points fell inside any block")

    data_room = np.concatenate(data_blocks, axis=0)
    data_room = data_room.reshape((-1, point_num, data_room.shape[1]))
    index_room = np.concatenate(index_blocks).reshape((-1, point_num))
    return data_room, index_room


def accumulate_votes(vote_label_pool, point_idx, pred_label):
    """Add one vote per sample to ``vote_label_pool[point, label]`` in place.

    Args:
        vote_label_pool: ``(num_points, num_classes)`` vote counters.
        point_idx: point index of every sample (any shape).
        pred_label: predicted class of every sample (same number of items).
    """
    rows = np.asarray(point_idx).ravel().astype(np.intp)
    cols = np.asarray(pred_label).ravel().astype(np.intp)
    # np.add.at accumulates repeated (row, col) pairs; plain fancy-index
    # assignment would count each pair only once.
    np.add.at(vote_label_pool, (rows, cols), 1)


def main():
    """Segment Area_1_conferenceRoom_1.npy and write "x y z label" rows."""
    # Imported here so the helpers above can be imported without onnxruntime.
    import onnxruntime

    data = np.load('Area_1_conferenceRoom_1.npy')
    points = data[:, :6]
    data_room, index_room = split_room_into_blocks(points)

    onnx_session = onnxruntime.InferenceSession(
        "sem_seg.onnx", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    input_name = [node.name for node in onnx_session.get_inputs()]

    vote_label_pool = np.zeros((points.shape[0], class_num))
    # One block per run: the exported model was traced with batch size 1.
    for block, block_idx in zip(data_room, index_room):
        # (point_num, 9) -> (1, 9, point_num) float32.
        batch = np.ascontiguousarray(
            block[np.newaxis, ...].swapaxes(2, 1), dtype=np.float32)
        inputs = {name: batch for name in input_name}
        outputs = onnx_session.run(None, inputs)[0]
        batch_pred_label = np.argmax(outputs, 2)
        accumulate_votes(vote_label_pool, block_idx, batch_pred_label[0])

    # Every point takes the class it was most often assigned.
    pred = np.argmax(vote_label_pool, 1)
    with open('pred.txt', 'w') as fout:
        fout.writelines(
            '%f %f %f %d\n' % (row[0], row[1], row[2], label)
            for row, label in zip(points, pred))


if __name__ == '__main__':
    main()
C++推理:
#include <iostream> #include <fstream> #include <vector> #include <algorithm> #include <ctime> #include <random> #include <onnxruntime_cxx_api.h> const int point_num = 4096; const int class_num = 13; struct point { float m_x, m_y, m_z, m_r, m_g, m_b, m_normal_x, m_normal_y, m_normal_z; point() : m_x(0), m_y(0), m_z(0), m_r(0), m_g(0), m_b(0), m_normal_x(0), m_normal_y(0), m_normal_z(0) {} point(float x, float y, float z, float r, float g, float b) : m_x(x), m_y(y), m_z(z), m_r(r), m_g(g), m_b(b), m_normal_x(0), m_normal_y(0), m_normal_z(0) {} point(float x, float y, float z, float r, float g, float b, float normal_x, float normal_y, float normal_z) : m_x(x), m_y(y), m_z(z), m_r(r), m_g(g), m_b(b), m_normal_x(normal_x), m_normal_y(normal_y), m_normal_z(normal_z) {} }; int main() { float x, y, z, r, g, b, l; std::vector<point> pts; std::vector<float> points_x, points_y, points_z; int points_num = 0; std::ifstream infile("Area_1_conferenceRoom_1.txt"); while (infile >> x >> y >> z >> r >> g >> b >> l) { point pt(x, y, z, r, g, b); pts.push_back(pt); points_x.push_back(x); points_y.push_back(y); points_z.push_back(z); points_num++; } float x_min = *std::min_element(points_x.begin(), points_x.end()); float y_min = *std::min_element(points_y.begin(), points_y.end()); float z_min = *std::min_element(points_z.begin(), points_z.end()); float x_max = *std::max_element(points_x.begin(), points_x.end()); float y_max = *std::max_element(points_y.begin(), points_y.end()); float z_max = *std::max_element(points_z.begin(), points_z.end()); float stride = 0.5; float block_size = 1.0; srand((int)time(0)); int grid_x = ceil((x_max - x_min - block_size) / stride) + 1; int grid_y = ceil((y_max - y_min - block_size) / stride) + 1; std::vector<point> data_room; std::vector<int> index_room; for (size_t index_y = 0; index_y < grid_y; index_y++) { for (size_t index_x = 0; index_x < grid_x; index_x++) { float s_x = x_min + index_x * stride; float e_x = std::min(s_x + block_size, x_max); s_x 
= e_x - block_size; float s_y = y_min + index_y * stride; float e_y = std::min(s_y + block_size, y_max); s_y = e_y - block_size; std::vector<int> point_idxs; for (size_t i = 0; i < points_num; i++) { if (points_x[i] >= s_x && points_x[i] <= e_x && points_y[i] >= s_y && points_y[i] <= e_y) point_idxs.push_back(i); } if (point_idxs.size() == 0) continue; int num_batch = ceil(point_idxs.size() * 1.0 / point_num); int point_size = num_batch * point_num; bool replace = (point_size - point_idxs.size() <= point_idxs.size() ? false : true); std::vector<int> point_idxs_repeat; if (replace) { for (size_t i = 0; i < point_size - point_idxs.size(); i++) { int id = rand() % point_idxs.size(); point_idxs_repeat.push_back(point_idxs[id]); } } else { std::vector<bool> flags(pts.size(), false); for (size_t i = 0; i < point_size - point_idxs.size(); i++) { int id = rand() % point_idxs.size(); while (true) { if (flags[id] == false) { flags[id] = true; break; } id = rand() % point_idxs.size(); } point_idxs_repeat.push_back(point_idxs[id]); } } point_idxs.insert(point_idxs.end(), point_idxs_repeat.begin(), point_idxs_repeat.end()); std::random_device rd; std::mt19937 g(rd()); // 随机数引擎:基于梅森缠绕器算法的随机数生成器 std::shuffle(point_idxs.begin(), point_idxs.end(), g); // 打乱顺序,重新排序(随机序列) std::vector<point> data_batch; for (size_t i = 0; i < point_idxs.size(); i++) { data_batch.push_back(pts[point_idxs[i]]); } for (size_t i = 0; i < point_size; i++) { data_batch[i].m_normal_x = data_batch[i].m_x / x_max; data_batch[i].m_normal_y = data_batch[i].m_y / y_max; data_batch[i].m_normal_z = data_batch[i].m_z / z_max; data_batch[i].m_x -= (s_x + block_size / 2.0); data_batch[i].m_y -= (s_y + block_size / 2.0); data_batch[i].m_r /= 255.0; data_batch[i].m_g /= 255.0; data_batch[i].m_b /= 255.0; data_room.push_back(data_batch[i]); index_room.push_back(point_idxs[i]); } } } int n = point_num, m = index_room.size() / n; std::vector<std::vector<point>> data_rooms(m, std::vector<point>(n, point())); 
std::vector<std::vector<int>> index_rooms(m, std::vector<int>(n, 0)); for (size_t i = 0; i < m; i++) { for (size_t j = 0; j < n; j++) { data_rooms[i][j] = data_room[i * n + j]; index_rooms[i][j] = index_room[i * n + j]; } } std::vector<std::vector<int>> vote_label_pool(points_num, std::vector<int>(class_num, 0)); int num_blocks = data_rooms.size(); Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "sem_seg"); Ort::SessionOptions session_options; session_options.SetIntraOpNumThreads(1); session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); OrtCUDAProviderOptions cuda_option; cuda_option.device_id = 0; cuda_option.arena_extend_strategy = 0; cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; cuda_option.gpu_mem_limit = SIZE_MAX; cuda_option.do_copy_in_default_stream = 1; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); session_options.AppendExecutionProvider_CUDA(cuda_option); const wchar_t* model_path = L"sem_seg.onnx"; Ort::Session session(env, model_path, session_options); Ort::AllocatorWithDefaultOptions allocator; std::vector<const char*> input_node_names; for (size_t i = 0; i < session.GetInputCount(); i++) { input_node_names.push_back(session.GetInputName(i, allocator)); } std::vector<const char*> output_node_names; for (size_t i = 0; i < session.GetOutputCount(); i++) { output_node_names.push_back(session.GetOutputName(i, allocator)); } const size_t input_tensor_size = 1 * 9 * point_num; std::vector<float> input_tensor_values(input_tensor_size); for (int sbatch = 0; sbatch < num_blocks; sbatch++) { int start_idx = sbatch; int end_idx = std::min(sbatch + 1, num_blocks); int real_batch_size = end_idx - start_idx; std::vector<point> batch_data = data_rooms[start_idx]; std::vector<int> point_idx = index_rooms[start_idx]; std::vector<float> batch(point_num * 9); for (size_t i = 0; i < point_num; i++) { batch[9 * i + 0] = batch_data[i].m_x; batch[9 * i + 1] = batch_data[i].m_y; batch[9 * i 
+ 2] = batch_data[i].m_z; batch[9 * i + 3] = batch_data[i].m_r; batch[9 * i + 4] = batch_data[i].m_g; batch[9 * i + 5] = batch_data[i].m_b; batch[9 * i + 6] = batch_data[i].m_normal_x; batch[9 * i + 7] = batch_data[i].m_normal_y; batch[9 * i + 8] = batch_data[i].m_normal_z; } for (size_t i = 0; i < 9; i++) { for (size_t j = 0; j < point_num; j++) { input_tensor_values[i * point_num + j] = batch[9 * j + i]; } } std::vector<int64_t> input_node_dims = { 1, 9, point_num }; auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size()); std::vector<Ort::Value> inputs; inputs.push_back(std::move(input_tensor)); std::vector<Ort::Value> outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size()); const float* rawOutput = outputs[0].GetTensorData<float>(); std::vector<int64_t> outputShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); std::vector<float> pred(rawOutput, rawOutput + count); std::vector<std::vector<float>> preds(point_num, std::vector<float>(class_num, 0)); for (size_t i = 0; i < point_num; i++) { for (size_t j = 0; j < class_num; j++) { preds[i][j] = pred[i * class_num + j]; } } std::vector<int> pred_label(point_num, 0); for (size_t i = 0; i < point_num; i++) { pred_label[i] = std::max_element(preds[i].begin(), preds[i].end()) - preds[i].begin(); vote_label_pool[point_idx[i]][pred_label[i]] += 1; } } std::ofstream outfile("pred.txt"); for (size_t i = 0; i < points_num; i++) { int max_index = std::max_element(vote_label_pool[i].begin(), vote_label_pool[i].end()) - vote_label_pool[i].begin(); outfile << pts[i].m_x << " " << pts[i].m_y << " " << pts[i].m_z << " " << max_index << std::endl; } 
outfile.close(); return 0; }
注意,由于C++无法直接读取npy格式文件(可以依赖一些库),这里先使用python脚本将npy文件转换成txt文件。
import numpy as np
# Dump the room's .npy array as plain text with six decimals per value, so
# the C++ program (which reads the .txt file) can load it.
room_points = np.load("Area_1_conferenceRoom_1.npy")
np.savetxt(
    'Area_1_conferenceRoom_1.txt',
    room_points,
    fmt='%0.06f',
)
模型的下载地址:pointnet模型权重
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。