当前位置:   article > 正文

(有待完善)Ubuntu20.04安装部署TensorRT_ubuntu20.04安装tensorrt

ubuntu20.04安装tensorrt

一、下载解压

下载网址:https://developer.nvidia.com/nvidia-tensorrt-download

下载完拉到主目录下

tar -zxvf TensorRT-8.2.1.8.Linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz

二、添加环境变量

gedit ~/.bashrc

文末添加:

export LD_LIBRARY_PATH=/home/wave/TensorRT-8.2.1.8/lib:$LD_LIBRARY_PATH

刷新环境变量

source ~/.bashrc

三、下载源码

网址:https://gitcode.net/mirrors/wang-xinyu/tensorrtx?utm_source=csdn_github_accelerator

注意:下载的 tensorrtx 源码版本要与你安装的 yolov5 版本对应

下载yolov5对应的模型网址:https://github.com/ultralytics/yolov5/releases

如果按照上一篇博客你安装的是 yolov5-v5.0,那么上面的源码和模型也要选择对应的 v5.0 版本。

都下载完后进入yolov5的目录

将里面的gen_wts.py文件复制到你yolov5的目录下

在此目录下打开终端生成.wts文件

python3 gen_wts.py -w yolov5s.pt

生成完毕后回到 tensorrtx 源码中的 yolov5 目录

mkdir build && cd build
cmake ..
make

然后把刚刚生成的.wts文件拉到这个build文件下

sudo ./yolov5 -s yolov5s.wts yolov5s.engine s
sudo ./yolov5 -d yolov5s.engine ../sample

如果要开启摄像头的话将yolov5.cpp的代码换成如下

#include <iostream>
#include <chrono>
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"

// Precision used when building the engine (only one of these may be defined).
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
#define BATCH_SIZE 1

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
// Binding names used to look up the engine's input/output buffer indices.
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;

// Class-id -> label table used when drawing detections.
// Change these to your own class names when using a custom-trained model.
// (These are the 80 COCO classes, indexed by Yolo::Detection::class_id.)
char *my_classes[]={ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard","surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush" };
  31. static int get_width(int x, float gw, int divisor = 8) {
  32. //return math.ceil(x / divisor) * divisor
  33. if (int(x * gw) % divisor == 0) {
  34. return int(x * gw);
  35. }
  36. return (int(x * gw / divisor) + 1) * divisor;
  37. }
  38. static int get_depth(int x, float gd) {
  39. if (x == 1) {
  40. return 1;
  41. }
  42. else {
  43. return round(x * gd) > 1 ? round(x * gd) : 1;
  44. }
  45. }
// Build the standard (P5) yolov5 engine: Focus/CSP backbone, SPP, PANet-style
// head, three 1x1 detect convolutions, and the custom YoLo plugin layer.
// maxBatchSize: maximum batch size compiled into the engine.
// builder/config: TensorRT builder objects owned by the caller.
// dt: data type of the input tensor.
// gd/gw: depth/width multiples selecting the s/m/l/x variant.
// wts_name: path to the .wts weight file produced by gen_wts.py.
// Returns the built engine (caller is responsible for destroying it).
ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    INetworkDefinition* network = builder->createNetworkV2(0U);
    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
    assert(data);
    // Load the serialized weights; keys are the PyTorch module names ("model.N...").
    std::map<std::string, Weights> weightMap = loadWeights(wts_name);
    /* ------ yolov5 backbone------ */
    auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
    auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");
    /* ------ yolov5 head ------ */
    auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
    auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
    // Upsample to match the stride-16 feature map, then concat (FPN top-down path).
    auto upsample11 = network->addResize(*conv10->getOutput(0));
    assert(upsample11);
    upsample11->setResizeMode(ResizeMode::kNEAREST);
    upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
    ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
    auto cat12 = network->addConcatenation(inputTensors12, 2);
    auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
    auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
    // Second upsample + concat with the stride-8 feature map.
    auto upsample15 = network->addResize(*conv14->getOutput(0));
    assert(upsample15);
    upsample15->setResizeMode(ResizeMode::kNEAREST);
    upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
    ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
    auto cat16 = network->addConcatenation(inputTensors16, 2);
    auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
    // yolo layer 0 — 1x1 conv producing 3 anchors * (classes + 5) channels.
    IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
    auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
    ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
    auto cat19 = network->addConcatenation(inputTensors19, 2);
    auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
    // yolo layer 1
    IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
    auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
    ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors22, 2);
    auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
    // yolo layer 2
    IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
    // Fuse the three detect heads into the custom YoLo plugin output "prob".
    auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
    yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*yolo->getOutput(0));
    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    // INT8 requires calibration images in ./coco_calib/.
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
    config->setInt8Calibrator(calibrator);
#endif
    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;
    // Don't need the network any more
    network->destroy();
    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*)(mem.second.values));
    }
    return engine;
}
// Build the P6 yolov5 variant (extra stride-64 level, four detect heads).
// Parameters mirror build_engine(). NOTE(review): this file's main() never
// calls APIToModel/build_engine_p6 — it is kept for parity with the original
// non-camera source.
ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
    INetworkDefinition* network = builder->createNetworkV2(0U);
    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
    assert(data);
    std::map<std::string, Weights> weightMap = loadWeights(wts_name);
    /* ------ yolov5 backbone------ */
    auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
    auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    // P6 adds a 768-channel stage before the final 1024-channel stage.
    auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
    auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
    auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
    auto spp10 = SPP(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), 3, 5, 7, "model.10");
    auto c3_11 = C3(network, weightMap, *spp10->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.11");
    /* ------ yolov5 head ------ */
    // Top-down path: three successive upsample + concat + C3 stages.
    auto conv12 = convBlock(network, weightMap, *c3_11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
    auto upsample13 = network->addResize(*conv12->getOutput(0));
    assert(upsample13);
    upsample13->setResizeMode(ResizeMode::kNEAREST);
    upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
    ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
    auto cat14 = network->addConcatenation(inputTensors14, 2);
    auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");
    auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
    auto upsample17 = network->addResize(*conv16->getOutput(0));
    assert(upsample17);
    upsample17->setResizeMode(ResizeMode::kNEAREST);
    upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
    ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
    auto cat18 = network->addConcatenation(inputTensors18, 2);
    auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");
    auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
    auto upsample21 = network->addResize(*conv20->getOutput(0));
    assert(upsample21);
    upsample21->setResizeMode(ResizeMode::kNEAREST);
    upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
    ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors21, 2);
    auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
    // Bottom-up path: three downsampling conv + concat + C3 stages.
    auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
    ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
    auto cat25 = network->addConcatenation(inputTensors25, 2);
    auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");
    auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
    ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
    auto cat28 = network->addConcatenation(inputTensors28, 2);
    auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");
    auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
    ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
    auto cat31 = network->addConcatenation(inputTensors31, 2);
    auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");
    /* ------ detect ------ */
    // Four detect heads, one per pyramid level; weights live under "model.33".
    IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
    IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
    IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
    IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);
    auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
    yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*yolo->getOutput(0));
    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
    config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
    std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
    assert(builder->platformHasFastInt8());
    config->setFlag(BuilderFlag::kINT8);
    Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
    config->setInt8Calibrator(calibrator);
#endif
    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;
    // Don't need the network any more
    network->destroy();
    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*)(mem.second.values));
    }
    return engine;
}
  209. void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, float& gd, float& gw, std::string& wts_name) {
  210. // Create builder
  211. IBuilder* builder = createInferBuilder(gLogger);
  212. IBuilderConfig* config = builder->createBuilderConfig();
  213. // Create model to populate the network, then set the outputs and create an engine
  214. ICudaEngine* engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
  215. assert(engine != nullptr);
  216. // Serialize the engine
  217. (*modelStream) = engine->serialize();
  218. // Close everything down
  219. engine->destroy();
  220. builder->destroy();
  221. config->destroy();
  222. }
  223. void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, int batchSize) {
  224. // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
  225. CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
  226. context.enqueue(batchSize, buffers, stream, nullptr);
  227. CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
  228. cudaStreamSynchronize(stream);
  229. }
  230. bool parse_args(int argc, char** argv, std::string& engine) {
  231. if (argc < 3) return false;
  232. if (std::string(argv[1]) == "-v" && argc == 3) {
  233. engine = std::string(argv[2]);
  234. }
  235. else {
  236. return false;
  237. }
  238. return true;
  239. }
// Camera-inference entry point: loads a prebuilt .engine file, opens a USB
// camera, and runs detection frame-by-frame, drawing boxes, labels and FPS.
// Usage: ./yolov5 -v <engine file>
int main(int argc, char** argv) {
    cudaSetDevice(DEVICE);
    //std::string wts_name = "";
    std::string engine_name = "";
    //float gd = 0.0f, gw = 0.0f;
    //std::string img_dir;
    if (!parse_args(argc, argv, engine_name)) {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./yolov5 -v [.engine] // run inference with camera" << std::endl;
        return -1;
    }
    // Read the serialized engine from disk into trtModelStream.
    std::ifstream file(engine_name, std::ios::binary);
    if (!file.good()) {
        std::cerr << " read " << engine_name << " error! " << std::endl;
        return -1;
    }
    char* trtModelStream{ nullptr };
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();
    // prepare input data ---------------------------
    // Host-side staging buffers for one batch of input pixels / output detections.
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
    // data[i] = 1.0;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    // Deserialize the engine and create an execution context.
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;
    assert(engine->getNbBindings() == 2);
    void* buffers[2];
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);
    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));
    // To read a local video file instead of the camera, use e.g.:
    //cv::VideoCapture capture("/home/nano/Videos/video.mp4");
    // Open the local USB camera. The author's device index is 1; change the
    // argument to 0 if index 1 fails on your machine.
    cv::VideoCapture capture(1);
    if (!capture.isOpened()) {
        std::cout << "Error opening video stream or file" << std::endl;
        return -1;
    }
    int key;
    int fcount = 0;  // frames accumulated for the current batch (BATCH_SIZE is 1 here)
    while (1)
    {
        cv::Mat frame;
        capture >> frame;
        if (frame.empty())
        {
            std::cout << "Fail to read image from camera!" << std::endl;
            break;
        }
        fcount++;
        //if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
        // Letterbox-resize each frame and pack BGR->RGB planar floats into `data`.
        for (int b = 0; b < fcount; b++) {
            //cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            cv::Mat img = frame;
            if (img.empty()) continue;
            cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox BGR to RGB
            int i = 0;
            for (int row = 0; row < INPUT_H; ++row) {
                uchar* uc_pixel = pr_img.data + row * pr_img.step;
                for (int col = 0; col < INPUT_W; ++col) {
                    // Channel-planar layout: R plane, then G, then B; scaled to [0,1].
                    data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
                    data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
                    data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
                    uc_pixel += 3;
                    ++i;
                }
            }
        }
        // Run inference
        auto start = std::chrono::system_clock::now(); // inference start time
        doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
        auto end = std::chrono::system_clock::now(); // inference end time
        //std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
        // NOTE(review): divides by the elapsed ms — if inference ever takes <1ms
        // this divides by zero; consider guarding.
        int fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
        // Per-image NMS over the raw network output.
        std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
        }
        // Draw boxes and labels on the frame.
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            //std::cout << res.size() << std::endl;
            //cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            for (size_t j = 0; j < res.size(); j++) {
                cv::Rect r = get_rect(frame, res[j].bbox);
                cv::rectangle(frame, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
                std::string label = my_classes[(int)res[j].class_id];
                cv::putText(frame, label, cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
                // NOTE(review): the FPS overlay is redrawn once per detection and
                // is skipped entirely on frames with no detections; it likely
                // belongs outside this inner loop.
                std::string jetson_fps = "FPS: " + std::to_string(fps);
                cv::putText(frame, jetson_fps, cv::Point(11, 80), cv::FONT_HERSHEY_PLAIN, 3, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
            }
            //cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
        }
        cv::imshow("yolov5", frame);
        // Press 'q' to quit.
        key = cv::waitKey(1);
        if (key == 'q') {
            break;
        }
        fcount = 0;
    }
    capture.release();
    // Release stream and buffers
    cudaStreamDestroy(stream);
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return 0;
}

修改完代码后

  1. make
  2. sudo ./yolov5 -v yolov5s.engine
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小小林熬夜学编程/article/detail/449689
推荐阅读
相关标签
  

闽ICP备14008679号