赞
踩
一、下载解压
下载网址:https://developer.nvidia.com/nvidia-tensorrt-download
下载完拉到主目录下
tar -zxvf TensorRT-8.2.1.8.Linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz
二、添加环境变量
gedit ~/.bashrc
文末添加:
export LD_LIBRARY_PATH=/home/wave/TensorRT-8.2.1.8/lib:$LD_LIBRARY_PATH
刷新环境变量
source ~/.bashrc
三、下载源码
网址:https://gitcode.net/mirrors/wang-xinyu/tensorrtx?utm_source=csdn_github_accelerator
注意:下载的tensorrtx源码版本要与你安装的yolov5版本一致
下载yolov5对应的模型网址:https://github.com/ultralytics/yolov5/releases
上一篇博客你安装的yolov5-v5.0的话,上面的源码和模型也要对应版本。
都下载完后进入tensorrtx源码中的yolov5目录
将里面的gen_wts.py文件复制到你自己的yolov5项目目录下(模型文件yolov5s.pt也放在该目录下)
在此目录下打开终端生成.wts文件
python3 gen_wts.py --w yolov5s.pt
生成完毕后回到tensorrtx源码中的yolov5目录
mkdir build && cd build
cmake ..
make
然后把刚刚生成的.wts文件复制到这个build文件夹下
sudo ./yolov5 -s yolov5s.wts yolov5s.engine s
sudo ./yolov5 -d yolov5s.engine ../sample
如果要开启摄像头的话将yolov5.cpp的代码换成如下
- #include <iostream>
- #include <chrono>
- #include "cuda_utils.h"
- #include "logging.h"
- #include "common.hpp"
- #include "utils.h"
- #include "calibrator.h"
-
- #define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
- #define DEVICE 0 // GPU id
- #define NMS_THRESH 0.4
- #define CONF_THRESH 0.5
- #define BATCH_SIZE 1
-
- // stuff we know about the network and the input/output blobs
- static const int INPUT_H = Yolo::INPUT_H;
- static const int INPUT_W = Yolo::INPUT_W;
- static const int CLASS_NUM = Yolo::CLASS_NUM;
- static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
- const char* INPUT_BLOB_NAME = "data";
- const char* OUTPUT_BLOB_NAME = "prob";
- static Logger gLogger;
-
// Class names indexed by the detector's class id; replace with your own classes
// when running a custom-trained model.
// Declared as pointers to const because string literals are immutable: binding them
// to plain `char*` is not valid in C++11 and later.
const char* my_classes[] = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush" };
-
// Scales a channel count `x` by the width multiple `gw` and returns a multiple of `divisor`.
// If the truncated product is already a multiple of `divisor` it is returned unchanged;
// otherwise the result is the next multiple above (product / divisor), truncated.
// Mirrors the Python expression: math.ceil(x / divisor) * divisor
static int get_width(int x, float gw, int divisor = 8) {
    const float scaled = x * gw;
    const int truncated = int(scaled);
    if (truncated % divisor != 0) {
        return (int(scaled / divisor) + 1) * divisor;
    }
    return truncated;
}
-
// Scales a repeat count `x` by the depth multiple `gd`.
// A count of exactly 1 is never scaled; any other count is rounded to the nearest
// integer and clamped so the result is at least 1.
static int get_depth(int x, float gd) {
    if (x != 1) {
        const auto rounded = round(x * gd);
        if (rounded > 1) {
            return static_cast<int>(rounded);
        }
    }
    return 1;
}
- //#创建engine和network
- ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
- INetworkDefinition* network = builder->createNetworkV2(0U);
-
- // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
- ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
- assert(data);
-
- std::map<std::string, Weights> weightMap = loadWeights(wts_name);
-
- /* ------ yolov5 backbone------ */
- auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
- auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
- auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
- auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
- auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
- auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
- auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
- auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
- auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8");
-
- /* ------ yolov5 head ------ */
- auto bottleneck_csp9 = C3(network, weightMap, *spp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.9");
- auto conv10 = convBlock(network, weightMap, *bottleneck_csp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
-
- auto upsample11 = network->addResize(*conv10->getOutput(0));
- assert(upsample11);
- upsample11->setResizeMode(ResizeMode::kNEAREST);
- upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
-
- ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
- auto cat12 = network->addConcatenation(inputTensors12, 2);
- auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
- auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
-
- auto upsample15 = network->addResize(*conv14->getOutput(0));
- assert(upsample15);
- upsample15->setResizeMode(ResizeMode::kNEAREST);
- upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
-
- ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
- auto cat16 = network->addConcatenation(inputTensors16, 2);
-
- auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
-
- // yolo layer 0
- IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
- auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
- ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
- auto cat19 = network->addConcatenation(inputTensors19, 2);
- auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
- //yolo layer 1
- IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
- auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
- ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
- auto cat22 = network->addConcatenation(inputTensors22, 2);
- auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
- IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
-
- auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
- yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
- network->markOutput(*yolo->getOutput(0));
-
- // Build engine
- builder->setMaxBatchSize(maxBatchSize);
- config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
- #if defined(USE_FP16)
- config->setFlag(BuilderFlag::kFP16);
- #elif defined(USE_INT8)
- std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
- assert(builder->platformHasFastInt8());
- config->setFlag(BuilderFlag::kINT8);
- Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
- config->setInt8Calibrator(calibrator);
- #endif
-
- std::cout << "Building engine, please wait for a while..." << std::endl;
- ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
- std::cout << "Build engine successfully!" << std::endl;
-
- // Don't need the network any more
- network->destroy();
-
- // Release host memory
- for (auto& mem : weightMap)
- {
- free((void*)(mem.second.values));
- }
-
- return engine;
- }
-
- ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float& gd, float& gw, std::string& wts_name) {
- INetworkDefinition* network = builder->createNetworkV2(0U);
-
- // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
- ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
- assert(data);
-
- std::map<std::string, Weights> weightMap = loadWeights(wts_name);
-
- /* ------ yolov5 backbone------ */
- auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0");
- auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
- auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
- auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
- auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4");
- auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
- auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
- auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
- auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
- auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
- auto spp10 = SPP(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), 3, 5, 7, "model.10");
- auto c3_11 = C3(network, weightMap, *spp10->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.11");
-
- /* ------ yolov5 head ------ */
- auto conv12 = convBlock(network, weightMap, *c3_11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
- auto upsample13 = network->addResize(*conv12->getOutput(0));
- assert(upsample13);
- upsample13->setResizeMode(ResizeMode::kNEAREST);
- upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
- ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
- auto cat14 = network->addConcatenation(inputTensors14, 2);
- auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");
-
- auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
- auto upsample17 = network->addResize(*conv16->getOutput(0));
- assert(upsample17);
- upsample17->setResizeMode(ResizeMode::kNEAREST);
- upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
- ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
- auto cat18 = network->addConcatenation(inputTensors18, 2);
- auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");
-
- auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
- auto upsample21 = network->addResize(*conv20->getOutput(0));
- assert(upsample21);
- upsample21->setResizeMode(ResizeMode::kNEAREST);
- upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
- ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
- auto cat22 = network->addConcatenation(inputTensors21, 2);
- auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
-
- auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
- ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
- auto cat25 = network->addConcatenation(inputTensors25, 2);
- auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");
-
- auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
- ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
- auto cat28 = network->addConcatenation(inputTensors28, 2);
- auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");
-
- auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
- ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
- auto cat31 = network->addConcatenation(inputTensors31, 2);
- auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");
-
- /* ------ detect ------ */
- IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
- IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
- IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
- IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);
-
- auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
- yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
- network->markOutput(*yolo->getOutput(0));
-
- // Build engine
- builder->setMaxBatchSize(maxBatchSize);
- config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
- #if defined(USE_FP16)
- config->setFlag(BuilderFlag::kFP16);
- #elif defined(USE_INT8)
- std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
- assert(builder->platformHasFastInt8());
- config->setFlag(BuilderFlag::kINT8);
- Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
- config->setInt8Calibrator(calibrator);
- #endif
-
- std::cout << "Building engine, please wait for a while..." << std::endl;
- ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
- std::cout << "Build engine successfully!" << std::endl;
-
- // Don't need the network any more
- network->destroy();
-
- // Release host memory
- for (auto& mem : weightMap)
- {
- free((void*)(mem.second.values));
- }
-
- return engine;
- }
-
- void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, float& gd, float& gw, std::string& wts_name) {
- // Create builder
- IBuilder* builder = createInferBuilder(gLogger);
- IBuilderConfig* config = builder->createBuilderConfig();
-
- // Create model to populate the network, then set the outputs and create an engine
- ICudaEngine* engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
- assert(engine != nullptr);
-
- // Serialize the engine
- (*modelStream) = engine->serialize();
-
- // Close everything down
- engine->destroy();
- builder->destroy();
- config->destroy();
- }
-
- void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, int batchSize) {
- // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
- CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
- context.enqueue(batchSize, buffers, stream, nullptr);
- CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
- cudaStreamSynchronize(stream);
- }
-
// Parses the command line for camera mode: `<program> -v <engine file>`.
// On success stores the engine path in `engine` and returns true; for any other
// argument layout returns false and leaves `engine` untouched.
bool parse_args(int argc, char** argv, std::string& engine) {
    // Exactly three arguments are accepted, and the flag must be "-v".
    const bool camera_mode = (argc == 3) && (std::string(argv[1]) == "-v");
    if (!camera_mode) {
        return false;
    }
    engine = std::string(argv[2]);
    return true;
}
-
- int main(int argc, char** argv) {
- cudaSetDevice(DEVICE);
-
- //std::string wts_name = "";
- std::string engine_name = "";
- //float gd = 0.0f, gw = 0.0f;
- //std::string img_dir;
-
- if (!parse_args(argc, argv, engine_name)) {
- std::cerr << "arguments not right!" << std::endl;
- std::cerr << "./yolov5 -v [.engine] // run inference with camera" << std::endl;
- return -1;
- }
-
- std::ifstream file(engine_name, std::ios::binary);
- if (!file.good()) {
- std::cerr << " read " << engine_name << " error! " << std::endl;
- return -1;
- }
- char* trtModelStream{ nullptr };
- size_t size = 0;
- file.seekg(0, file.end);
- size = file.tellg();
- file.seekg(0, file.beg);
- trtModelStream = new char[size];
- assert(trtModelStream);
- file.read(trtModelStream, size);
- file.close();
-
-
- // prepare input data ---------------------------
- static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
- //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
- // data[i] = 1.0;
- static float prob[BATCH_SIZE * OUTPUT_SIZE];
- IRuntime* runtime = createInferRuntime(gLogger);
- assert(runtime != nullptr);
- ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
- assert(engine != nullptr);
- IExecutionContext* context = engine->createExecutionContext();
- assert(context != nullptr);
- delete[] trtModelStream;
- assert(engine->getNbBindings() == 2);
- void* buffers[2];
- // In order to bind the buffers, we need to know the names of the input and output tensors.
- // Note that indices are guaranteed to be less than IEngine::getNbBindings()
- const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
- const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
- assert(inputIndex == 0);
- assert(outputIndex == 1);
- // Create GPU buffers on device
- CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
- CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
- // Create stream
- cudaStream_t stream;
- CUDA_CHECK(cudaStreamCreate(&stream));
-
- //#读取本地视频
- //cv::VideoCapture capture("/home/nano/Videos/video.mp4");
- //#调用本地usb摄像头,我的默认参数为1,如果1报错,可修改为0.
- cv::VideoCapture capture(1);
- if (!capture.isOpened()) {
- std::cout << "Error opening video stream or file" << std::endl;
- return -1;
- }
-
- int key;
- int fcount = 0;
- while (1)
- {
- cv::Mat frame;
- capture >> frame;
- if (frame.empty())
- {
- std::cout << "Fail to read image from camera!" << std::endl;
- break;
- }
- fcount++;
- //if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
- for (int b = 0; b < fcount; b++) {
- //cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
- cv::Mat img = frame;
- if (img.empty()) continue;
- cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox BGR to RGB
- int i = 0;
- for (int row = 0; row < INPUT_H; ++row) {
- uchar* uc_pixel = pr_img.data + row * pr_img.step;
- for (int col = 0; col < INPUT_W; ++col) {
- data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
- data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
- data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
- uc_pixel += 3;
- ++i;
- }
- }
- }
-
- // Run inference
- auto start = std::chrono::system_clock::now();//#获取模型推理开始时间
- doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
- auto end = std::chrono::system_clock::now();//#结束时间
- //std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
- int fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
- std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
- for (int b = 0; b < fcount; b++) {
- auto& res = batch_res[b];
- nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
- }
- for (int b = 0; b < fcount; b++) {
- auto& res = batch_res[b];
- //std::cout << res.size() << std::endl;
- //cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
- for (size_t j = 0; j < res.size(); j++) {
- cv::Rect r = get_rect(frame, res[j].bbox);
- cv::rectangle(frame, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
- std::string label = my_classes[(int)res[j].class_id];
- cv::putText(frame, label, cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
- std::string jetson_fps = "FPS: " + std::to_string(fps);
- cv::putText(frame, jetson_fps, cv::Point(11, 80), cv::FONT_HERSHEY_PLAIN, 3, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
- }
- //cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
- }
- cv::imshow("yolov5", frame);
- key = cv::waitKey(1);
- if (key == 'q') {
- break;
- }
- fcount = 0;
- }
-
- capture.release();
- // Release stream and buffers
- cudaStreamDestroy(stream);
- CUDA_CHECK(cudaFree(buffers[inputIndex]));
- CUDA_CHECK(cudaFree(buffers[outputIndex]));
- // Destroy the engine
- context->destroy();
- engine->destroy();
- runtime->destroy();
-
- return 0;
- }
修改完代码后
- make
- sudo ./yolov5 -v yolov5s.engine
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。