After working through this article you will be able to: deploy yolov5-6.0, convert the model (.pt -> .wts -> .engine), wrap yolov5-6.0 into a shared library that returns the deep-learning inference results, and finally call that library from Qt or any other project.
Download the yolov5-6.0 source code
URL:
https://github.com/ultralytics/yolov5/tree/v6.0
Steps:
Click "Code", then "Download ZIP"
This gives you the yolov5-6.0.zip archive
Download the tensorrtx source code (yolov5-v6.0 branch)
URL:
https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v6.0
Steps:
Click "Code", then "Download ZIP"
This gives you the tensorrtx-yolov5-v6.0.zip archive
Download the pretrained yolov5s.pt weights
URL:
https://github.com/ultralytics/yolov5/tree/v6.0
Search the page for "YOLOv5s"; you will land in the pretrained checkpoints table:
Model     size (px)  mAPval 0.5:0.95  mAPval 0.5  Speed CPU b1 (ms)  Speed V100 b1 (ms)  Speed V100 b32 (ms)  params (M)  FLOPs @640 (B)
YOLOv5n   640        28.4             46.0        45                 6.3                 0.6                  1.9         4.5
YOLOv5s   640        37.2             56.0        98                 6.4                 0.9                  7.2         16.5
YOLOv5m   640        45.2             63.9        224                8.2                 1.7                  21.2        49.0
YOLOv5l   640        48.8             67.2        430                10.1                2.7                  46.5        109.1
YOLOv5x   640        50.7             68.9        766                12.1                4.8                  86.7        205.7
YOLOv5n6  1280       34.0             50.7        153                8.1                 2.1                  3.2         4.6
YOLOv5s6  1280       44.5             63.0        385                8.2                 3.6                  16.8        12.6
YOLOv5m6  1280       51.0             69.0        887                11.1                6.8                  35.7        50.0
YOLOv5l6  1280       53.6             71.6        1784               15.8                10.5                 76.8        111.4
Steps:
Click "YOLOv5s" in that table to download the yolov5s.pt file
Create a "TensorRT" working directory
mkdir TensorRT
Put "yolov5-6.0.zip" into the TensorRT directory
Put "tensorrtx-yolov5-v6.0.zip" into the TensorRT directory
Put "yolov5s.pt" into the TensorRT directory
Then extract the two archives
sudo apt update && \
sudo apt upgrade -y && \
sudo apt install -y build-essential cmake pip
See my blog post: 『heqingchun-ubuntu系统下安装nvidia显卡驱动3种方法』 (three ways to install the NVIDIA driver on Ubuntu)
See my blog post: 『heqingchun-ubuntu系统下安装cuda与cudnn』 (installing CUDA and cuDNN on Ubuntu)
See my blog post: 『heqingchun-ubuntu使用TensorRT配置』 (setting up TensorRT on Ubuntu)
First speed up pip (configure a faster index), otherwise the next steps are very slow; see: 『Ubuntu系统+设置pip加速』 (speeding up pip on Ubuntu)
Set an environment variable before installing PyTorch
Open the profile:
sudo gedit /etc/profile
Add:
export PATH=/home/heqingchun/.local/bin:$PATH
Apply:
source /etc/profile
See my blog post: 『ubuntu开发环境配置(cuda、cudnn、ffmpeg、opencv、darknet-master、TensorRT、python、pytorch、MySql、qt(armv8交叉编译))』 (setting up an Ubuntu development environment), section "九" (9):
pip3 install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
Extract the downloaded "yolov5-6.0.zip" archive and enter the directory:
unzip yolov5-6.0.zip
cd yolov5-6.0
Then run:
pip3 install -r requirements.txt
If you hit the error "ERROR: pandas 2.0.3 has requirement python-dateutil>=2.8.2, but you'll have python-dateutil 2.7.3 which is incompatible.", run:
pip3 install --upgrade python-dateutil
Put the yolov5s.pt file and the gen_wts.py script (from the tensorrtx-yolov5-v6.0/yolov5 directory) into the yolov5-6.0 directory:
cd TensorRT
cp yolov5s.pt tensorrtx-yolov5-v6.0/yolov5/gen_wts.py yolov5-6.0
Convert:
python3 yolov5-6.0/gen_wts.py -w yolov5-6.0/yolov5s.pt -o yolov5s.wts
This generates the yolov5s.wts file in the TensorRT directory.
Edit the CMake file:
cd TensorRT
gedit tensorrtx-yolov5-v6.0/yolov5/CMakeLists.txt
Change the TensorRT include and library directories to the actual paths of your TensorRT installation (the paths below are from my machine).
Before:
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)
After:
# tensorrt
include_directories(/home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/include)
link_directories(/home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/lib)
Create the build directory:
mkdir -p tensorrtx-yolov5-v6.0/yolov5/build
Enter the build directory:
cd tensorrtx-yolov5-v6.0/yolov5/build
Build:
cmake ..
make -j $(nproc)
The build produces a yolov5 executable in the build directory, which is used both to convert the model and to run inference. Note: the model used here is the official pretrained one, so the number of detection classes is 80; if your model has a different class count, change CLASS_NUM in yololayer.h before running cmake.
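For orientation, the class count sits next to the input-size constants in yololayer.h; a minimal excerpt of the stock tensorrtx yolov5-v6.0 header (other members of the namespace are omitted, and your copy may differ slightly):

namespace Yolo
{
    // number of classes the .pt/.wts model was trained with; 80 for the official COCO model
    static constexpr int CLASS_NUM = 80;
    // network input resolution the engine is built and run with
    static constexpr int INPUT_H = 640;
    static constexpr int INPUT_W = 640;
}

Only the CLASS_NUM value needs to change for a custom model; the wrapper code later in this article reads these constants through Yolo::CLASS_NUM, Yolo::INPUT_H and Yolo::INPUT_W.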
Conversion:
./yolov5 -s ../../../yolov5s.wts yolov5s.engine s
Here -s serializes the .wts weights into a TensorRT engine, and the trailing "s" selects the model variant (n/s/m/l/x), matching yolov5s. After a short wait you will find the generated yolov5s.engine file.
cd TensorRT/tensorrtx-yolov5-v6.0/yolov5/build/
./yolov5 -d yolov5s.engine ../samples
Console output:
[11/27/2023-09:37:24] [W] [TRT] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
inference time: 3ms
inference time: 3ms
The samples directory contains two images.
Inference succeeds and produces two annotated copies, "_bus.jpg" and "_zidane.jpg", with the detection boxes drawn; each image takes about 3 ms.
If everything above worked, you can already run TensorRT-accelerated yolov5 inference, but only from the command line. Next we wrap the code into a shared library and embed that library in a Qt project, which makes it much easier to reuse.
cd TensorRT/tensorrtx-yolov5-v6.0
cp -fr yolov5 YOLOv5v60
cd YOLOv5v60
touch YOLOv5v60.cpp
gedit YOLOv5v60.cpp
Write the following content into it and save:
#include <iostream>
#include <fstream>
#include <sstream>
#include <string.h>
#include <vector>
#include <chrono>
#include "cuda_runtime_api.h"
#include "logging.h"
#include "common.hpp"
#include "calibrator.h"
#include "cuda_utils.h"
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0 // GPU id
#define BATCH_SIZE 1
using namespace std;
using namespace cv;
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // floats for MAX_OUTPUT_BBOX_COUNT detections plus one leading float that stores the detection count
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;
typedef struct {
int classid; // class id
cv::Rect rbox; // bounding box
float conf; // confidence
} YOLOv5v60Result;
typedef struct {
float *data;
float *prob;
IExecutionContext *exe_context;
void* buffers[2];
cudaStream_t cuda_stream;
int inputIndex;
int outputIndex;
} YOLOv5v60TRTContext;
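// Letterbox preprocessing: scale the image to fit INPUT_W x INPUT_H while keeping its aspect
// ratio, then pad the remaining border with gray (128,128,128), matching the preprocessing
// the engine expects.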
static inline cv::Mat PreprocessImage(cv::Mat& img, int input_w, int input_h) {
int w, h, x, y;
float r_w = input_w / (img.cols*1.0);
float r_h = input_h / (img.rows*1.0);
if (r_h > r_w) {
w = input_w;
h = r_w * img.rows;
x = 0;
y = (input_h - h) / 2;
} else {
w = r_h * img.cols;
h = input_h;
x = (input_w - w) / 2;
y = 0;
}
cv::Mat re(h, w, CV_8UC3);
if (img.channels() == 4) cv::cvtColor(img, img, cv::COLOR_RGBA2RGB); // drop the alpha channel only when a 4-channel image is passed in
cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
return out;
}
extern "C" YOLOv5v60TRTContext* InitEngine(const char* enginePath);
extern "C" void YOLOv5v60TRTDetect(YOLOv5v60TRTContext *trt_ctx,cv::Mat img, std::vector<YOLOv5v60Result>& vYoloresult, float conf_thresh, float nms_thresh);
extern "C" void DeleteYolo(YOLOv5v60TRTContext *trt_ctx);
static void doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* input, float* output, int batchSize)
{
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(batchSize, buffers, stream, nullptr);
CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
}
extern "C" YOLOv5v60TRTContext* InitEngine(const char* enginePath)
{
size_t size;
char *trtModelStream = NULL;
YOLOv5v60TRTContext * trt_ctx = NULL;
trt_ctx = new YOLOv5v60TRTContext();
trt_ctx->data = new float[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
trt_ctx->prob = new float[BATCH_SIZE * OUTPUT_SIZE];
std::ifstream file(enginePath, std::ios::binary);
if (file.good())
{
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
}
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine *engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
trt_ctx->exe_context = engine->createExecutionContext();
assert(trt_ctx->exe_context != nullptr);
delete[] trtModelStream;
assert(engine->getNbBindings() == 2);
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
CUDA_CHECK(cudaMalloc(&trt_ctx->buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc(&trt_ctx->buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
CUDA_CHECK(cudaStreamCreate(&trt_ctx->cuda_stream));
printf("YOLOv5v60 InitEngine successed\n");
return (YOLOv5v60TRTContext *)trt_ctx;
}
extern "C" void YOLOv5v60TRTDetect(YOLOv5v60TRTContext *trt_ctx,cv::Mat img, std::vector<YOLOv5v60Result>& vYoloresult, float conf_thresh, float nms_thresh)
{
printf("YOLOv5v60TRTDetect start\n");
cv::Mat pr_img = PreprocessImage(img, INPUT_W, INPUT_H);
// convert the letterboxed BGR HWC uint8 image into a normalized RGB CHW float tensor
int i = 0;
for (int row = 0; row < INPUT_H; ++row)
{
uchar* uc_pixel = pr_img.data + row * pr_img.step;
for (int col = 0; col < INPUT_W; ++col)
{
trt_ctx->data[i] = (float)uc_pixel[2] / 255.0;
trt_ctx->data[i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
trt_ctx->data[i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
uc_pixel += 3;
++i;
}
}
// Run inference
auto start = std::chrono::system_clock::now();
doInference(*trt_ctx->exe_context, trt_ctx->cuda_stream, trt_ctx->buffers, trt_ctx->data, trt_ctx->prob, BATCH_SIZE);
auto end = std::chrono::system_clock::now();
std::cout << "检测耗时:" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::vector<Yolo::Detection> batch_res;
nms(batch_res, &trt_ctx->prob[0], conf_thresh, nms_thresh);
for (size_t j = 0; j < batch_res.size(); j++)
{
cv::Rect r = get_rect(img, batch_res[j].bbox);
YOLOv5v60Result yoloresult;
yoloresult.rbox = r;
yoloresult.classid = (int)batch_res[j].class_id;
yoloresult.conf = ((float)(int)((batch_res[j].conf + 0.005) * 100)) / 100; // round the confidence to two decimal places
vYoloresult.push_back(yoloresult);
}
}
extern "C" void DeleteYolo(YOLOv5v60TRTContext *trt_ctx)
{
cudaStreamDestroy(trt_ctx->cuda_stream);
CUDA_CHECK(cudaFree(trt_ctx->buffers[0]));
CUDA_CHECK(cudaFree(trt_ctx->buffers[1]));
trt_ctx->exe_context->destroy();
delete[] trt_ctx->data;
delete[] trt_ctx->prob;
delete trt_ctx;
}
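Before touching the build files, here is a minimal sketch of how the three exported functions are meant to be called from plain C++ (the file paths and thresholds are just placeholders; the Qt example further down follows the same flow):

#include <opencv2/opencv.hpp>
#include <vector>
#include "YOLOv5v60.cpp" // or re-declare the two structs and the three extern "C" functions yourself

int main()
{
    // deserialize the engine and allocate CUDA buffers once
    YOLOv5v60TRTContext *ctx = InitEngine("yolov5s.engine");
    cv::Mat img = cv::imread("samples/bus.jpg");
    std::vector<YOLOv5v60Result> results;
    // conf_thresh = 0.6, nms_thresh = 0.45
    YOLOv5v60TRTDetect(ctx, img, results, 0.6f, 0.45f);
    for (const auto &r : results)
        cv::rectangle(img, r.rbox, cv::Scalar(0, 0, 255), 2);
    cv::imwrite("result.jpg", img);
    // free the CUDA stream, device buffers and host buffers
    DeleteYolo(ctx);
    return 0;
}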
Next, modify CMakeLists.txt in the YOLOv5v60 directory; every change is marked with a #heqingchun comment.
Original file:
cmake_minimum_required(VERSION 2.6)
project(yolov5)
add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
find_package(CUDA REQUIRED)
if(WIN32)
enable_language(CUDA)
endif(WIN32)
include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/include)
link_directories(/home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/lib)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -g -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
cuda_add_library(myplugins SHARED yololayer.cu)
target_link_libraries(myplugins nvinfer cudart)
find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})
cuda_add_executable(yolov5 calibrator.cpp yolov5.cpp preprocess.cu)
target_link_libraries(yolov5 nvinfer)
target_link_libraries(yolov5 cudart)
target_link_libraries(yolov5 myplugins)
target_link_libraries(yolov5 ${OpenCV_LIBS})
if(UNIX)
add_definitions(-O2 -pthread)
endif(UNIX)
Modified file:
cmake_minimum_required(VERSION 2.6)
#heqingchun project(yolov5)
#heqingchun
project(YOLOv5v60)
add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)
find_package(CUDA REQUIRED)
if(WIN32)
enable_language(CUDA)
endif(WIN32)
include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/include)
link_directories(/home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/lib)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -g -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
#heqingchun cuda_add_library(myplugins SHARED yololayer.cu)
#heqingchun target_link_libraries(myplugins nvinfer cudart)
#heqingchun
cuda_add_library(YOLOv5v60 SHARED yololayer.cu YOLOv5v60.cpp)
find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})
#heqingchun cuda_add_executable(yolov5 calibrator.cpp yolov5.cpp preprocess.cu)
#heqingchun target_link_libraries(yolov5 nvinfer)
#heqingchun target_link_libraries(yolov5 cudart)
#heqingchun target_link_libraries(yolov5 myplugins)
#heqingchun target_link_libraries(yolov5 ${OpenCV_LIBS})
#heqingchun
target_link_libraries(YOLOv5v60 nvinfer)
target_link_libraries(YOLOv5v60 cudart)
target_link_libraries(YOLOv5v60 ${OpenCV_LIBS})
if(UNIX)
add_definitions(-O2 -pthread)
endif(UNIX)
Save the modified file.
Enter the build directory (copied over from yolov5) and delete its old contents:
cd build
rm -fr *
Build:
cmake ..
make -j $(nproc)
When the build finishes, the shared library "libYOLOv5v60.so" appears in the build directory.
Open Qt Creator and create a new MainWindow desktop application. Add a push button to the UI and create its clicked() slot. The project's files are listed below, in this order: the .pro file, main.cpp, mainwindow.h and mainwindow.cpp.
QT += core gui
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
CONFIG += c++11
# The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS
# You can also make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
SOURCES += \
main.cpp \
mainwindow.cpp
HEADERS += \
mainwindow.h
FORMS += \
mainwindow.ui
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target
INCLUDEPATH += /usr/local/include/opencv4
unix|win32: LIBS += -L/usr/local/lib/ -lopencv_world
INCLUDEPATH += /usr/local/cuda/include
unix|win32: LIBS += -L/usr/local/cuda/lib64/ -lcudart
INCLUDEPATH += /home/heqingchun/soft/TensorRT/TensorRT-8.5.3.1/include
unix|win32: LIBS += -L$$PWD/../../../soft/TensorRT/TensorRT-8.5.3.1/lib/ -lnvinfer
INCLUDEPATH += /home/heqingchun/soft/TensorRT/tensorrtx-yolov5-v6.0/YOLOv5v60
unix|win32: LIBS += -L$$PWD/../../../soft/TensorRT/tensorrtx-yolov5-v6.0/YOLOv5v60/build/ -lYOLOv5v60
#include "mainwindow.h"
#include <QApplication>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
MainWindow w;
w.show();
return a.exec();
}
mainwindow.h:
#ifndef MAINWINDOW_H
#define MAINWINDOW_H
#include <QMainWindow>
#include <QDebug>
QT_BEGIN_NAMESPACE
namespace Ui { class MainWindow; }
QT_END_NAMESPACE
class MainWindow : public QMainWindow
{
Q_OBJECT
public:
MainWindow(QWidget *parent = nullptr);
~MainWindow();
private slots:
void on_pushButton_clicked();
private:
Ui::MainWindow *ui;
};
#endif // MAINWINDOW_H
#include "mainwindow.h"
#include "ui_mainwindow.h"
#include "YOLOv5v60.cpp"
extern "C" YOLOv5v60TRTContext* InitEngine(const char* enginePath);
extern "C" void YOLOv5v60TRTDetect(YOLOv5v60TRTContext *trt_ctx,cv::Mat img, std::vector<YOLOv5v60Result>& vYoloresult, float conf_thresh, float nms_thresh);
extern "C" void DeleteYolo(YOLOv5v60TRTContext *trt_ctx);
YOLOv5v60TRTContext * trt_ctx = NULL;
vector<YOLOv5v60Result> vYoloresult;
cv::Mat mat;
QStringList nameList = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
MainWindow::MainWindow(QWidget *parent)
: QMainWindow(parent)
, ui(new Ui::MainWindow)
{
ui->setupUi(this);
std::string modePath = "/home/heqingchun/soft/TensorRT/tensorrtx-yolov5-v6.0/yolov5/build/yolov5s.engine";
trt_ctx = InitEngine(modePath.data());
}
MainWindow::~MainWindow()
{
delete ui;
}
void MainWindow::on_pushButton_clicked()
{
mat = cv::imread("/home/heqingchun/soft/TensorRT/tensorrtx-yolov5-v6.0/yolov5/samples/bus.jpg");
YOLOv5v60TRTDetect(trt_ctx,mat,vYoloresult,0.6,0.45);
for (unsigned long i = 0; i < vYoloresult.size(); i++) {
YOLOv5v60Result result = vYoloresult[i];
QString classId =nameList[result.classid];
qDebug()<<"current classes:"<<classId<<"conf:"<<result.conf<<result.rbox.x<<result.rbox.y<<result.rbox.width<<result.rbox.height;
// draw the bounding box
cv::rectangle(mat,cv::Point(result.rbox.x,result.rbox.y),cv::Point(result.rbox.x + result.rbox.width,result.rbox.y + result.rbox.height),cv::Scalar(0,0,255));
// draw the class label and confidence
cv::putText(mat,classId.toStdString() + ":" + QString::number(result.conf).toStdString(),cv::Point(result.rbox.x,result.rbox.y),5,1,cv::Scalar(0,0,255));
}
cv::imshow("结果",mat);
mat.release();
vYoloresult.clear();
}
Ubuntu (x86): configure, build and use yolov5-6.0 with TensorRT hardware acceleration, package it as a shared library, deploy it in C++ and call it from Qt. Done.