tensorrt_c++_api accelerated inference
The previous post covered accelerating inference with the TensorRT Python API; this one covers the C++ version. Since I could not find a C++ implementation of PIL's preprocessing, the C++ version does not exactly reproduce the Python version's accuracy, and in this test it is not faster than Python either. However, when the model is wrapped as a service, Python handles concurrency far worse than C++, so C++ is still the better choice for deployment.
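If you do not already have a serialized engine, one way to produce the .trt file loaded below is TensorRT's trtexec tool (a sketch; the ONNX filename is an assumption inferred from the engine name used later in main()):

    trtexec --onnx=mobilev2_onnx2.onnx --saveEngine=mobilev2_onnx2.trt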
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <numeric>      // std::accumulate in volume()
#include <cassert>
#include <ctime>
#include <filesystem>
#include "opencv2/opencv.hpp"
#include "cuda_runtime.h"
#include "torch/script.h"
#include "torch/torch.h"
#include "torch/cuda.h"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#define INPUT_CHANNEL 3
#define IMAGE_WIDTH 224
#define IMAGE_HEIGHT 224
// Logger required by the TensorRT API: print warnings and errors,
// suppress informational and verbose messages.
class Logger : public nvinfer1::ILogger{
    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override{
        // Lower enum values are more severe; keep kWARNING and above.
        if (severity <= nvinfer1::ILogger::Severity::kWARNING){
            std::cout << msg << std::endl;
        }
    }
} logger;
// Collect all decodable image files under a directory.
void getImageFiles(std::vector<std::string>& fileLists, std::string testPath) {
    std::filesystem::path path(testPath);
    assert(std::filesystem::exists(path));
    std::filesystem::directory_iterator files(path);
    for (auto& file : files) {
        // Keep only files that OpenCV can actually read as an image.
        if (!cv::imread(file.path().string()).empty())
            fileLists.push_back(file.path().string());
    }
}
// Preprocess one image into a flat CHW float buffer:
// BGR -> RGB, resize to 224x224, scale to [0,1], then normalize each
// channel with mean 0.5 / std 0.5 (the commented ImageNet statistics
// are kept for reference).
void prepareImage(cv::Mat &vec_img, float* inputData) {
    // std::vector<float> img_mean{0.485, 0.456, 0.406};
    // std::vector<float> img_std{0.229, 0.224, 0.225};
    if (!vec_img.data)
        std::cout << "prepareImage: empty image" << std::endl;
    cv::Mat rsz_img;
    cv::cvtColor(vec_img, rsz_img, cv::COLOR_BGR2RGB);
    // Resize the converted image; resizing vec_img here (as the original
    // code did) would discard the BGR->RGB conversion above.
    cv::resize(rsz_img, rsz_img, cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT));
    // HWC uint8 -> NCHW float tensor in [0,1].
    torch::Tensor img_tensor = torch::from_blob(rsz_img.data, { rsz_img.rows, rsz_img.cols, 3 }, torch::kByte);
    img_tensor = img_tensor.permute({ 2, 0, 1 });
    img_tensor = img_tensor.to(torch::kF32);
    img_tensor = img_tensor.div(255);
    img_tensor = img_tensor.unsqueeze(0);
    img_tensor[0][0] = img_tensor[0][0].sub_(0.5).div_(0.5);
    img_tensor[0][1] = img_tensor[0][1].sub_(0.5).div_(0.5);
    img_tensor[0][2] = img_tensor[0][2].sub_(0.5).div_(0.5);
    auto imgTensor = img_tensor.accessor<float, 4>();
    // Copy the tensor into the caller's flat CHW buffer.
    for (int channel = 0; channel < INPUT_CHANNEL; ++channel){
        for (int row = 0; row < IMAGE_HEIGHT; ++row){
            for (int col = 0; col < IMAGE_WIDTH; ++col){
                inputData[channel * IMAGE_HEIGHT * IMAGE_WIDTH + row * IMAGE_WIDTH + col] = imgTensor[0][channel][row][col];
            }
        }
    }
}
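// Note (not in the original post): if the Python pipeline normalized with the
// ImageNet statistics shown in the commented img_mean/img_std lines above,
// the three sub_/div_ lines would instead be, per channel c:
//   img_tensor[0][c] = img_tensor[0][c].sub_(img_mean[c]).div_(img_std[c]);
// Interpolation also matters: cv::resize (bilinear by default) does not match
// PIL's resampling exactly, which is one likely source of the accuracy gap
// mentioned at the top of this post.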
// Number of elements described by a binding's dimensions.
int64_t volume(const nvinfer1::Dims& d)
{
    return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies<int64_t>());
}
// Size in bytes of one element of a TensorRT data type.
unsigned int getElementSize(nvinfer1::DataType t)
{
    switch (t)
    {
        case nvinfer1::DataType::kINT32: return 4;
        case nvinfer1::DataType::kFLOAT: return 4;
        case nvinfer1::DataType::kHALF: return 2;
        case nvinfer1::DataType::kBOOL:
        case nvinfer1::DataType::kINT8: return 1;
    }
    throw std::runtime_error("Invalid DataType.");
}
// Index of the largest score among the 10 class logits.
int returnMax(const float a[]){
    int length = 10;
    float temp = a[0];   // must start from a[0]; leaving temp uninitialized is UB
    int flag = 0;
    for (int i = 1; i < length; ++i){
        if (temp < a[i]){
            temp = a[i];
            flag = i;
        }
    }
    return flag;
}
// Position of str inside class_, or -1 if absent.
int getIndex(const std::vector<std::string>& class_, std::string str){
    for (int i = 0; i < (int)class_.size(); ++i){
        if (class_[i] == str){
            return i;
        }
    }
    return -1;
}
// Run one image through the engine and return the predicted class index.
int getResult(std::string image_path, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context){
    // Load the image.
    cv::Mat image = cv::imread(image_path);
    assert(!image.empty());
    // Preprocess into a flat CHW buffer.
    float a[3 * 224 * 224];
    prepareImage(image, a);
    // Allocate one device buffer per binding (input and output).
    void* buffers[2];
    std::vector<int64_t> bufferSize;
    int nbBindings = engine->getNbBindings();
    bufferSize.resize(nbBindings);
    for (int i = 0; i < nbBindings; ++i){
        nvinfer1::Dims dims = engine->getBindingDimensions(i);
        nvinfer1::DataType dtype = engine->getBindingDataType(i);
        int64_t totalSize = volume(dims) * 1 * getElementSize(dtype);   // batch size 1
        bufferSize[i] = totalSize;
        cudaMalloc(&buffers[i], totalSize);
    }
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    int outSize = bufferSize[1] / sizeof(float);
    cudaMemcpyAsync(buffers[0], a, bufferSize[0], cudaMemcpyHostToDevice, stream);
    // Note: an IExecutionContext is not thread-safe; use one context per thread.
    context->execute(1, buffers);
    std::vector<float> out(outSize);   // a variable-length array here is non-standard C++
    cudaMemcpyAsync(out.data(), buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    cudaFree(buffers[0]);
    cudaFree(buffers[1]);
    cudaStreamDestroy(stream);
    return returnMax(out.data());
}
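// Performance note (not part of the original flow): buffers are allocated and
// freed for every image; hoisting the cudaMalloc/cudaFree out of the per-image
// path and reusing the same device buffers would remove most of that overhead.
// execute(batchSize, buffers) is the older implicit-batch API; on newer
// TensorRT versions the asynchronous equivalent would be roughly:
//   context->enqueueV2(buffers, stream, nullptr);
//   cudaStreamSynchronize(stream);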
int main(){
    // Path to the serialized TensorRT engine.
    std::string model_path = "/data/kile/other/Inception/mobile_net/onnx_/mobilev2_onnx2.trt";
    // Read the whole engine file into memory (rdbuf consumes the file in one go).
    std::ifstream inFile(model_path, std::ios_base::in | std::ios_base::binary);
    std::stringstream buffer;
    buffer << inFile.rdbuf();
    std::string cached_engine = buffer.str();
    inFile.close();
    // Deserialize the engine from memory.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(cached_engine.data(), cached_engine.size(), nullptr);
    // Create the inference context.
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    // Example single test image:
    // std::string image_path = "/data/kile/other/Inception/mobile_net/dataset/test_one/airplane/airplane_3.jpg";
    // Class labels (CIFAR-10).
    std::vector<std::string> class_ = {"airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"};
    int correct = 0;
    int total = 0;
    // Time the whole test loop.
    clock_t start = clock();
    for (auto str : class_){
        int classId = getIndex(class_, str);
        std::cout << classId << std::endl;
        // Collect the test images for this class.
        // NOTE: the per-class directory is commented out; as written, every
        // class reads the same test_one/bird folder.
        std::vector<std::string> fileLists;
        // getImageFiles(fileLists, "/data/kile/other/Inception/mobile_net/dataset/test_data/" + str);
        getImageFiles(fileLists, "/data/kile/other/Inception/mobile_net/dataset/test_one/bird");
        for (auto filePath : fileLists){
            int predictClassId = getResult(filePath, engine, context);
            total += 1;
            std::cout << str << " " << class_[predictClassId] << " " << correct << std::endl;
            if (str == class_[predictClassId]){
                correct += 1;
            }
        }
    }
    clock_t end = clock();
    // clock() returns CPU ticks; divide by CLOCKS_PER_SEC to get seconds.
    std::cout << "accuracy: " << (float)correct / total
              << "  time: " << (double)(end - start) / CLOCKS_PER_SEC << "s" << std::endl;
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return 0;
}
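For reference, a minimal CMake sketch for building this program (the include/library paths and target name are assumptions about the local setup; TensorRT ships plain libraries rather than a CMake package, and libtorch's config is picked up via CMAKE_PREFIX_PATH):

    cmake_minimum_required(VERSION 3.14)
    project(trt_cpp_infer)
    set(CMAKE_CXX_STANDARD 17)            # std::filesystem needs C++17
    find_package(OpenCV REQUIRED)
    find_package(Torch REQUIRED)          # pass -DCMAKE_PREFIX_PATH=/path/to/libtorch
    add_executable(trt_cpp_infer main.cpp)
    # Adjust these paths to the local CUDA / TensorRT installation.
    target_include_directories(trt_cpp_infer PRIVATE /usr/local/cuda/include /usr/local/TensorRT/include)
    target_link_directories(trt_cpp_infer PRIVATE /usr/local/cuda/lib64 /usr/local/TensorRT/lib)
    target_link_libraries(trt_cpp_infer ${OpenCV_LIBS} ${TORCH_LIBRARIES} nvinfer nvonnxparser cudart)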