In the previous article, "PaddleSeg C++ deployment of the OCRNet+HRNet model", the exported semantic-segmentation model produced float32 output and contained no softmax or argmax operators, which made post-processing in the application expensive. Adding softmax and argmax operators at the end of the network via PaddleSeg/tools/export.py eliminates that post-processing cost.
Export the inference model following PaddleSeg/docs/model_export_cn.md and save the exported files under output/inference_model, as shown below. The model output type is then int32.
./output/inference_model
├── deploy.yaml            # deployment config file, mainly describing the data preprocessing
├── model.pdmodel          # topology (graph) file of the inference model
├── model.pdiparams        # weight file of the inference model
└── model.pdiparams.info   # extra parameter info, usually not needed
python tools/export.py \
    --config configs/ocrnet/ocrnet_hrnetw18_cityscapes_1024x512_160k_lovasz_softmax.yml \
    --model_path output/iter_12000/model.pdparams \
    --save_dir output/inference_model \
    --output_op argmax
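To confirm that the exported model really ends in argmax, you can run it once with the Paddle Inference Python API and inspect the output dtype. A minimal sketch, assuming a Paddle 2.x environment and the output/inference_model layout shown above:

import numpy as np
from paddle.inference import Config, create_predictor

# Load the exported inference model and check the output dtype.
config = Config("output/inference_model/model.pdmodel",
                "output/inference_model/model.pdiparams")
predictor = create_predictor(config)

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
dummy = np.random.rand(1, 3, 720, 1280).astype("float32")  # NCHW, any valid size
input_handle.reshape(dummy.shape)
input_handle.copy_from_cpu(dummy)

predictor.run()

output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
result = output_handle.copy_to_cpu()
print(result.dtype, result.shape)  # expect an integer class map (the article reports int32)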
Before PaddleSeg v2.0, export.py did not provide the argmax and softmax options; the following code appends softmax and argmax operators to the end of the model instead.
import argparse
import os

import paddle
import yaml

from paddleseg.cvlibs import Config
from paddleseg.utils import logger


def parse_args():
    parser = argparse.ArgumentParser(description='Model export.')
    # params of training
    parser.add_argument(
        "--config", dest="cfg", help="The config file.",
        default=None, type=str, required=True)
    parser.add_argument(
        '--save_dir', dest='save_dir',
        help='The directory for saving the model snapshot',
        type=str, default='./output')
    parser.add_argument(
        '--model_path', dest='model_path',
        help='The path of model for evaluation',
        type=str, default=None)
    return parser.parse_args()


class SavedSegmentationNet(paddle.nn.Layer):
    def __init__(self, net, without_argmax=False, with_softmax=False):
        super().__init__()
        self.net = net
        self.post_processer = PostPorcesser(without_argmax, with_softmax)

    def forward(self, x):
        outs = self.net(x)
        outs = self.post_processer(outs)
        return outs


class PostPorcesser(paddle.nn.Layer):
    def __init__(self, without_argmax, with_softmax):
        super().__init__()
        self.without_argmax = without_argmax
        self.with_softmax = with_softmax

    def forward(self, outs):
        new_outs = []
        for out in outs:
            if self.with_softmax:
                out = paddle.nn.functional.softmax(out, axis=1)
            if not self.without_argmax:
                out = paddle.argmax(out, axis=1)
            new_outs.append(out)
        return new_outs


def main(args):
    os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
    cfg = Config(args.cfg)
    net = cfg.model

    if args.model_path:
        para_state_dict = paddle.load(args.model_path)
        net.set_dict(para_state_dict)
        logger.info('Loaded trained params of model successfully.')

    # Append softmax and argmax to the network.
    # without_argmax=False keeps the argmax op; with_softmax=True adds softmax.
    new_net = SavedSegmentationNet(net, without_argmax=False, with_softmax=True)
    new_net.eval()
    new_net = paddle.jit.to_static(
        new_net,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 3, None, None], dtype='float32')
        ])
    save_path = os.path.join(args.save_dir, 'model')
    paddle.jit.save(new_net, save_path)

    yml_file = os.path.join(args.save_dir, 'deploy.yaml')
    with open(yml_file, 'w') as file:
        transforms = cfg.export_config.get('transforms', [{
            'type': 'Normalize'
        }])
        data = {
            'Deploy': {
                'transforms': transforms,
                'model': 'model.pdmodel',
                'params': 'model.pdiparams'
            }
        }
        yaml.dump(data, file)

    logger.info(f'Model is saved in {args.save_dir}.')


if __name__ == '__main__':
    args = parse_args()
    main(args)
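Assuming the script above is saved as export_argmax.py (an illustrative name, not part of PaddleSeg), it is invoked the same way as tools/export.py:

python export_argmax.py \
    --config configs/ocrnet/ocrnet_hrnetw18_cityscapes_1024x512_160k_lovasz_softmax.yml \
    --model_path output/iter_12000/model.pdparams \
    --save_dir output/inference_model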
Reference documents: PaddleSeg/docs/model_export_onnx_cn.md and the Paddle2ONNX documentation.
(1) Install Paddle2ONNX
pip install paddle2onnx
(2) Model conversion
Run the following command to convert the inference model in the output/inference_model folder to ONNX format with Paddle2ONNX. The exported model is saved as model.onnx.
paddle2onnx --model_dir output/inference_model \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--opset_version 12 \
--save_file model.onnx \
--enable_dev_version True
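Before building the TensorRT engine, it is worth sanity-checking model.onnx with ONNX Runtime. A minimal sketch, assuming onnxruntime is installed and the model's input tensor is named x (the name used in the trtexec shape options below):

import numpy as np
import onnxruntime as ort

# Run the exported ONNX model once on random data.
sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
print(inp.name, inp.shape)  # expect x with dynamic H and W

dummy = np.random.rand(1, 3, 720, 1280).astype("float32")
outs = sess.run(None, {inp.name: dummy})
print(outs[0].dtype, outs[0].shape)  # argmax output: integer class map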
Next, build a TensorRT engine from the ONNX model with trtexec. Since the input is dynamic, the command specifies the minimum and maximum input-shape range along with an optimal shape. The resulting engine is saved as model.trt.
trtexec.exe \
    --onnx=model.onnx \
    --explicitBatch \
    --fp16 \
    --minShapes=x:1x3x540x960 \
    --optShapes=x:1x3x720x1280 \
    --maxShapes=x:1x3x1080x1920 \
    --saveEngine=model.trt
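As a quick check that the engine was built with dynamic dimensions, the TensorRT Python bindings can deserialize model.trt and print the binding shapes. A sketch, assuming TensorRT 8.x Python bindings (the same binding-level API the C++ code below uses):

import tensorrt as trt

# Inspect the binding shapes of the serialized engine.
logger = trt.Logger(trt.Logger.WARNING)
with open("model.trt", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    # Dynamic dims show up as -1, e.g. input x: (1, 3, -1, -1)
    print(engine.get_binding_name(i),
          engine.get_binding_shape(i),
          engine.get_binding_dtype(i))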
// LaneSegInferTRT.hpp
#pragma once

#include <memory>
#include <string>
#include <vector>

#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <opencv2/opencv.hpp>

// PaddleSegmentation::DataLane and LanePostProcess come from the lane
// post-processing module (not shown here).

namespace TRTSegmentation {

class Logger : public nvinfer1::ILogger {
public:
    Logger(Severity severity = Severity::kWARNING) : severity_(severity) {}

    virtual void log(Severity severity, const char* msg) noexcept override {
        // suppress messages below the configured severity
        if (severity <= severity_) {
            //std::cout << msg << std::endl;
        }
    }

    nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }

private:
    Severity severity_;
};

struct InferDeleter {
    template <typename T>
    void operator()(T* obj) const {
        delete obj;
    }
};

template <typename T>
using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;

class LaneSegInferTRT {
public:
    LaneSegInferTRT(const std::string seg_model_dir = "") {
        this->seg_model_dir_ = seg_model_dir;
        InitPredictor();
    }

    ~LaneSegInferTRT() {
        cudaFree(bindings_[0]);
        cudaFree(bindings_[1]);
    }

    void PredictSeg(
        const cv::Mat &image_mat,
        std::vector<PaddleSegmentation::DataLane> &solLanes /* solid lanes */,
        std::vector<PaddleSegmentation::DataLane> &dasLanes /* dashed lanes */,
        std::vector<double>* times = nullptr);

private:
    void InitPredictor();

    // Preprocess image and copy data to input buffer
    cv::Mat Preprocess(const cv::Mat& image_mat);

    // Postprocess image
    void Postprocess(int rows, int cols, std::vector<int> &out_data,
                     std::vector<PaddleSegmentation::DataLane> &solLanes,
                     std::vector<PaddleSegmentation::DataLane> &dasLanes);

private:
    //static const int num_classes_ = 15;
    std::shared_ptr<nvinfer1::ICudaEngine> mEngine_;
    SampleUniquePtr<nvinfer1::IExecutionContext> context_seg_lane_;
    std::vector<void*> bindings_;
    std::string seg_model_dir_;
    // Max pixels (h*w) preallocated for the input/output buffers;
    // must cover the largest input passed at runtime.
    int gpuMaxBufSize = 1280 * 720;
};

} // namespace TRTSegmentation
#include "LaneSegInferTRT.hpp" namespace { class Logger : public nvinfer1::ILogger { public: Logger(Severity severity = Severity::kWARNING) : severity_(severity) {} virtual void log(Severity severity, const char* msg) noexcept override { // suppress info-level messages if (severity <= severity_) { //std::cout << msg << std::endl; } } nvinfer1::ILogger& getTRTLogger() noexcept { return *this; } private: Severity severity_; }; } namespace TRTSegmentation { #define CHECK(status) \ do \ { \ auto ret = (status); \ if (ret != 0) \ { \ std::cerr << "Cuda failure: " << ret << std::endl; \ } \ } while (0) void LaneSegInferTRT::InitPredictor() { if (seg_model_dir_.empty()) { throw "Predictor must receive seg_model!"; } std::ifstream ifs(seg_model_dir_, std::ifstream::binary); if (!ifs) { throw "seg_model_dir error!"; } ifs.seekg(0, std::ios_base::end); int size = ifs.tellg(); ifs.seekg(0, std::ios_base::beg); std::unique_ptr<char> pData(new char[size]); ifs.read(pData.get(), size); ifs.close(); // engine模型 Logger logger(nvinfer1::ILogger::Severity::kVERBOSE); SampleUniquePtr<nvinfer1::IRuntime> runtime{nvinfer1::createInferRuntime(logger.getTRTLogger()) }; mEngine_ = std::shared_ptr<nvinfer1::ICudaEngine>( runtime->deserializeCudaEngine(pData.get(), size), InferDeleter()); this->context_seg_lane_ = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine_->createExecutionContext()); bindings_.resize(mEngine_->getNbBindings()); CHECK(cudaMalloc(&bindings_[0], sizeof(float) * 3 * gpuMaxBufSize)); // n*3*h*w CHECK(cudaMalloc(&bindings_[1], sizeof(int) * 1 * gpuMaxBufSize)); // n*1*h*w } cv::Mat LaneSegInferTRT::Preprocess(const cv::Mat& image_mat) { cv::Mat img; cv::cvtColor(image_mat, img, cv::COLOR_BGR2RGB); if (true/*is_normalize*/) { img.convertTo(img, CV_32F, 1.0 / 255, 0); img = (img - 0.5) / 0.5; } return img; } void LaneSegInferTRT::PredictSeg( const cv::Mat &image_mat, std::vector<PaddleSegmentation::DataLane> &solLanes , std::vector<PaddleSegmentation::DataLane> &dasLanes, std::vector<double>* times) { // Preprocess image cv::Mat img = Preprocess(image_mat); int rows = img.rows; int cols = img.cols; this->context_seg_lane_->setBindingDimensions(0, nvinfer1::Dims4{ 1, 3 , rows, cols }); int chs = img.channels(); std::vector<float> input_data(1 * chs * rows * cols, 0.0f); hwc_img_2_chw_data(img, input_data.data()); CHECK(cudaMemcpy(bindings_[0], static_cast<const void*>(input_data.data()), 3 * img.rows * img.cols * sizeof(float), cudaMemcpyHostToDevice)); // Run predictor 推理 context_seg_lane_->executeV2(bindings_.data()); // Get output tensor std::vector<int> out_data(1 * 1 * rows * cols); CHECK(cudaMemcpy(static_cast<void*>(out_data.data()), bindings_[1], out_data.size() * sizeof(int), cudaMemcpyDeviceToHost)); // Postprocessing Postprocess(rows, cols, out_data, solLanes,dasLanes); } void LaneSegInferTRT::Postprocess(int rows, int cols, vector<int>& out_data,std::vector<PaddleSegmentation::DataLane> &solLanes, std::vector<PaddleSegmentation::DataLane> &dasLanes) { PaddleSegmentation::LanePostProcess laneNet(rows, cols); laneNet.lanePostprocessForTRT(out_data,solLanes,dasLanes); } }//namespace PaddleSegmentation