https://github.com/CVHub520/X-AnyLabeling/releases
Choose the CPU or GPU inference build, and the Linux or Windows version, according to your system.
https://github.com/CVHub520/X-AnyLabeling
Open a terminal in the project directory and install the required dependencies:
pip install -r requirements.txt
After installation, run app.py:
python anylabeling/app.py
Note: if running the exe directly fails, it is best to use this second method (running from source), since the terminal will show the cause of the error.
Note: X-AnyLabeling can use a model you have trained to generate annotations automatically, but the first time around you have to annotate the data yourself.
On first launch you can choose the interface language.
Open the folder containing the images to be annotated.
Click the rectangle tool or use the shortcut (R).
Draw the boxes directly and define your own classes.
Open the "File" menu in the top-left corner and enable auto-save.
Annotations are saved as JSON; you can choose the export format yourself.
YOLOv8 trains on txt labels, but when annotating I usually export VOC (XML format).
Note: you can also export the YOLO label format directly; a conversion sketch for the VOC route is given below.
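If you exported VOC XML, convert it to YOLO txt before training. Each txt line is "class_id x_center y_center width height", normalized to [0,1]. Here is a minimal conversion sketch of my own (not an X-AnyLabeling feature); the class list and folder names are assumptions to replace with your own:

import os
import xml.etree.ElementTree as ET

classes = ["classA", "classB"]  # assumption: your class names, in the same order as training
xml_dir = "annotations"         # assumption: folder with the exported VOC xml files
out_dir = "labels_txt"          # output folder for the YOLO txt labels
os.makedirs(out_dir, exist_ok=True)

for xml_file in os.listdir(xml_dir):
    if not xml_file.endswith(".xml"):
        continue
    root = ET.parse(os.path.join(xml_dir, xml_file)).getroot()
    w = float(root.find("size/width").text)
    h = float(root.find("size/height").text)
    lines = []
    for obj in root.iter("object"):
        cls_id = classes.index(obj.find("name").text)  # raises if a name is missing from classes
        box = obj.find("bndbox")
        xmin, ymin = float(box.find("xmin").text), float(box.find("ymin").text)
        xmax, ymax = float(box.find("xmax").text), float(box.find("ymax").text)
        # YOLO format: center coordinates and box size, normalized by image width/height
        xc, yc = (xmin + xmax) / 2 / w, (ymin + ymax) / 2 / h
        bw, bh = (xmax - xmin) / w, (ymax - ymin) / h
        lines.append(f"{cls_id} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}")
    with open(os.path.join(out_dir, xml_file.replace(".xml", ".txt")), "w") as f:
        f.write("\n".join(lines))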
Split the annotated data into the folder layout required for training:
import os
import random
import shutil

# Ask for the source folder and the train split ratio
folder_path = input("Enter the folder path: ")
train_ratio = float(input("Enter the training set ratio: "))

# Check that the folder exists
if not os.path.exists(folder_path):
    print("Folder does not exist!")
    exit()

# Collect all jpg and txt files
jpg_files = [file for file in os.listdir(folder_path) if file.endswith(".jpg")]
txt_files = [file for file in os.listdir(folder_path) if file.endswith(".txt")]

# Check that image and label counts match
if len(jpg_files) != len(txt_files):
    print("Image and label counts do not match!")
    exit()

# Shuffle the file order
random.shuffle(jpg_files)

# Split into training and validation sets
train_size = int(len(jpg_files) * train_ratio)
train_jpg = jpg_files[:train_size]
train_txt = [file.replace(".jpg", ".txt") for file in train_jpg]
val_jpg = jpg_files[train_size:]
val_txt = [file.replace(".jpg", ".txt") for file in val_jpg]

# Create the output folders (relative to the current working directory)
for subdir in ("images/train", "images/val", "labels/train", "labels/val"):
    os.makedirs(subdir, exist_ok=True)

# Copy files to the target folders
for file in train_jpg:
    shutil.copy(os.path.join(folder_path, file), "images/train")
for file in train_txt:
    shutil.copy(os.path.join(folder_path, file), "labels/train")
for file in val_jpg:
    shutil.copy(os.path.join(folder_path, file), "images/val")
for file in val_txt:
    shutil.copy(os.path.join(folder_path, file), "labels/val")

print("Done!")
This produces the images and labels folders.
I did not include a test set here; only the training and validation sets are used.
After training your own model, place the generated .onnx and .yaml files in the same directory.
Configuring the yaml file: do not use bare numbers as class names, or they will be parsed as int and no boxes will be generated (i.e., it errors out). The class names and their count must match exactly what was configured during training.
That is, whatever class names were filled in for training must appear, identically, in the yaml file configured above.
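For reference, a custom-model config for X-AnyLabeling looks roughly like the sketch below. The field names follow the custom-model examples in the X-AnyLabeling repo at the time of writing (check them against your version); the model path and class names are placeholders:

type: yolov8
name: yolov8s-custom
display_name: YOLOv8s Custom
model_path: best.onnx
classes:
  - classA
  - classB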
The YOLOv8 source code:
https://github.com/ultralytics/ultralytics
First install the library that YOLOv8 depends on:
pip install ultralytics
Then work through the following code. First download a pretrained model: https://docs.ultralytics.com/tasks/detect/
from ultralytics import YOLO

# Load a model (raw strings keep the Windows backslashes from being read as escapes)
model = YOLO(r"D:\MyProject\yolov8s.pt")  # load a pretrained model (recommended for training)

# Use the model
model.train(data=r"D:\MyProject\data\myData.yaml", epochs=3)  # train the model
metrics = model.val()  # evaluate model performance on the validation set
results = model("https://ultralytics.com/images/bus.jpg")  # predict on an image
path = model.export(format="onnx")  # export the model to ONNX format
Place the previously annotated data under the dataset path and configure myData.yaml as follows:
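A minimal sketch of myData.yaml in the ultralytics dataset format (the root path and class names are placeholders for your own):

path: D:/MyProject/data   # dataset root
train: images/train       # training images, relative to path
val: images/val           # validation images, relative to path
names:
  0: classA
  1: classB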
First convert the previously trained .pt model to ONNX via model.export(format="onnx").
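For example (the weights path here is an assumption; by default ultralytics writes it to runs/detect/train/weights/best.pt):

from ultralytics import YOLO

model = YOLO(r"D:\MyProject\runs\detect\train\weights\best.pt")  # assumed path to your trained weights
model.export(format="onnx")  # writes best.onnx next to the .pt file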
The project layout is as follows:
Environment setup (1): configure Anaconda, OpenCV, CUDA, and TensorRT.
Installing and using TensorRT: link
Environment setup (2):
Environment setup (3): add the following libraries to the project's linker dependencies:
opencv_world455.lib
myelin64_1.lib
nvinfer.lib
nvonnxparser.lib
nvparsers.lib
nvinfer_plugin.lib
cuda.lib
cudadevrt.lib
cudart_static.lib
Because the dependencies under x64/Release are too large, they were uploaded as split archives:
Environment dependencies for deploying YOLOv8 with TensorRT (part 1)
Environment dependencies for deploying YOLOv8 with TensorRT (part 2)
Download the two archives above into the same folder and extract the .001 volume directly; that unpacks the dependencies from both archives. Copy all of the extracted dll files into the /x64/Release path.
Running the code:
#include <iostream>
#include <fstream>
#include "logging.h"
#include "NvOnnxParser.h"
#include "NvInfer.h"

using namespace nvinfer1;
using namespace nvonnxparser;

static Logger gLogger;

int main(int argc, char** argv)
{
    const char* onnx_filename = "D://TR_YOLOV8_DLL//zy_onnx2engine//models//best.onnx";
    const char* engine_filename = "D://TR_YOLOV8_DLL//zy_onnx2engine//models//best.engine";

    // 1. Create the ONNX parser and parse the model
    IBuilder* builder = createInferBuilder(gLogger);
    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
    parser->parseFromFile(onnx_filename, static_cast<int>(Logger::Severity::kWARNING));
    for (int i = 0; i < parser->getNbErrors(); ++i) {
        std::cout << parser->getError(i)->desc() << std::endl;
    }
    std::cout << "successfully load the onnx model" << std::endl;

    // 2. Build the engine
    unsigned int maxBatchSize = 1;
    builder->setMaxBatchSize(maxBatchSize);
    IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 20);            // 1 MB workspace
    //config->setMaxWorkspaceSize(128 * (1 << 20)); // 128 MB
    config->setFlag(BuilderFlag::kFP16);             // build an FP16 engine
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);

    // 3. Serialize the model and write it to disk
    IHostMemory* gieModelStream = engine->serialize();
    std::ofstream p(engine_filename, std::ios::binary);
    if (!p) {
        std::cerr << "could not open plan output file" << std::endl;
        return -1;
    }
    p.write(reinterpret_cast<const char*>(gieModelStream->data()), gieModelStream->size());
    gieModelStream->destroy();
    std::cout << "successfully generate the trt engine model" << std::endl;
    return 0;
}
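As an aside, the same ONNX-to-engine conversion can also be done without writing code, using the trtexec tool bundled in the TensorRT bin directory, e.g.:

trtexec --onnx=best.onnx --saveEngine=best.engine --fp16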
Complete project: https://download.csdn.net/download/qq_44747572/88791740
The environment setup for both of these projects is the same as above; just repeat the same steps.
// Xray_test.cpp : defines the entry point for the console application.
#define _AFXDLL
#include <iomanip>
#include <string>
#include <fstream>
#include <ctime>   // for clock()
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <io.h>
#include "segmentationModel.h"

// stuff we know about the network and the input/output blobs
#define input_h 640
#define input_w 640
#define channel 3
#define classe 2        // number of classes (80 for the COCO models)
#define segWidth 160
#define segHeight 160
#define segChannels 32
#define Num_box 34000   // other values used: 8400, 1280, 33600

MODELDLL predictClasse;
#pragma comment(lib, "..//x64//Release//segmentationModel.lib")

using namespace cv;
using namespace std;

int main(int argc, char** argv)
{
    // Detection test
    string engine_filename = "best.engine";
    string img_filename = "imgs//";
    predictClasse.LoadYoloV8DetectEngine(engine_filename);

    string pattern_jpg = img_filename + "*.jpg"; // test_images
    vector<cv::String> image_files;
    glob(pattern_jpg, image_files);

    vector<ObjectTR> output;
    float confTh = 0.25;
    for (int i = 0; i < image_files.size(); i++) {
        Mat src = imread(image_files[i], 1);
        Mat dst;
        clock_t start = clock();
        predictClasse.YoloV8DetectPredict(src, dst, channel, classe, input_h, input_w, Num_box, confTh, output);
        clock_t end = clock();
        std::cout << "total time: " << end - start << "ms" << std::endl;
        cv::namedWindow("output.jpg", 0);
        cv::imshow("output.jpg", dst);
        cv::waitKey(0);
    }

    // Segmentation test
    //string engine_filename = "yolov8n-seg.engine";
    //string img_filename = "imgs//bus.jpg";
    //predictClasse.LoadYoloV8SegEngine(engine_filename);
    //Mat src = imread(img_filename, 1);
    //Mat dst;
    //predictClasse.YoloV8SegPredict(src, dst, channel, classe, input_h, input_w, segChannels, segWidth, segHeight, Num_box);
    //cv::imshow("output.jpg", dst);
    //cv::waitKey(0);

    return 0;
}
#include "pch.h" #include "segmentationModel.h" #define DEVICE 0 // GPU id static const float CONF_THRESHOLD = 0.25; static const float NMS_THRESHOLD = 0.5; static const float MASK_THRESHOLD = 0.5; const char* INPUT_BLOB_NAME = "images"; const char* OUTPUT_BLOB_NAME = "output0";//detect const char* OUTPUT_BLOB_NAME1 = "output1";//mask static Logger gLogger; IRuntime* runtimeYolov8Seg; ICudaEngine* engineYolov8Seg; IExecutionContext* contextYolov8Seg; MODELDLL::MODELDLL() { } MODELDLL::~MODELDLL() { } //yolov8检测推理 bool MODELDLL::LoadYoloV8DetectEngine(const std::string& engineName) { // create a model using the API directly and serialize it to a stream char* trtModelStream{ nullptr }; //char* trtModelStream==nullptr; 开辟空指针后 要和new配合使用,比如 trtModelStream = new char[size] size_t size{ 0 };//与int固定四个字节不同有所不同,size_t的取值range是目标平台下最大可能的数组尺寸,一些平台下size_t的范围小于int的正数范围,又或者大于unsigned int. 使用Int既有可能浪费,又有可能范围不够大。 std::ifstream file(engineName, std::ios::binary); if (file.good()) { std::cout << "load engine success" << std::endl; file.seekg(0, file.end);//指向文件的最后地址 size = file.tellg();//把文件长度告诉给size //std::cout << "\nfile:" << argv[1] << " size is"; //std::cout << size << ""; file.seekg(0, file.beg);//指回文件的开始地址 trtModelStream = new char[size];//开辟一个char 长度是文件的长度 assert(trtModelStream);// file.read(trtModelStream, size);//将文件内容传给trtModelStream file.close();//关闭 } else { std::cout << "load engine failed" << std::endl; return 1; } runtimeYolov8Seg = createInferRuntime(gLogger); assert(runtimeYolov8Seg != nullptr); bool didInitPlugins = initLibNvInferPlugins(nullptr, ""); engineYolov8Seg = runtimeYolov8Seg->deserializeCudaEngine(trtModelStream, size, nullptr); assert(engineYolov8Seg != nullptr); contextYolov8Seg = engineYolov8Seg->createExecutionContext(); assert(contextYolov8Seg != nullptr); delete[] trtModelStream; return true; } bool MODELDLL::YoloV8DetectPredict(const Mat& src, Mat& dst, const int& channel, const int& classe, const int& input_h, const int& input_w, const int& Num_box, float& CONF_THRESHOLD, vector<ObjectTR>& output) { cudaSetDevice(DEVICE); if (src.empty()) { std::cout << "image load faild" << std::endl; return 1; } int img_width = src.cols; int img_height = src.rows; std::cout << "宽高:" << img_width << " " << img_height << std::endl; // Subtract mean from image float* data = new float[channel * input_h * input_w]; Mat pr_img0, pr_img; std::vector<int> padsize; Mat tempImg = src.clone(); pr_img = preprocess_img(tempImg, input_h, input_w, padsize); // Resize int newh = padsize[0], neww = padsize[1], padh = padsize[2], padw = padsize[3]; float ratio_h = (float)src.rows / newh; float ratio_w = (float)src.cols / neww; int i = 0;// [1,3,INPUT_H,INPUT_W] //std::cout << "pr_img.step" << pr_img.step << std::endl; clock_t start_p = clock(); for (int row = 0; row < input_h; ++row) { uchar* uc_pixel = pr_img.data + row * pr_img.step;//pr_img.step=widthx3 就是每一行有width个3通道的值 for (int col = 0; col < input_w; ++col) { data[i] = (float)uc_pixel[2] / 255.0; data[i + input_h * input_w] = (float)uc_pixel[1] / 255.0; data[i + 2 * input_h * input_w] = (float)uc_pixel[0] / 255.; uc_pixel += 3; ++i; } } //优化一:从30多ms降速到20多,仅提速10ms左右,效果不明显 //#pragma omp parallel for // for (int row = 0; row < input_h; ++row) { // const uchar* uc_pixel = pr_img.data + row * pr_img.step; // int i = row * input_w; // for (int col = 0; col < input_w; ++col) { // float r = static_cast<float>(uc_pixel[2]) / 255.0f; // float g = static_cast<float>(uc_pixel[1]) / 255.0f; // float b = static_cast<float>(uc_pixel[0]) / 255.0f; // data[i] = r; 
// data[i + input_h * input_w] = g; // data[i + 2 * input_h * input_w] = b; // uc_pixel += 3; // ++i; // } // } clock_t end_p = clock(); std::cout << "preprocess_img时间:" << end_p - start_p << "ms" << std::endl; // Run inference static const int OUTPUT_SIZE = Num_box * (classe + 4);//output0 float* prob = new float[OUTPUT_SIZE]; //for (int i = 0; i < 10; i++) {//计算10次的推理速度 // auto start = std::chrono::system_clock::now(); // doInference(*context, data, prob, prob1, 1); // auto end = std::chrono::system_clock::now(); // std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl; // } //auto start = std::chrono::system_clock::now(); clock_t start = clock(); //推理 int batchSize = 1; const ICudaEngine& engine = (*contextYolov8Seg).getEngine(); // Pointers to input and output device buffers to pass to engine. // Engine requires exactly IEngine::getNbBindings() number of buffers. assert(engine.getNbBindings() == 3); void* buffers[3]; // In order to bind the buffers, we need to know the names of the input and output tensors. // Note that indices are guaranteed to be less than IEngine::getNbBindings() const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME); const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);