查看香橙派 AIpro SoC版本
根据上面查看到SoC版本是 310B4,在转换模型时选择Ascend310B4
# -*- coding: utf-8 -*-
"""Export a torchvision ResNet50 checkpoint to ONNX (static and dynamic batch)."""
import torch
import torchvision
import onnx
import onnxruntime
import torch.nn as nn

# Build the PyTorch ResNet50 model.
# Online download alternative:
# model = torchvision.models.resnet50(pretrained=True)
# Here the weights are loaded from a local checkpoint instead.
checkpoint_path = "/home/model/resnet50-19c8e357.pth"
model = torchvision.models.resnet50().to("cpu")
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
model.load_state_dict(checkpoint)
model.eval()

# Dummy input that fixes the traced shape: one 3x224x224 image.
batch_size = 1
input_shape = (batch_size, 3, 224, 224)
input_data = torch.randn(input_shape)

# Output files. The names match the "resnet50_static.onnx" model that the
# inference scripts load.
output_path_static = "resnet50_static.onnx"
output_path_dynamic = "resnet50_dynamic.onnx"

# Dynamic export: axis 0 of both input and output is a symbolic batch size.
torch.onnx.export(
    model,
    input_data,
    output_path_dynamic,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)

# Static export: every dimension is fixed to the dummy input's shape.
torch.onnx.export(
    model,
    input_data,
    output_path_static,
    input_names=["input"],
    output_names=["output"],
)

# Structural validation of both exported graphs before they are used.
for exported_path in (output_path_static, output_path_dynamic):
    onnx.checker.check_model(onnx.load(exported_path))

# Smoke test: the dynamic model must accept a batch size other than the one
# used for tracing.
session = onnxruntime.InferenceSession(
    output_path_dynamic, providers=["CPUExecutionProvider"]
)
new_batch_size = 2
new_input_shape = (new_batch_size, 3, 224, 224)
new_input_data = torch.randn(new_input_shape)
outputs = session.run(["output"], {"input": new_input_data.numpy()})
print(outputs)
# -*- coding: utf-8 -*-
"""Classify one image with the exported ResNet50 ONNX model on ONNX Runtime."""
import onnxruntime
import numpy as np
from torchvision import datasets, models, transforms
from PIL import Image
import torch.nn as nn
import torch


def postprocess(outputs):
    """Turn a batch of logits into ``(class_index, probability)`` pairs.

    Args:
        outputs: 2-D array of raw scores, one row per sample.

    Returns:
        A list with one ``(int, float)`` tuple per row: the arg-max class
        index and its softmax probability.
    """
    # Shift each row by its maximum before exponentiating so that large
    # logits cannot overflow np.exp; the softmax value is unchanged.
    shifted = outputs - np.max(outputs, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    probabilities = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    predictions = np.argmax(probabilities, axis=1)
    return [
        (int(pred), float(row[pred]))
        for pred, row in zip(predictions, probabilities)
    ]


onnx_model_path = "/home/model/resnet50_static.onnx"
ort_session = onnxruntime.InferenceSession(
    onnx_model_path, providers=["CPUExecutionProvider"]
)

# Resize -> center crop -> tensor -> ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Force three RGB channels so grayscale or RGBA files still match the
# 3-channel Normalize above.
image = Image.open("/home/dog1_1024_683.jpg").convert("RGB")
image = transform(image).unsqueeze(0)  # add the batch dimension

input_data = image.detach().numpy()
outputs_np = ort_session.run(None, {'input': input_data})
outputs = outputs_np[0]
res = postprocess(outputs)
print(res)
[(162, 0.9634788632392883)]
curl -o imagenet_classes.txt https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
# -*- coding: utf-8 -*-
"""Print the top-5 ImageNet classes for one image and the ONNX Runtime CPU latency."""
import onnxruntime
import numpy as np
from torchvision import datasets, models, transforms
from PIL import Image
import torch.nn as nn
import torch
from onnx import numpy_helper
import time

# One class name per line; the line index is the class id.
with open("imagenet_classes.txt", "r") as f:
    categories = [line.strip() for line in f]


def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The global maximum is subtracted first so np.exp cannot overflow.
    """
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()


onnx_model_path = "/home/model/resnet50_static.onnx"

# Resize -> center crop -> tensor -> ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Force three RGB channels so grayscale or RGBA files still match the
# 3-channel Normalize above.
image = Image.open("/home/dog1_1024_683.jpg").convert("RGB")
image = transform(image).unsqueeze(0)  # add the batch dimension

# A single session is enough; it is pinned to the CPU execution provider.
session = onnxruntime.InferenceSession(onnx_model_path, providers=['CPUExecutionProvider'])

# Convert the input before starting the timer so only session.run is measured.
input_arr = image.detach().numpy()

latency = []
start = time.perf_counter()
output = session.run(None, {'input': input_arr})[0]
latency.append(time.perf_counter() - start)

output = output.flatten()
output = softmax(output)
top5_catid = np.argsort(-output)[:5]
for catid in top5_catid:
    print(catid, categories[catid], output[catid])

average_ms = sum(latency) * 1000 / len(latency)
print(f"ONNX Runtime CPU Inference time = {average_ms:.2f} ms")
162 beagle 0.963479
167 English foxhound 0.020814817
166 Walker hound 0.011742038
161 basset 0.0024754668
164 bluetick 0.0004774033
ONNX Runtime CPU Inference time = 20.01 ms
transforms.Resize 的处理方式不同:有的地方使用 256,有的地方使用 224。
YOLOv8由Ultralytics 提供,YOLOv8 支持全方位的视觉 AI 任务,包括检测、分割、姿态估计、跟踪和分类。
# Model parameters
nc: 1000 # number of output classes
scales: # compound scaling constants; e.g. 'model=yolov8n-cls.yaml' selects yolov8-cls.yaml with scale 'n'
  # each entry is [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# Backbone: five stacked ResNetLayer stages
backbone:
  # each entry is [from, repeats, module, args]
  # NOTE(review): the meaning of the ResNetLayer args is not shown here - verify against the module definition
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32

# Head: a single classification layer with nc outputs
head:
  - [-1, 1, Classify, [nc]] # Classify
IMAGENET_MEAN = 0.485, 0.456, 0.406  # RGB mean
IMAGENET_STD = 0.229, 0.224, 0.225  # RGB standard deviation


def classify_transforms(
    size=224,
    mean=DEFAULT_MEAN,
    std=DEFAULT_STD,
    interpolation=Image.BILINEAR,
    crop_fraction: float = DEFAULT_CROP_FRACTION,
):
    """
    Classification transforms for evaluation/inference. Inspired by timm/data/transforms_factory.py.

    The pipeline is Resize -> CenterCrop(size) -> ToTensor -> Normalize(mean, std).

    Args:
        size (int | tuple | list): target image size; a 2-element sequence or a single int
            that is used for both dimensions.
        mean (tuple): per-channel mean passed to Normalize. Defaults to DEFAULT_MEAN.
        std (tuple): per-channel std passed to Normalize. Defaults to DEFAULT_STD.
        interpolation: interpolation mode passed to Resize. Defaults to PIL's Image.BILINEAR.
        crop_fraction (float): the image is first resized to size / crop_fraction and then
            center-cropped to size. Defaults to DEFAULT_CROP_FRACTION.

    Returns:
        (T.Compose): torchvision transforms
    """
    import torchvision.transforms as T  # scope for faster 'import ultralytics'

    if isinstance(size, (tuple, list)):
        assert len(size) == 2, f"'size' must have exactly 2 elements, got {len(size)}"
        scale_size = tuple(math.floor(x / crop_fraction) for x in size)
    else:
        scale_size = math.floor(size / crop_fraction)
        scale_size = (scale_size, scale_size)

    if scale_size[0] == scale_size[1]:
        # Square target: a scalar size makes Resize match the shortest edge,
        # so the aspect ratio is preserved and CenterCrop trims the longer edge.
        tfl = [T.Resize(scale_size[0], interpolation=interpolation)]
    else:
        # Non-square target: Resize is given the full size pair. The
        # interpolation mode is forwarded here as well, so the caller's
        # choice is honored in both branches.
        tfl = [T.Resize(scale_size, interpolation=interpolation)]
    tfl += [T.CenterCrop(size)]

    tfl += [
        T.ToTensor(),
        T.Normalize(
            mean=torch.tensor(mean),
            std=torch.tensor(std),
        ),
    ]

    return T.Compose(tfl)
namespace {
// Per-channel mean on the 0-255 pixel scale (chn_0/1/2 = R/G/B).
const float min_chn_0 = 123.675;
const float min_chn_1 = 116.28;
const float min_chn_2 = 103.53;
// Per-channel reciprocal of the standard deviation on the 0-255 scale.
const float var_reci_chn_0 = 0.0171247538316637;
const float var_reci_chn_1 = 0.0175070028011204;
const float var_reci_chn_2 = 0.0174291938997821;
}

// Load an image, stretch it to the model input size and store it in
// imageBytes as normalized float data in planar (CHW) RGB order.
// Always returns SUCCESS.
Result SampleResnetQuickStart::ProcessInput(const string testImgPath)
{
    // Read the image from disk with OpenCV.
    imagePath = testImgPath;
    srcImage = imread(testImgPath);

    // Scale straight to modelWidth_ x modelHeight_ (no cropping).
    Mat resizedImage;
    resize(srcImage, resizedImage, Size(modelWidth_, modelHeight_));

    const int32_t channel = resizedImage.channels();
    const int32_t rows = resizedImage.rows;
    const int32_t cols = resizedImage.cols;

    // Both tables are indexed by the source (BGR) channel, hence the reversed order.
    const float meanBgr[3] = {min_chn_2, min_chn_1, min_chn_0};
    const float scaleBgr[3] = {var_reci_chn_2, var_reci_chn_1, var_reci_chn_0};

    // Allocate and zero the planar output buffer.
    // NOTE(review): the malloc result is not checked.
    imageBytes = (float*)malloc(channel * rows * cols * sizeof(float));
    memset(imageBytes, 0, channel * rows * cols * sizeof(float));

    // Source channel c lands in destination plane (2 - c): BGR -> RGB.
    const uint8_t lastPlane = 2;
    for (int h = 0; h < rows; ++h) {
        for (int w = 0; w < cols; ++w) {
            const cv::Vec3b pixel = resizedImage.at<cv::Vec3b>(h, w);
            for (int c = 0; c < channel; ++c) {
                const int dstIdx = (lastPlane - c) * rows * cols + h * cols + w;
                imageBytes[dstIdx] = static_cast<float>((pixel[c] - meanBgr[c]) * scaleBgr[c]);
            }
        }
    }
    return SUCCESS;
}
img_origin = Image.open(pic_path).convert('RGB')
from torchvision import transforms
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
trans_list = transforms.Compose([transforms.Resize(256),
img = trans_list(img_origin)
(base) HwHiAiUser@orangepiaipro:~/samples/model-adapter-models/cls/edge_infer$ ./run.sh set env successfully!! start exec atc [Sample] init resource stage: Init resource success load model mobilenetv3_100_bs1.om Init model resource [Model] create model output dataset: [Model] create model output dataset success [Model] class Model init resource stage success acl.mdl.execute exhaust 0:00:00.004750 class result : cat pic name: cat pre cost:7050.8ms forward cost:6.8ms post cost:0.0ms total cost:7057.6ms FPS:0.1 image name :./data/cat/cat.23.jpg, infer result: cat acl.mdl.execute exhaust 0:00:00.004660 class result : cat pic name: cat pre cost:14.0ms forward cost:5.2ms post cost:0.0ms total cost:19.2ms FPS:52.2 image name :./data/cat/cat.76.jpg, infer result: cat
Normalize 数值的写法不同:YOLOv8 和 PyTorch 使用的是 IMAGENET_MEAN 和 IMAGENET_STD(对应 0~1 范围的像素值),而下面 C++ 样例中使用的是均值乘以 255、以及标准差乘以 255 之后取倒数的数值(对应 0~255 范围的像素值)。
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
namespace {
const float min_chn_0 = 123.675;
const float min_chn_1 = 116.28;
const float min_chn_2 = 103.53;
const float var_reci_chn_0 = 0.0171247538316637;
const float var_reci_chn_1 = 0.0175070028011204;
const float var_reci_chn_2 = 0.0174291938997821;
YOLOv8和PyTorch都有 CenterCrop 中心剪裁处理
根据对比结果,我们只需要处理 IMAGENET_MEAN 和 IMAGENET_STD,再加上 CenterCrop 中心裁剪处理即可。
static const float IMAGENET_MEAN[3] = { 0.485, 0.456, 0.406 }; static const float IMAGENET_STD[3] = { 0.229, 0.224, 0.225 }; void centercrop_and_resize(const cv::Mat& src_img, cv::Mat& dst_img,int target_size) { int height = src_img.rows; int width = src_img.cols; if(height >= width)// hw { cv::resize(src_img, dst_img, cv::Size(target_size,target_size * height / width), 0, 0, cv::INTER_AREA); } else { cv::resize(src_img, dst_img, cv::Size(target_size * width / height,target_size), 0, 0, cv::INTER_AREA); } height = dst_img.rows; width = dst_img.cols; cv::Point center(width/2, height/2); cv::Size size(target_size, target_size); cv::getRectSubPix(dst_img, size, center, dst_img); } Result SampleResnetQuickStart::ProcessInput(const string testImgPath) { // read image from file by cv imagePath = testImgPath; srcImage = imread(testImgPath); cv::cvtColor(srcImage, srcImage, cv::COLOR_BGR2RGB); Mat resizedImage; centercrop_and_resize(srcImage,resizedImage,224); // get properties of image int32_t channel = resizedImage.channels(); int32_t resizeHeight = resizedImage.rows; int32_t resizeWeight = resizedImage.cols; std::vector<cv::Mat> rgbChannels(3); cv::split(resizedImage, rgbChannels); for (size_t i = 0; i < rgbChannels.size(); i++) // resizedImage = resizedImage / 255.0; { rgbChannels[i].convertTo(rgbChannels[i], CV_32FC1, 1.0 / ( 255.0* IMAGENET_STD[i]), (0.0 - IMAGENET_MEAN[i]) / IMAGENET_STD[i]); } int len = channel * resizeHeight * resizeWeight * sizeof(float); imageBytes = (float *)malloc(len); memset(imageBytes, 0, len); int index = 0; for (int c = 0; c <3; c++) { // R,G,B for (int h = 0; h < modelHeight_; ++h) { for (int w = 0; w < modelWidth_; ++w) { imageBytes[index] = rgbChannels[c].at<float>(h, w); // R->G->B index++; } } } return SUCCESS; }
// Convert iImg to 3-channel RGB, cut out the centered square whose side is the
// shorter image edge, and resize that square into oImg using the two values in
// iImgSize. Returns RET_OK.
char* centercrop_and_resize(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
{
    // Anything that is not 3-channel is treated as grayscale.
    if (iImg.channels() != 3)
    {
        cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB);
    }
    else
    {
        oImg = iImg.clone();
        cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB);
    }

    // Centered square with side = shorter edge.
    const int rows = iImg.rows;
    const int cols = iImg.cols;
    const int side = min(rows, cols);
    const int top = (rows - side) / 2;
    const int left = (cols - side) / 2;
    const cv::Rect centerSquare(left, top, side, side);

    cv::resize(oImg(centerSquare), oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
    return RET_OK;
}
// Read the image, then center-crop and resize it to 640x640.
cv::Mat img = cv::imread(img_path);
std::vector<int> imgSize = { 640, 640 };
cv::Mat processedImg;
// Pass the image that was just read (the variable declared above is img).
centercrop_and_resize(img, imgSize, processedImg);
processedImg 就是我们要得到的 cv::Mat:图像经过 centercrop 之后,最终大小是 640×640,通道顺序是 RGB。
# Constants from the C++ sample, kept here for comparison:
# namespace {
# const float min_chn_0 = 123.675;
# const float min_chn_1 = 116.28;
# const float min_chn_2 = 103.53;
# const float var_reci_chn_0 = 0.0171247538316637;
# const float var_reci_chn_1 = 0.0175070028011204;
# const float var_reci_chn_2 = 0.0174291938997821;
# }
import numpy as np

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# First the mean on the 0-255 scale, then the reciprocal of the std on the
# 0-255 scale. Expected output:
#   [123.675 116.28  103.53 ]
#   [0.01712475 0.017507   0.01742919]
for values in (mean * 255, 1 / (std * 255)):
    print(values)
# 0.485 × 255 = 123.675
# 0.456 × 255 = 116.28
# 0.406 × 255 = 103.53
# 0.229 × 255 = 58.395
# 0.224 × 255 = 57.12
# 0.225 × 255 = 57.375
# 1 ÷ 58.395 = 0.017124754
# 1 ÷ 57.12 = 0.017507003
# 1 ÷ 57.375 = 0.017429194
加上了 CenterCrop
