因此,引入自动化技术来提高快递包裹分拣的效率和准确性成为了一个研究热点。目前,基于计算机视觉的物体检测和识别技术在快递包裹分拣中得到了广泛应用。其中,YOLO(You Only Look Once)是一种非常流行的物体检测算法,其具有实时性和准确性的优势,被广泛应用于快递包裹分拣领域。
(2)打开labelImg并选择“Open Dir”来选择你的图片目录。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert labelImg (Pascal VOC style) XML annotations into YOLO txt labels.

For every ``<name>.xml`` found in ``label_xml/`` next to this script, a
``<name>.txt`` is written to ``label_txt/`` with one line per object:
``class_id x_center y_center width height`` (all four box values normalised
by the image width/height read from the XML ``<size>`` element).
"""
import os
import xml.etree.ElementTree as ET

# Class names in first-seen order; the list index is the class id written out.
classes = []
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))


def convert(size, box):
    """Turn a pixel box into a normalised centre/size box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the image
        width (x values) or image height (y values).
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def convert_annotation(image_id, xml_dir=None, txt_dir=None):
    """Convert one XML annotation file into a YOLO txt label file.

    Args:
        image_id: File name without extension (``<image_id>.xml`` is read,
            ``<image_id>.txt`` is written).
        xml_dir: Directory holding the XML files. Defaults to ``label_xml``
            next to this script.
        txt_dir: Directory for the txt output. Defaults to ``label_txt`` next
            to this script; it is created when missing.

    Side effects:
        Appends previously unseen class names to the module-level ``classes``.
    """
    # Anchor both directories to the script location so that reading agrees
    # with the directory listing done by main(), and build paths with
    # os.path.join instead of a hard-coded Windows backslash.
    if xml_dir is None:
        xml_dir = os.path.join(CURRENT_DIR, 'label_xml')
    if txt_dir is None:
        txt_dir = os.path.join(CURRENT_DIR, 'label_txt')
    os.makedirs(txt_dir, exist_ok=True)

    # Passing the path lets ElementTree honour the encoding declared in the XML.
    root = ET.parse(os.path.join(xml_dir, '%s.xml' % image_id)).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            classes.append(cls)  # register a class the first time it is seen
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    # The context manager guarantees the label file is flushed and closed.
    with open(os.path.join(txt_dir, '%s.txt' % image_id), 'w', encoding='utf-8') as out_file:
        out_file.writelines(lines)


def main():
    """Convert every XML file in ``label_xml`` and print the collected classes."""
    xml_path = os.path.join(CURRENT_DIR, 'label_xml')
    # Sorted so that class ids do not depend on the OS directory ordering.
    for img_xml in sorted(os.listdir(xml_path)):
        # splitext keeps dots inside the file name; skip anything that is not XML.
        label_name, ext = os.path.splitext(img_xml)
        if ext.lower() != '.xml':
            continue
        print(label_name)
        convert_annotation(label_name)
    print("Classes:")
    print(classes)  # final class list; index == class id


if __name__ == '__main__':
    main()
| |-----images
| |-----labels
| |-----images
| |-----labels
Epoch gpu_mem box obj cls labels img_size
1/200 20.8G 0.01576 0.01955 0.007536 22 1280: 100%|██████████| 849/849 [14:42<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:14<00:00, 2.87it/s]
all 3395 17314 0.994 0.957 0.0957 0.0843
Epoch gpu_mem box obj cls labels img_size
2/200 20.8G 0.01578 0.01923 0.007006 22 1280: 100%|██████████| 849/849 [14:44<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:12<00:00, 2.95it/s]
all 3395 17314 0.996 0.956 0.0957 0.0845
Epoch gpu_mem box obj cls labels img_size
3/200 20.8G 0.01561 0.0191 0.006895 27 1280: 100%|██████████| 849/849 [10:56<00:00, 1.29it/s]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|███████ | 187/213 [00:52<00:00, 4.04it/s]
all 3395 17314 0.996 0.957 0.0957 0.0845
class YOLOv5Exporter: def __init__(self, weights, include): self.weights = weights self.include = include def export(self): if 'torchscript' in self.include: self.export_torchscript() if 'onnx' in self.include: self.export_onnx() if 'openvino' in self.include: self.export_openvino() if 'engine' in self.include: self.export_tensorrt() if 'coreml' in self.include: self.export_coreml() if 'saved_model' in self.include: self.export_saved_model() if 'pb' in self.include: self.export_graphdef() if 'tflite' in self.include: self.export_tflite() if 'edgetpu' in self.include: self.export_edgetpu() if 'tfjs' in self.include: self.export_tfjs() if 'paddle' in self.include: self.export_paddle() def export_torchscript(self): # YOLOv5 TorchScript model export LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...') f = file.with_suffix('.torchscript') ts = torch.jit.trace(model, im, strict=False) d = {'shape': im.shape, 'stride': int(max(model.stride)), 'names': model.names} extra_files = {'config.txt': json.dumps(d)} # torch._C.ExtraFilesMap() if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files) else: ts.save(str(f), _extra_files=extra_files) return f, None def export_onnx(self): # YOLOv5 ONNX export check_requirements('onnx>=1.12.0') import onnx LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...') f = str(file.with_suffix('.onnx')) output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0'] if dynamic: dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}} # shape(1,3,640,640) if isinstance(model, SegmentationModel): dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85) dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'} # shape(1,32,160,160) elif isinstance(model, DetectionModel): dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85) 
torch.onnx.export( model.cpu() if dynamic else model, # --dynamic only compatible with cpu im.cpu() if dynamic else im, f, verbose=False, opset_version=opset, do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False input_names=['images'], output_names=output_names, dynamic_axes=dynamic or None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # Metadata d = {'stride': int(max(model.stride)), 'names': model.names} for k, v in d.items(): meta = model_onnx.metadata_props.add() meta.key, meta.value = k, str(v) onnx.save(model_onnx, f) # Simplify if simplify: try: cuda = torch.cuda.is_available() check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1')) import onnxsim LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') model_onnx, check = onnxsim.simplify(model_onnx) assert check, 'assert check failed' onnx.save(model_onnx, f) except Exception as e: LOGGER.info(f'{prefix} simplifier failure: {e}') return f, model_onnx def export_openvino(self): # YOLOv5 OpenVINO export check_requirements('openvino-dev>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/ import openvino.runtime as ov # noqa from openvino.tools import mo # noqa LOGGER.info(f'\n{prefix} starting export with openvino {ov.__version__}...') f = str(file).replace(file.suffix, f'_openvino_model{os.sep}') f_onnx = file.with_suffix('.onnx') f_ov = str(Path(f) / file.with_suffix('.xml').name) if int8: check_requirements('nncf>=2.4.0') # requires at least version 2.4.0 to use the post-training quantization import nncf import numpy as np from openvino.runtime import Core from utils.dataloaders import create_dataloader core = Core() onnx_model = core.read_model(f_onnx) # export def prepare_input_tensor(image: np.ndarray): input_tensor = image.astype(np.float32) # uint8 to fp16/32 input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0 
if input_tensor.ndim == 3: input_tensor = np.expand_dims(input_tensor, 0) return input_tensor def gen_dataloader(yaml_path, task='train', imgsz=640, w
export.py是一个用于将YOLOv5 PyTorch模型导出为其他格式的程序文件。它支持导出的格式包括PyTorch、TorchScript、ONNX、OpenVINO、TensorRT、CoreML、TensorFlow SavedModel、TensorFlow GraphDef、TensorFlow Lite、TensorFlow Edge TPU、TensorFlow.js和PaddlePaddle。通过运行export.py文件,可以将YOLOv5模型导出为所需的格式。
class Attention(nn.Module):
    """Attention heads used by ODConv2d.

    The input is globally average-pooled, passed through a 1x1 conv + BN +
    ReLU, and then fed to up to four 1x1-conv heads producing channel, filter,
    spatial and kernel attentions. A head that is not needed for the given
    configuration is replaced by ``skip``, which returns the scalar 1.0.
    """

    def __init__(self, in_planes, out_planes, kernel_size, groups=1, reduction=0.0625, kernel_num=4, min_channel=16):
        super().__init__()
        # Width of the shared hidden layer, never below ``min_channel``.
        hidden = max(int(in_planes * reduction), min_channel)
        self.kernel_size = kernel_size
        self.kernel_num = kernel_num
        self.temperature = 1.0

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(in_planes, hidden, 1, bias=False)
        self.bn = nn.BatchNorm2d(hidden)
        self.relu = nn.ReLU(inplace=True)

        # Channel head: always present.
        self.channel_fc = nn.Conv2d(hidden, in_planes, 1, bias=True)
        self.func_channel = self.get_channel_attention

        # Filter head: skipped for depth-wise convolution.
        is_depthwise = in_planes == groups and in_planes == out_planes
        if not is_depthwise:
            self.filter_fc = nn.Conv2d(hidden, out_planes, 1, bias=True)
            self.func_filter = self.get_filter_attention
        else:
            self.func_filter = self.skip

        # Spatial head: skipped for point-wise (1x1) convolution.
        if kernel_size != 1:
            self.spatial_fc = nn.Conv2d(hidden, kernel_size * kernel_size, 1, bias=True)
            self.func_spatial = self.get_spatial_attention
        else:
            self.func_spatial = self.skip

        # Kernel head: skipped when there is a single candidate kernel.
        if kernel_num != 1:
            self.kernel_fc = nn.Conv2d(hidden, kernel_num, 1, bias=True)
            self.func_kernel = self.get_kernel_attention
        else:
            self.func_kernel = self.skip

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-init every conv (zero bias) and reset BN to weight 1 / bias 0."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def update_temperature(self, temperature):
        """Set the temperature that divides the head logits."""
        self.temperature = temperature

    @staticmethod
    def skip(_):
        """Neutral attention: multiplying by the returned 1.0 changes nothing."""
        return 1.0

    def get_channel_attention(self, x):
        """Sigmoid attention reshaped to (batch, -1, 1, 1) from ``channel_fc``."""
        logits = self.channel_fc(x).view(x.size(0), -1, 1, 1)
        return torch.sigmoid(logits / self.temperature)

    def get_filter_attention(self, x):
        """Sigmoid attention reshaped to (batch, -1, 1, 1) from ``filter_fc``."""
        logits = self.filter_fc(x).view(x.size(0), -1, 1, 1)
        return torch.sigmoid(logits / self.temperature)

    def get_spatial_attention(self, x):
        """Sigmoid attention reshaped to (batch, 1, 1, 1, k, k)."""
        k = self.kernel_size
        logits = self.spatial_fc(x).view(x.size(0), 1, 1, 1, k, k)
        return torch.sigmoid(logits / self.temperature)

    def get_kernel_attention(self, x):
        """Softmax (over dim 1) attention reshaped to (batch, -1, 1, 1, 1, 1)."""
        logits = self.kernel_fc(x).view(x.size(0), -1, 1, 1, 1, 1)
        return F.softmax(logits / self.temperature, dim=1)

    def forward(self, x):
        """Return (channel, filter, spatial, kernel) attentions for ``x``."""
        squeezed = self.relu(self.bn(self.fc(self.avgpool(x))))
        return (
            self.func_channel(squeezed),
            self.func_filter(squeezed),
            self.func_spatial(squeezed),
            self.func_kernel(squeezed),
        )


class ODConv2d(nn.Module):
    """2-D convolution whose kernels are modulated by ``Attention`` per sample.

    ``weight`` holds ``kernel_num`` candidate kernels of shape
    (out_planes, in_planes // groups, kernel_size, kernel_size). The forward
    path is selected once in ``__init__``: a plain-weight path when both
    ``kernel_size`` and ``kernel_num`` are 1, otherwise the general path.
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, reduction=0.0625, kernel_num=4):
        super().__init__()
        self.in_planes = in_planes
        self.out_planes = out_planes
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.kernel_num = kernel_num
        self.attention = Attention(in_planes, out_planes, kernel_size,
                                   groups=groups, reduction=reduction, kernel_num=kernel_num)
        self.weight = nn.Parameter(
            torch.randn(kernel_num, out_planes, in_planes // groups, kernel_size, kernel_size),
            requires_grad=True)
        self._initialize_weights()

        single_pointwise = self.kernel_size == 1 and self.kernel_num == 1
        self._forward_impl = self._forward_impl_pw1x if single_pointwise else self._forward_impl_common

    def _initialize_weights(self):
        """Kaiming-init each candidate kernel separately."""
        for idx in range(self.kernel_num):
            nn.init.kaiming_normal_(self.weight[idx], mode='fan_out', nonlinearity='relu')

    def update_temperature(self, temperature):
        """Forward the temperature to the attention module."""
        self.attention.update_temperature(temperature)

    def _forward_impl_common(self, x):
        """General path: build one aggregated kernel per sample, then convolve."""
        att_channel, att_filter, att_spatial, att_kernel = self.attention(x)
        batch, _, height, width = x.size()

        # Fold the batch into the channel axis so a single grouped conv can
        # apply a different kernel to every sample.
        folded = (x * att_channel).reshape(1, -1, height, width)

        kernels = att_spatial * att_kernel * self.weight.unsqueeze(dim=0)
        kernels = torch.sum(kernels, dim=1).view(
            [-1, self.in_planes // self.groups, self.kernel_size, self.kernel_size])

        out = F.conv2d(folded, weight=kernels, bias=None, stride=self.stride,
                       padding=self.padding, dilation=self.dilation,
                       groups=self.groups * batch)
        out = out.view(batch, self.out_planes, out.size(-2), out.size(-1))
        return out * att_filter

    def _forward_impl_pw1x(self, x):
        """Path for kernel_size == 1 and kernel_num == 1: only channel/filter attention is applied."""
        att_channel, att_filter, _att_spatial, _att_kernel = self.attention(x)
        out = F.conv2d(x * att_channel, weight=self.weight.squeeze(dim=0), bias=None,
                       stride=self.stride, padding=self.padding,
                       dilation=self.dilation, groups=self.groups)
        return out * att_filter

    def forward(self, x):
        """Run the forward path chosen at construction time."""
        return self._forward_impl(x)
class OD_MobileNetV2(nn.Module):
    """MobileNetV2-style feature extractor built from ODConv blocks.

    ``forward`` returns a list of four feature maps, one per downsampling
    ratio 4, 8, 16 and 32 (measured on the height axis). ``self.channel``
    records the channel count of each of them, obtained by running a dummy
    2x3x640x640 input through the network at construction time.

    ``num_classes``, ``dropout`` and ``**kwargs`` are accepted but not used
    by the code in this class.
    """

    def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8, block=InvertedResidual, norm_layer=nn.BatchNorm2d, dropout=0.2, reduction=0.0625, kernel_num=1, **kwargs):
        super().__init__()

        if inverted_residual_setting is None:
            # Rows are (expand_ratio t, channels c, repeats n, first stride s).
            inverted_residual_setting = [
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        in_ch = _make_divisible(32 * width_mult, round_nearest)
        self.last_channel = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

        # Stem, then the inverted-residual stages, then the final 1x1 ODConv.
        stack = [ConvBNReLU(3, in_ch, stride=2, norm_layer=norm_layer)]
        for expand, channels, repeats, first_stride in inverted_residual_setting:
            out_ch = _make_divisible(channels * width_mult, round_nearest)
            for rep in range(repeats):
                # Only the first block of a stage applies the stage stride.
                block_stride = first_stride if rep == 0 else 1
                stack.append(block(in_ch, out_ch, block_stride, expand_ratio=expand,
                                   norm_layer=norm_layer, reduction=reduction,
                                   kernel_num=kernel_num))
                in_ch = out_ch
        stack.append(ODConvBNReLU(in_ch, self.last_channel, kernel_size=1,
                                  norm_layer=norm_layer, reduction=reduction,
                                  kernel_num=kernel_num))
        self.features = nn.Sequential(*stack)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

        # Probe the output channel counts with a dummy forward pass.
        probe = self.forward(torch.randn(2, 3, 640, 640))
        self.channel = [feat.size(1) for feat in probe]

    def net_update_temperature(self, temperature):
        """Call ``update_temperature`` on every sub-module that defines it."""
        for module in self.modules():
            if hasattr(module, "update_temperature"):
                module.update_temperature(temperature)

    def forward(self, x):
        """Return the feature maps at downsampling ratios 4, 8, 16 and 32.

        A slot stays ``None`` if no layer output has that ratio; when several
        layers share a ratio, the output of the last one is kept.
        """
        scale = [4, 8, 16, 32]
        outputs = [None] * len(scale)
        base = x.size(2)
        for layer in self.features:
            x = layer(x)
            ratio = base // x.size(2)
            if ratio in scale:
                outputs[scale.index(ratio)] = x
        return outputs
class OD_ResNet(nn.Module):
    """ResNet-style feature extractor assembled from a caller-supplied block.

    ``forward`` returns five feature maps: the stem output (after ReLU, before
    max-pooling) followed by the outputs of ``layer1`` .. ``layer4``.
    ``self.channel`` records their channel counts, obtained by running a dummy
    2x3x640x640 input through the network at construction time.

    ``num_classes`` and ``dropout`` are accepted but not used by the code in
    this class.
    """

    def __init__(self, block, layers, num_classes=1000, dropout=0.1, reduction=0.0625, kernel_num=1):
        super().__init__()
        self.inplanes = 64

        # Stem: 7x7 stride-2 conv, BN, ReLU, 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        od_args = {'reduction': reduction, 'kernel_num': kernel_num}
        self.layer1 = self._make_layer(block, 64, layers[0], **od_args)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, **od_args)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, **od_args)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, **od_args)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

        # Probe the output channel counts with a dummy forward pass.
        probe = self.forward(torch.randn(2, 3, 640, 640))
        self.channel = [feat.size(1) for feat in probe]

    def net_update_temperature(self, temperature):
        """Call ``update_temperature`` on every sub-module that defines it."""
        for module in self.modules():
            if hasattr(module, "update_temperature"):
                module.update_temperature(temperature)

    # NOTE(review): the default reduction here is 0.625 while __init__ uses
    # 0.0625; every call in this class passes reduction explicitly, so the
    # default is not exercised here — confirm whether it is a typo.
    def _make_layer(self, block, planes, blocks, stride=1, reduction=0.625, kernel_num=1):
        """Build one stage of ``blocks`` blocks and update ``self.inplanes``.

        A 1x1 conv + BN shortcut is handed to the first block whenever the
        stride is not 1 or the channel count changes.
        """
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride,
                          padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample,
                       reduction=reduction, kernel_num=kernel_num)]
        self.inplanes = out_planes
        for _ in range(blocks - 1):
            stage.append(block(self.inplanes, planes, reduction=reduction, kernel_num=kernel_num))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return ``[stem, layer1, layer2, layer3, layer4]`` feature maps."""
        stem = self.relu(self.bn1(self.conv1(x)))
        c2 = self.layer1(self.maxpool(stem))
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)
        return [stem, c2, c3, c4, c5]
class YOLOv5Trainer:
    """Container for the settings of a YOLOv5 training run.

    Args:
        hyp: Hyper-parameters for training (stored as given).
        opt: Run options (stored as given).
        device: Device to train on (stored as given).
        callbacks: Callbacks to use during training (stored as given).
    """

    def __init__(self, hyp, opt, device, callbacks):
        self.hyp = hyp
        self.opt = opt
        self.device = device
        self.callbacks = callbacks

    def train(self):
        """Train the YOLOv5 model.

        Raises:
            NotImplementedError: the training code is elided in this excerpt.
        """
        # Code for training the YOLOv5 model
        raise NotImplementedError("training code is omitted from this excerpt")
class YOLOv5Model:
def __init__(self, weights='./best.pt', data=ROOT / 'data/coco128.yaml', device='', half=False, dnn=False):
self.weights = weights
self.data = data
self.device = device
self.half = half
self.dnn = dnn
self.model, self.stride, self.names, self.pt = self.load_model()
def load_model(self):
device = select_device
文件路径 | 功能 |
export.py | 将YOLOv5模型导出为其他格式的工具文件 |
odconv.py | 实现了Attention和ODConv2d类,用于增强卷积神经网络的表示能力 |
od_mobilenetv2.py | 实现了基于MobileNetV2的目标检测模型 |
od_resnet.py | 实现了基于ResNet的目标检测模型 |
train.py | YOLOv5模型的训练脚本 |
ui.py | 基于PyQt5的图形用户界面程序,用于快递包裹分拣分割信息提取系统 |
val.py | YOLOv5模型的验证脚本 |
yolo.py | YOLOv5模型的核心实现 |
classify/predict.py | 分类任务的预测脚本 |
classify/train.py | 分类任务的训练脚本 |
classify/val.py | 分类任务的验证脚本 |
models/common.py | 包含一些通用的模型组件和函数 |
models/experimental.py | 包含一些实验性的模型组件和函数 |
models/odconv.py | 包含ODConv2d类的实现 |
models/od_mobilenetv2.py | 基于ODConv2d的MobileNetV2模型实现 |
models/od_resnet.py | 基于ODConv2d的ResNet模型实现 |
models/tf.py | TensorFlow模型的相关函数和类 |
models/yolo.py | YOLOv5模型的实现 |
models/__init__.py | 模型模块的初始化文件 |
models/ODConv/odconv.py | ODConv2d类的实现 |
models/ODConv/od_mobilenetv2.py | 基于ODConv2d的MobileNetV2模型实现 |
models/ODConv/od_resnet.py | 基于ODConv2d的ResNet模型实现 |
ODConv/odconv.py | ODConv2d类的实现 |
ODConv/od_mobilenetv2.py | 基于ODConv2d的MobileNetV2模型实现 |
ODConv/od_resnet.py | 基于ODConv2d的ResNet模型实现 |
segment/predict.py | 分割任务的预测脚本 |
segment/train.py | 分割任务的训练脚本 |
segment/val.py | 分割任务的验证脚本 |
utils/activations.py | 包含一些激活函数的实现 |
utils/augmentations.py | 包含一些数据增强的函数和类 |
utils/autoanchor.py | 包含自动锚框生成的函数和类 |
utils/autobatch.py | 包含自动批处理大小调整的函数和类 |
utils/callbacks.py | 包含一些回调函数的实现 |
utils/dataloaders.py | 包含数据加载器的实现 |
utils/downloads.py | 包含下载数据集和模型的函数 |
utils/general.py | 包含一些通用的
,使得卷积运算在所有空间位置、所有输入信道、所有滤波器以及所有卷积核上都随输入x而变化,从而为捕获丰富的上下文线索提供了性能保证。因此,ODConv可以显著增强CNN基本卷积运算的特征提取能力。此外,仅使用单个卷积核的ODConv即可与标准CondConv和DyConv相当甚至优于它们,同时为最终模型引入的额外参数大大减少。论文提供了大量实验来验证这些优点。通过比较等式1和等式2,我们可以清楚地看到,ODConv是一种更广义的动态卷积。此外,当设置 $n=1$ 且 $\alpha_{s1}$、$\alpha_{c1}$ 和 $\alpha_{w1}$ 的所有分量均为1时,仅保留滤波器方向注意力 $\alpha_{f1}$ 的ODConv将退化为:将基于输入特征的SE变体应用于卷积滤波器,然后进行卷积运算(注意原始SE(Hu等人,2018b)基于输出特征,并且用于重新校准输出特征本身)。这种SE变体是ODConv的特例。
图:将ODConv中的四种注意类型逐步乘以卷积核的示例。(a) 沿空间维度的逐位置乘法运算,(b)沿输入信道维度的逐信道乘法运算、(c)沿输出信道维度的按滤波器乘法运算,以及(d)沿卷积核空间的核维度的按核乘法运算。方法部分对符号进行了说明
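实现:对于ODConv,一个关键问题是如何计算卷积核 $W_i$ 的四种注意力 $\alpha_{si}$、$\alpha_{ci}$、$\alpha_{fi}$ 和 $\alpha_{wi}$。继CondConv和DyConv之后,我们同样使用SE型注意力模块(Hu等人,2018b),但将其扩展为带有多个头部的 $\pi_i(x)$ 来计算它们,其结构如图所示。具体而言,首先通过逐通道全局平均池化(GAP)运算将输入 $x$ 压缩为长度为 $c_{in}$ 的特征向量。随后是一个全连接(FC)层和四个头部分支,FC层之后接ReLU(Krizhevsky等人,2012)。FC层以缩减比 $r$ 将压缩后的特征向量映射到低维空间(根据消融实验,我们在所有主要实验中设置 $r=1/16$,避免了过高的模型复杂度)。对于四个头部分支,每个分支都有一个FC层,输出大小分别为 $k\times k$、$c_{in}\times 1$、$c_{out}\times 1$ 和 $n\times 1$,并分别经Softmax或Sigmoid函数生成归一化的注意力 $\alpha_{si}$、$\alpha_{ci}$、$\alpha_{fi}$ 和 $\alpha_{wi}$,如图所示。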
