https://github.com/fundamentalvision/deformable-detr
https://github.com/robinnarsinghranabhat/Deformable-DETR
The first link is the official implementation; here we use the modified fork (the second link).
1. Create a virtual environment
conda create -n Deformable-detr python=3.8
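Then activate it:
conda activate Deformable-detr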
2. Install PyTorch and the project dependencies
Note: the PyTorch build must match the CUDA version on your machine or server (CUDA 11.8 is used as the example here).
pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
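A quick sanity check that the CUDA build was installed correctly (my own suggestion, not part of the original steps):
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"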
Move into the code directory and install the remaining dependencies:
pip install -r requirements.txt
Build MultiScaleDeformableAttention
cd ./models/ops
sh ./make.sh
pip list
python test.py
pip list should now show that MultiScaleDeformableAttention has been installed.
Running test.py should print True for every check.
Note: if you get a file-permission error, fix the permissions on make.sh.
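For example (a standard Unix fix, not from the original post):
chmod u+x make.sh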
I use the VOC2007 dataset here; the VOC2007 and VOC2012 datasets are freely available for download if you need them.
Deformable-DETR expects data in COCO format, so the VOC dataset has to be converted.
COCO dataset layout:
VOC dataset layout:
For object detection, only a few of the VOC folders are needed: the images, the xml annotations, and the split lists.
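For reference, the two layouts look roughly like this (a sketch inferred from the paths used in the scripts below; your exact folders may differ):

coco/
├── train2017/                  # training images
├── val2017/                    # validation images
└── annotations/
    ├── instances_train2017.json
    └── instances_val2017.json

VOCdevkit/VOC2007/
├── JPEGImages/                 # all images
├── Annotations/                # one xml annotation per image
└── ImageSets/Main/             # train.txt / val.txt split lists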
1. Split all images into a training set and a validation set
import os
import shutil

if __name__ == '__main__':
    fileDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/"   # source image folder
    trainDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/train2017/"   # destination for training images
    valDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/val2017/"       # destination for validation images
    # testDir = 'D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/test2017/'

    # create the destination folders if they do not exist yet
    os.makedirs(trainDir, exist_ok=True)
    os.makedirs(valDir, exist_ok=True)

    train = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt', 'r') as f:
        for line in f:
            train.append(line.strip('\n'))
    for name in train:
        shutil.copy2(fileDir + name + '.jpg', trainDir + name + '.jpg')

    val = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt', 'r') as f:
        for line in f:
            val.append(line.strip('\n'))
    for name in val:
        shutil.copy2(fileDir + name + '.jpg', valDir + name + '.jpg')

    # test split (uncomment if you also have a test list)
    # test = []
    # with open('E:/yolo3/VOCdevkit/VOC2012/ImageSets/Main/test1.txt', 'r') as f:
    #     for line in f:
    #         test.append(line.strip('\n'))
    # for name in test:
    #     shutil.copy2(fileDir + name + '.jpg', testDir + name + '.jpg')
The train/val split here follows VOC's own ImageSets lists.
2. Split the xml files into the same train/val sets
import os
import shutil

if __name__ == '__main__':
    fileDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations/"  # source annotation folder
    trainDir = 'D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/xml/xml_train/'
    valDir = 'D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/xml/xml_val/'
    # testDir = 'E:/yolo3/VOCdevkit/VOC2012/xml/xml_test/'

    # create the destination folders if they do not exist yet
    os.makedirs(trainDir, exist_ok=True)
    os.makedirs(valDir, exist_ok=True)

    train = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt', 'r') as f:
        for line in f:
            train.append(line.strip('\n'))
    for name in train:
        shutil.copy2(fileDir + name + '.xml', trainDir + name + '.xml')

    val = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt', 'r') as f:
        for line in f:
            val.append(line.strip('\n'))
    for name in val:
        shutil.copy2(fileDir + name + '.xml', valDir + name + '.xml')

    # test split (uncomment if you also have a test list)
    # test = []
    # with open('E:/yolo3/VOCdevkit/VOC2012/ImageSets/Main/test1.txt', 'r') as f:
    #     for line in f:
    #         test.append(line.strip('\n'))
    # for name in test:
    #     shutil.copy2(fileDir + name + '.xml', testDir + name + '.xml')
3. Convert the xml files to COCO json files
#!/usr/bin/python
# pip install lxml

import os
import json
import xml.etree.ElementTree as ET
import glob

START_BOUNDING_BOX_ID = 1
PRE_DEFINE_CATEGORIES = None  # If necessary, pre-define categories and their ids
# PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
#                          "bottle": 5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
#                          "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
#                          "motorbike": 14, "person": 15, "pottedplant": 16,
#                          "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}


def get(root, name):
    return root.findall(name)


def get_and_check(root, name, length):
    vars = root.findall(name)
    if len(vars) == 0:
        raise ValueError("Can not find %s in %s." % (name, root.tag))
    if length > 0 and len(vars) != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, len(vars))
        )
    if length == 1:
        vars = vars[0]
    return vars


def get_filename_as_integer(filename):
    # Derive a numeric image id from the file name, e.g. "2007_000027" -> 2007000027
    filename = filename.replace("\\", "/")
    filename = os.path.splitext(os.path.basename(filename))[0]
    return int("".join(filename.split("_")))


def get_categories(xml_files):
    """Generate a category-name-to-id mapping from a list of xml files.

    Arguments:
        xml_files {list} -- A list of xml file paths.
    Returns:
        dict -- category name to id mapping.
    """
    classes_names = []
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall("object"):
            classes_names.append(member[0].text)
    classes_names = sorted(set(classes_names))
    return {name: i for i, name in enumerate(classes_names)}


def convert(xml_files, json_file):
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    if PRE_DEFINE_CATEGORIES is not None:
        categories = PRE_DEFINE_CATEGORIES
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        path = get(root, "path")
        if len(path) == 1:
            filename = os.path.basename(path[0].text)
        elif len(path) == 0:
            filename = get_and_check(root, "filename", 1).text
        else:
            raise ValueError("%d paths found in %s" % (len(path), xml_file))
        image_id = get_filename_as_integer(filename)
        size = get_and_check(root, "size", 1)
        width = int(get_and_check(size, "width", 1).text)
        height = int(get_and_check(size, "height", 1).text)
        image = {
            "file_name": filename,
            "height": height,
            "width": width,
            "id": image_id,
        }
        json_dict["images"].append(image)
        # Segmentation is not supported; bounding boxes only.
        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            if category not in categories:
                categories[category] = len(categories)
            category_id = categories[category]
            bndbox = get_and_check(obj, "bndbox", 1)
            xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1
            ymin = int(float(get_and_check(bndbox, "ymin", 1).text)) - 1
            xmax = int(get_and_check(bndbox, "xmax", 1).text)
            ymax = int(get_and_check(bndbox, "ymax", 1).text)
            assert xmax > xmin
            assert ymax > ymin
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "id": bnd_id,
                "ignore": 0,
                "segmentation": [],
            }
            json_dict["annotations"].append(ann)
            bnd_id += 1
    for cate, cid in categories.items():
        json_dict["categories"].append({"supercategory": "none", "id": cid, "name": cate})
    os.makedirs(os.path.dirname(json_file), exist_ok=True)
    with open(json_file, "w") as json_fp:
        json_fp.write(json.dumps(json_dict))


if __name__ == "__main__":
    import argparse

    root_path = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/"
    parser = argparse.ArgumentParser(
        description="Convert Pascal VOC annotations to COCO format."
    )
    # nargs="?" added so the defaults actually apply when the arguments are omitted
    parser.add_argument("xml_dir", nargs="?", default=root_path + "xml/xml_train",
                        help="Directory path to xml files.", type=str)
    parser.add_argument("json_file", nargs="?",
                        default=root_path + "coco/annotations/instances_train2017.json",
                        help="Output COCO format json file.", type=str)
    args = parser.parse_args()
    xml_files = glob.glob(os.path.join(args.xml_dir, "*.xml"))
    print("Number of xml files: {}".format(len(xml_files)))
    convert(xml_files, args.json_file)
    print("Success: {}".format(args.json_file))
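Assuming the script above is saved as voc2coco.py (the file name is my choice), run it once per split; with the nargs="?" defaults it can also be run with no arguments for the training split:

python voc2coco.py D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/xml/xml_train D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/coco/annotations/instances_train2017.json
python voc2coco.py D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/xml/xml_val D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/coco/annotations/instances_val2017.json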
At this point the dataset conversion is complete; arrange the converted files according to the COCO layout shown above.
Download the pretrained model.
# Fine-tune the pretrained model:
python -u main.py --output_dir exps/iter_refine/ --with_box_refine --two_stage --resume ./saved_models/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage-checkpoint.pth --coco_path ./custom_files --num_classes=3
Key arguments:
coco_path: path to the dataset.
output_dir: where checkpoints and logs are written.
resume: checkpoint to fine-tune from.
num_classes: Deformable DETR was originally trained on 91 categories. Suppose you want to fine-tune on two classes, yes-checkbox and no-checkbox: set num_classes to 3 (total number of dataset labels + 1). The extra 1 accounts for the no-object class. For VOC2007 with its 20 classes, that would be --num_classes=21.
Instead of using the command line, you can also set these parameters directly in main.py and run main.py to start training.
Warnings that may appear at runtime:
UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
UserWarning: Arguments other than a weight enum or None for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing weights=ResNet101_Weights.IMAGENET1K_V1. You can also use weights=ResNet101_Weights.DEFAULT to get the most up-to-date weights.
The pretrained argument was deprecated in torchvision 0.13. To fix these warnings, open backbone.py in the models folder and change the backbone construction to use the suggested weights=ResNet101_Weights.DEFAULT, as sketched below.
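In backbone.py the call looks roughly like this (a sketch from memory of the DETR-family backbone code; check your copy of the file, and pick the weights enum matching your backbone):

# before (triggers the warnings)
backbone = getattr(torchvision.models, name)(
    replace_stride_with_dilation=[False, False, dilation],
    pretrained=is_main_process(), norm_layer=norm_layer)

# after
from torchvision.models import ResNet50_Weights  # or ResNet101_Weights
backbone = getattr(torchvision.models, name)(
    replace_stride_with_dilation=[False, False, dilation],
    weights=ResNet50_Weights.DEFAULT if is_main_process() else None,
    norm_layer=norm_layer)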
Fixing the warning: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument.
Edit functional.py in the virtual environment's torch installation:
Change
return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
to
return _VF.meshgrid(tensors, **kwargs, indexing='ij')  # type: ignore[attr-defined]
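If you would rather not edit the installed torch source, an alternative (my suggestion, not from the original post) is to filter the warning in your own entry script:

import warnings
# suppress the torch.meshgrid indexing deprecation warning
warnings.filterwarnings("ignore", message="torch.meshgrid")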
import cv2
from PIL import Image
import numpy as np
import os
import time
import torch
import torchvision.transforms as T

from main import get_args_parser as get_main_args_parser
from models import build_model

torch.set_grad_enabled(False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("[INFO] Running inference on {}".format(device))

# image preprocessing
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# convert (cx, cy, w, h) boxes to (x1, y1, x2, y2)
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


# map normalized [0, 1] boxes back to image coordinates
def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b.cpu().numpy()
    b = b * np.array([img_w, img_h, img_w, img_h], dtype=np.float32)
    return b


# draw boxes with OpenCV
def plot_result(pil_img, prob, boxes, save_name=None, imshow=False, imwrite=False):
    LABEL = ['all', 'hat', 'person', 'groundrod', 'vest', 'workclothes_clothes',
             'workclothes_trousers', 'winter_clothes', 'winter_trousers',
             'noworkclothes_clothes', 'noworkclothes_trousers', 'height',
             'safteybelt', 'smoking', 'noheight', 'fire', 'extinguisher',
             'roll_workclothes', 'roll_noworkclothes', 'insulating_gloves', 'car',
             'fence', 'bottle', 'shorts', 'holes', 'single_ladder', 'down',
             'double_ladder', 'oxygen_horizontally', 'oxygen_vertically',
             'acetylene_vertically', 'acetylene_horizontally']
    opencvImage = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    if len(prob) == 0:
        print("[INFO] No box detected!")
        if imwrite:
            if not os.path.exists("./result/pred_no"):
                os.makedirs("./result/pred_no")
            cv2.imwrite(os.path.join("./result/pred_no", save_name), opencvImage)
        return
    for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes):
        cl = p.argmax()
        label_text = '{}: {}%'.format(LABEL[cl], round(p[cl] * 100, 2))
        cv2.rectangle(opencvImage, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                      (255, 255, 0), 2)
        cv2.putText(opencvImage, label_text, (int(xmin) + 10, int(ymin) + 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
    if imshow:
        cv2.imshow('detect', opencvImage)
        cv2.waitKey(0)
    if imwrite:
        if not os.path.exists("./result/pred"):
            os.makedirs('./result/pred')
        cv2.imwrite('./result/pred/{}'.format(save_name), opencvImage)


def load_model(model_path, args):
    model, _, _ = build_model(args)
    model.eval()
    state_dict = torch.load(model_path)  # <----- change the checkpoint path here
    model.load_state_dict(state_dict["model"])
    model.to(device)
    print("load model success")
    return model


# inference on a single image
def detect(im, model, transform, prob_threshold=0.7):
    # mean-std normalize the input image (batch size: 1)
    img = transform(im).unsqueeze(0)
    img = img.to(device)
    start = time.time()
    outputs = model(img)
    # keep only predictions above the confidence threshold
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > prob_threshold
    probas = probas.cpu().detach().numpy()
    keep = keep.cpu().detach().numpy()
    # convert boxes from [0, 1] to image scale
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    end = time.time()
    return probas[keep], bboxes_scaled, end - start


if __name__ == "__main__":
    main_args = get_main_args_parser().parse_args()
    # load the model
    dfdetr = load_model('exps/r50_deformable_detr/checkpoint0049.pth', main_args)
    files = os.listdir("coco/testdata/test2017")
    cn = 0
    waste = 0
    for file in files:
        img_path = os.path.join("coco/testdata/test2017", file)
        im = Image.open(img_path)
        scores, boxes, waste_time = detect(im, dfdetr, transform)
        plot_result(im, scores, boxes, save_name=file, imshow=False, imwrite=True)
        print("{} [INFO] {} time: {} done!".format(cn, file, waste_time))
        cn += 1
        waste += waste_time
    waste_avg = waste / cn
    print(waste_avg)
Just change the model path and the test-set path to run inference.
The LABEL list must match the label order in your train2017.json file (search for categories in the json file to see all labels).
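A small snippet to print that label order (assumes the annotation path used earlier in this post):

import json

with open("coco/annotations/instances_train2017.json") as f:
    cats = json.load(f)["categories"]
# categories sorted by id give the label order the model was trained with
for c in sorted(cats, key=lambda c: c["id"]):
    print(c["id"], c["name"])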