项目参考AAAI Association for the Advancement of Artificial Intelligence
(2)打开labelImg并选择“Open Dir”来选择你的图片目录。
import contextlib
import glob
import json
import os
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
from PIL import Image

# NOTE(review): make_dirs, exif_size, split_files, split_rows_simple, write_data_data,
# coco91_to_coco80_class and tqdm are expected to come from this star import -- confirm.
from utils import *


def _load_json_files(pattern):
    """Load every JSON file matching the glob *pattern*.

    Each decoded document gets an extra ``'json_file'`` key holding the path it
    was read from, so later stages can locate the matching image.
    """
    records = []
    for file in glob.glob(pattern):
        with open(file) as f:
            jdata = json.load(f)
        jdata['json_file'] = file
        records.append(jdata)
    return records


def _image_size(image_path):
    """Return ``exif_size`` of the image at *image_path*, closing the file handle afterwards."""
    with Image.open(image_path) as im:
        return exif_size(im)  # (width, height) according to the original call sites


def _remove_file(path):
    """Delete *path* if it exists (portable replacement for shelling out to ``rm``)."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


# Convert INFOLKS JSON file into YOLO-format labels ----------------------------
def convert_infolks_json(name, files, img_path):
    """Convert INFOLKS JSON annotations into YOLO-format label files.

    Args:
        name: Base name of the generated ``.txt`` / ``.names`` / ``.data`` files.
        files: Glob pattern matching the INFOLKS JSON files.
        img_path: Directory prefix (with trailing separator) where the images live;
            the image for ``foo.json`` is the first match of ``img_path + 'foo.*'``.

    Raises:
        IndexError: If a JSON file has no matching image.
    """
    # Create folders
    path = make_dirs()

    # Import json
    data = _load_json_files(files)

    # Write images and shapes
    name = path + os.sep + name
    file_name, wh, cat = [], [], []
    for x in tqdm(data, desc='Files and Shapes'):
        f = glob.glob(img_path + Path(x['json_file']).stem + '.*')[0]
        file_name.append(f)
        wh.append(_image_size(f))  # (width, height)
        cat.extend(a['classTitle'].lower() for a in x['output']['objects'])  # categories

        # filename
        with open(name + '.txt', 'a') as file:
            file.write('%s\n' % f)

    # Write *.names file
    names = sorted(set(cat))
    with open(name + '.names', 'a') as file:
        file.writelines('%s\n' % a for a in names)

    # Write labels file
    for i, x in enumerate(tqdm(data, desc='Annotations')):
        label_name = Path(file_name[i]).stem + '.txt'

        with open(path + '/labels/' + label_name, 'a') as file:
            for a in x['output']['objects']:
                category_id = names.index(a['classTitle'].lower())

                # The INFOLKS bounding box format is [x-min, y-min, x-max, y-max]
                box = np.array(a['points']['exterior'], dtype=np.float32).ravel()
                box[[0, 2]] /= wh[i][0]  # normalize x by width
                box[[1, 3]] /= wh[i][1]  # normalize y by height
                box = [box[[0, 2]].mean(), box[[1, 3]].mean(), box[2] - box[0], box[3] - box[1]]  # xywh
                if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))

    # Split data into train, test, and validate files
    split_files(name, file_name)
    write_data_data(name + '.data', nc=len(names))
    print(f'Done. Output saved to {os.getcwd() + os.sep + path}')


# Convert vott JSON file into YOLO-format labels -------------------------------
def convert_vott_json(name, files, img_path):
    """Convert VoTT JSON annotations into YOLO-format label files.

    Args:
        name: Base name of the generated ``.txt`` / ``.names`` files.
        files: Glob pattern matching the VoTT JSON files.
        img_path: Directory prefix (with trailing separator) containing the
            ``<asset name>.jpg`` images.
    """
    # Create folders
    path = make_dirs()
    name = path + os.sep + name

    # Import json
    data = _load_json_files(files)

    # Get all categories (best effort: malformed regions are skipped here)
    file_name, cat = [], []
    for x in tqdm(data, desc='Files and Shapes'):
        with contextlib.suppress(Exception):
            cat.extend(a['tags'][0] for a in x['regions'])  # categories

    # Write *.names file
    names = sorted(set(cat))
    with open(name + '.names', 'a') as file:
        file.writelines('%s\n' % a for a in names)

    # Write labels file
    n1, n2 = 0, 0  # images found, images imported
    missing_images = []
    for x in tqdm(data, desc='Annotations'):
        matches = glob.glob(img_path + x['asset']['name'] + '.jpg')
        if not matches:
            missing_images.append(x['asset']['name'])
            continue

        f = matches[0]
        file_name.append(f)
        wh = _image_size(f)  # (width, height)

        n1 += 1
        if not (wh[0] > 0 and wh[1] > 0):
            continue
        n2 += 1

        # append filename to list
        with open(name + '.txt', 'a') as file:
            file.write('%s\n' % f)

        # write labelsfile
        label_name = Path(f).stem + '.txt'
        with open(path + '/labels/' + label_name, 'a') as file:
            for a in x['regions']:
                category_id = names.index(a['tags'][0])

                # The VoTT bounding box is given as left, top, width, height.
                box = a['boundingBox']
                # Explicit float dtype: in-place division on an integer array raises.
                box = np.array([box['left'], box['top'], box['width'], box['height']], dtype=np.float64).ravel()
                box[[0, 2]] /= wh[0]  # normalize x by width
                box[[1, 3]] /= wh[1]  # normalize y by height
                box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3]]  # xywh

                if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))

    # Report the number of JSON files attempted (not the last loop index).
    print('Attempted %g json imports, found %g images, imported %g annotations successfully' % (len(data), n1, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Split data into train, test, and validate files
    split_files(name, file_name)
    print(f'Done. Output saved to {os.getcwd() + os.sep + path}')


# Convert ath JSON file into YOLO-format labels --------------------------------
def convert_ath_json(json_dir):  # dir contains json annotations and images
    """Convert "ath" (VIA-style ``_via_img_metadata``) JSON annotations into a single-class YOLO dataset.

    Walks *json_dir* recursively for ``*.json`` files, writes one label file per
    image that has at least one positive-area box, and copies (downsized to at
    most 4096 px on the longest side) the labelled images into the output folder.

    Args:
        json_dir: Root directory containing the JSON annotations and their images.
    """
    max_img_size = 4096  # images larger than this are downsized before being written

    # Create folders
    # NOTE(review): string concatenation below assumes make_dirs() returns a str ending in a separator -- confirm.
    out_dir = make_dirs()  # output directory

    jsons = []
    for dirpath, _, filenames in os.walk(json_dir):
        jsons.extend(os.path.join(dirpath, filename) for filename in filenames if filename.lower().endswith('.json'))

    # Import json
    n1, n2, n3 = 0, 0, 0  # images seen, images written, labels written
    missing_images = []
    for json_file in sorted(jsons):
        with open(json_file) as f:
            data = json.load(f)

        # Write labels file
        for x in tqdm(data['_via_img_metadata'].values(), desc=f'Processing {json_file}'):
            image_file = str(Path(json_file).parent / x['filename'])
            matches = glob.glob(image_file)  # image file
            n1 += 1  # all images
            if not matches:
                missing_images.append(image_file)
                continue

            f = matches[0]
            wh = _image_size(f)  # (width, height)
            if not (wh[0] > 0 and wh[1] > 0):
                missing_images.append(image_file)
                continue

            label_file = out_dir + 'labels/' + Path(f).stem + '.txt'
            nlabels = 0
            try:
                with open(label_file, 'a') as file:  # write labelsfile
                    category_id = 0  # single-class

                    for a in x['regions']:
                        # shape_attributes gives x, y (top-left), width, height
                        box = a['shape_attributes']
                        box = np.array([box['x'], box['y'], box['width'], box['height']], dtype=np.float32).ravel()
                        box[[0, 2]] /= wh[0]  # normalize x by width
                        box[[1, 3]] /= wh[1]  # normalize y by height
                        box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3]]  # xywh (left-top to center)

                        if box[2] > 0. and box[3] > 0.:  # if w > 0 and h > 0
                            file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))
                            n3 += 1
                            nlabels += 1

                if nlabels == 0:  # remove non-labelled images from dataset
                    _remove_file(label_file)
                    continue  # next file

                # write image
                img = cv2.imread(f)  # BGR
                assert img is not None, 'Image Not Found ' + f
                r = max_img_size / max(img.shape)  # size ratio
                if r < 1:  # downsize if necessary
                    h, w, _ = img.shape
                    img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)

                ifile = out_dir + 'images/' + Path(f).name
                if cv2.imwrite(ifile, img):  # if success append image to list
                    with open(out_dir + 'data.txt', 'a') as file:
                        file.write('%s\n' % ifile)
                    n2 += 1  # correct images

            except Exception:
                # Best effort: drop the partial label file and keep converting the rest.
                _remove_file(label_file)
                print(f'problem with {f}')

    nm = len(missing_images)  # number missing
    print('\nFound %g JSONs with %g labels over %g images. Found %g images, labelled %g images successfully' %
          (len(jsons), n3, n1, n1 - nm, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Write *.names file
    names = ['knife']  # preserves sort order
    with open(out_dir + 'data.names', 'w') as f:
        f.writelines('%s\n' % a for a in names)

    # Split data into train, test, and validate files
    split_rows_simple(out_dir + 'data.txt')
    write_data_data(out_dir + 'data.data', nc=1)
    print(f'Done. Output saved to {Path(out_dir).absolute()}')


def convert_coco_json(json_dir='../coco/annotations/', use_segments=False, cls91to80=False):
    """Convert COCO ``instances_*.json`` annotations into YOLO-format label files.

    Args:
        json_dir: Directory containing the COCO ``*.json`` annotation files.
        use_segments: Write polygon segments (``cls x1 y1 x2 y2 ...``) instead of boxes.
        cls91to80: Map category ids through ``coco91_to_coco80_class()``; otherwise
            the class is ``category_id - 1``.
    """
    save_dir = make_dirs()  # output directory
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(json_dir).resolve().glob('*.json')):
        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
        fn.mkdir()
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
        images = {'%g' % x['id']: x for x in data['images']}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data['annotations']:
            imgToAnns[ann['image_id']].append(ann)

        # Write labels file
        for img_id, anns in tqdm(imgToAnns.items(), desc=f'Annotations {json_file}'):
            img = images['%g' % img_id]
            h, w, f = img['height'], img['width'], img['file_name']

            bboxes = []
            segments = []
            for ann in anns:
                if ann['iscrowd']:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann['bbox'], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # skip if w <= 0 or h <= 0
                    continue

                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                # Segments
                if use_segments:
                    if len(ann['segmentation']) > 1:
                        s = merge_multi_segment(ann['segmentation'])
                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                    else:
                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                    s = [cls] + s
                    if s not in segments:
                        segments.append(s)

            # Write. Iterate the list that is actually written: boxes and segments are
            # de-duplicated independently, so their lengths are not guaranteed to match.
            rows = segments if use_segments else bboxes
            with open((fn / f).with_suffix('.txt'), 'a') as file:
                for row in rows:
                    line = tuple(row)  # cls, box or segments
                    file.write(('%g ' * len(line)).rstrip() % line + '\n')


def min_index(arr1, arr2):
    """Find a pair of indexes with the shortest distance.

    Args:
        arr1: (N, 2).
        arr2: (M, 2).
    Return:
        a pair of indexes(tuple).
    """
    dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)  # (N, M) squared distances
    return np.unravel_index(np.argmin(dis, axis=None), dis.shape)


def merge_multi_segment(segments):
    """Merge multi segments to one list.

    Find the coordinates with min distance between each segment,
    then connect these coordinates with one thin line to merge all
    segments into one.

    Args:
        segments(List(List)): original segmentations in coco's json file.
            like [segmentation1, segmentation2,...],
            each segmentation is a list of coordinates.

    Return:
        A list of (K, 2) arrays; the caller concatenates them into one polygon.
    """
    s = []
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

    # record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

    # use two round to connect all the segments
    for k in range(2):
        # forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
                # middle segments have two indexes
                # reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
                # deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
                    s.append(segments[i][idx[0]:idx[1] + 1])

        else:
            # backward connection: walk the middle segments in reverse
            for i in range(len(idx_list) - 1, -1, -1):
                if i not in [0, len(idx_list) - 1]:
                    idx = idx_list[i]
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s


def delete_dsstore(path='../datasets'):
    """Recursively delete macOS ``.DS_Store`` files below *path* and print what was found.

    The pattern matches any letter case so that it also works on case-sensitive
    filesystems, where the original ``.DS_store`` spelling never matched.
    """
    files = list(Path(path).rglob('.[Dd][Ss]_[Ss]tore'))
    print(files)
    for f in files:
        f.unlink()


if __name__ == '__main__':
    source = 'COCO'

    if source == 'COCO':
        convert_coco_json('./annotations',  # directory with *.json
                          use_segments=True,
                          cls91to80=True)

    elif source == 'infolks':  # Infolks https://infolks.info/
        convert_infolks_json(name='out',
                             files='../data/sm4/json/*.json',
                             img_path='../data/sm4/images/')

    elif source == 'vott':  # VoTT https://github.com/microsoft/VoTT
        convert_vott_json(name='data',
                          files='../../Downloads/athena_day/20190715/*.json',
                          img_path='../../Downloads/athena_day/20190715/')  # images folder

    elif source == 'ath':  # ath format
        convert_ath_json(json_dir='../../Downloads/athena/')  # images folder

    # zip results
    # os.system('zip -r ../coco.zip ../coco')
| |-----images
| |-----labels
| |-----images
| |-----labels
Epoch gpu_mem box obj cls labels img_size
1/200 20.8G 0.01576 0.01955 0.007536 22 1280: 100%|██████████| 849/849 [14:42<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:14<00:00, 2.87it/s]
all 3395 17314 0.994 0.957 0.0957 0.0843
Epoch gpu_mem box obj cls labels img_size
2/200 20.8G 0.01578 0.01923 0.007006 22 1280: 100%|██████████| 849/849 [14:44<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:12<00:00, 2.95it/s]
all 3395 17314 0.996 0.956 0.0957 0.0845
Epoch gpu_mem box obj cls labels img_size
3/200 20.8G 0.01561 0.0191 0.006895 27 1280: 100%|██████████| 849/849 [10:56<00:00, 1.29it/s]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|███████ | 187/213 [00:52<00:00, 4.04it/s]
all 3395 17314 0.996 0.957 0.0957 0.0845
class TridentBlock(nn.Module):
    """Three-branch residual block whose branches share one set of conv weights.

    Each branch applies the same 1x1 conv -> BN -> SiLU -> 3x3 conv -> BN, differing
    only in the padding/dilation used by the 3x3 conv, then adds its input back
    and applies SiLU.

    Args:
        c1: Input channels.
        c2: Output channels. The residual add requires the branch output to match
            the input shape (i.e. ``c1 == c2`` and a shape-preserving stride).
        stride: Stride of the 3x3 conv.
        c: When not ``False`` the block receives a single tensor and feeds it to all
            three branches; otherwise it receives a sequence of three tensors, one
            per branch.
        e: Hidden-channel ratio (``c_ = int(c2 * e)``).
        padding: Per-branch padding of the 3x3 conv (small, middle, big).
        dilate: Per-branch dilation of the 3x3 conv (small, middle, big).
        bias: Whether to learn a conv bias of length ``c2``.
    """

    def __init__(self, c1, c2, stride=1, c=False, e=0.5, padding=(1, 2, 3), dilate=(1, 2, 3), bias=False):
        super().__init__()
        self.stride = stride
        self.c = c
        c_ = int(c2 * e)
        # Immutable defaults: the original list defaults were shared between instances.
        self.padding = padding
        self.dilate = dilate
        self.share_weightconv1 = nn.Parameter(torch.empty(c_, c1, 1, 1))
        self.share_weightconv2 = nn.Parameter(torch.empty(c2, c_, 3, 3))

        self.bn1 = nn.BatchNorm2d(c_)
        self.bn2 = nn.BatchNorm2d(c2)

        self.act = nn.SiLU()

        nn.init.kaiming_uniform_(self.share_weightconv1, nonlinearity="relu")
        nn.init.kaiming_uniform_(self.share_weightconv2, nonlinearity="relu")

        self.bias = nn.Parameter(torch.zeros(c2)) if bias else None

    def _branch(self, x, index):
        """Run branch *index* (0 = small, 1 = middle, 2 = big) on *x*."""
        residual = x
        # The bias has c2 entries, but the 1x1 conv emits c_ channels. Only reuse it
        # there when the sizes agree; otherwise conv2d would raise on the mismatch.
        bias1 = self.bias
        if bias1 is not None and bias1.numel() != self.share_weightconv1.shape[0]:
            bias1 = None
        out = nn.functional.conv2d(x, self.share_weightconv1, bias=bias1)
        out = self.bn1(out)
        out = self.act(out)

        out = nn.functional.conv2d(out, self.share_weightconv2, bias=self.bias, stride=self.stride,
                                   padding=self.padding[index], dilation=self.dilate[index])
        out = self.bn2(out)
        out += residual
        out = self.act(out)
        return out

    def forward_for_small(self, x):
        """Branch using ``padding[0]`` / ``dilate[0]``."""
        return self._branch(x, 0)

    def forward_for_middle(self, x):
        """Branch using ``padding[1]`` / ``dilate[1]``."""
        return self._branch(x, 1)

    def forward_for_big(self, x):
        """Branch using ``padding[2]`` / ``dilate[2]``."""
        return self._branch(x, 2)

    def forward(self, x):
        """Return a list with the small, middle and big branch outputs."""
        if self.c is not False:
            # Single tensor input: every branch sees the same feature map.
            inputs = (x, x, x)
        else:
            # Output of a previous TridentBlock: one tensor per branch.
            inputs = (x[0], x[1], x[2])
        return [
            self.forward_for_small(inputs[0]),
            self.forward_for_middle(inputs[1]),
            self.forward_for_big(inputs[2]),
        ]


class RFEM(nn.Module):
    """Stack of ``n`` TridentBlocks whose three branch outputs are summed with the input.

    Args:
        c1: Input channels.
        c2: Output channels.
        n: Number of stacked TridentBlocks.
        e: Hidden-channel ratio of the first block (later blocks use the default).
        stride: Stride of the first block.
    """

    def __init__(self, c1, c2, n=1, e=0.5, stride=1):
        super().__init__()
        # Only the first block takes a plain tensor (c=True); the rest consume
        # the three-element list produced by the block before them.
        layers = [TridentBlock(c1, c2, stride=stride, c=True, e=e)]
        layers.extend(TridentBlock(c2, c2) for _ in range(1, n))
        self.layer = nn.Sequential(*layers)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU()

    def forward(self, x):
        """Sum the three branch outputs and the input, then apply BN and SiLU."""
        out = self.layer(x)
        out = out[0] + out[1] + out[2] + x
        out = self.act(self.bn(out))
        return out
class GradCAM:
    """Run Grad-CAM on a YOLOv5 detector for one image and save one heat-map image per detection.

    Args:
        model_path: Path to the detector weights.
        img_path: Image path, resolved relative to the current working directory.
        output_dir: Output directory, resolved relative to the current working directory.
        img_size: Side length of the square network input.
        target_layer: Name of the layer whose activations are visualised.
        method: Saliency method; only ``'gradcam'`` is supported.
        device: Device passed to the detector.
        visual_path: Stored on the instance; not used by the methods of this class.
        names: Comma-separated class names, or ``None`` to use the detector's own.
    """

    def __init__(self, model_path, img_path, output_dir, img_size, target_layer, method, device, visual_path, names):
        self.model_path = model_path
        self.img_path = img_path
        self.output_dir = output_dir
        self.img_size = img_size
        self.target_layer = target_layer
        self.method = method
        self.device = device
        self.visual_path = visual_path
        self.names = names

    def get_res_img(self, bbox, mask, res_img):
        """Overlay the saliency *mask* on *res_img*.

        Args:
            bbox: Detection box; accepted for interface compatibility, not used.
            mask: Saliency tensor; it is squeezed on dim 0 and permuted to channel-last.
            res_img: Image array the heat map is added to.

        Returns:
            ``(res_img, heatmap)``: the overlay scaled by its own maximum, and the
            JET colour-mapped heat map as float32.
        """
        mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
        heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET).astype(np.float32)
        res_img = cv2.add(res_img, heatmap)
        res_img = res_img / res_img.max()
        return res_img, heatmap

    def put_text_box(self, bbox, cls_name, res_img, thickness=2):
        """Draw the detection box and its class label on *res_img* and return the image.

        Args:
            bbox: Box as four numbers, unpacked as ``x1, y1, x2, y2``.
            cls_name: Label text.
            res_img: Image to draw on.
            thickness: Line thickness of the rectangle and the text.
        """
        x1, y1, x2, y2 = [int(b) for b in bbox]
        res_img = cv2.rectangle(res_img, (x1, y1), (x2, y2), (0, 0, 255), thickness=thickness)
        # Measure the label with the same font and scale it is drawn with, so the
        # "fits above the box" decision matches the rendered text height.
        font_face, font_scale = cv2.FONT_HERSHEY_SIMPLEX, 1
        _, h = cv2.getTextSize(cls_name, font_face, fontScale=font_scale, thickness=thickness)[0]  # text width, height
        outside = y1 - h - 3 >= 0  # label fits outside box
        t0, t1 = x1, y1 - 2 if outside else y1 + h + 2
        res_img = cv2.putText(res_img, cls_name, (t0, t1), color=(0, 0, 225), fontFace=font_face,
                              fontScale=font_scale, thickness=thickness, lineType=cv2.LINE_AA)
        return res_img

    def run(self):
        """Load the model, compute the saliency maps and write ``<stem>_<i>.jpg`` files.

        Images are saved under ``<cwd>/<output_dir>/<image stem>/``.

        Raises:
            ValueError: If ``self.method`` is not ``'gradcam'``.
            FileNotFoundError: If the input image cannot be read.
        """
        # Fail early with a clear message instead of a NameError after model loading.
        if self.method != 'gradcam':
            raise ValueError(f"Unsupported saliency method {self.method!r}; only 'gradcam' is implemented")

        device = self.device
        input_size = (self.img_size, self.img_size)

        print('[INFO] Loading the model')
        names = None if self.names is None else self.names.strip().split(",")
        model = YOLOV5TorchObjectDetector(self.model_path, device, img_size=input_size, names=names)
        saliency_method = YOLOV5GradCAM(model=model, layer_name=self.target_layer, img_size=input_size)

        img_path = Path.cwd() / self.img_path
        img = cv2.imread(str(img_path))
        if img is None:  # cv2.imread returns None instead of raising
            raise FileNotFoundError(f'Image not found or unreadable: {img_path}')
        torch_img = model.preprocessing(img[..., ::-1])  # BGR -> RGB

        tic = time.time()
        masks, logits, [boxes, _, class_names, _] = saliency_method(torch_img)
        print("total time:", round(time.time() - tic, 4))

        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to bgr

        save_path = Path.cwd() / self.output_dir / '{}'.format(img_path.stem)
        save_path.mkdir(parents=True, exist_ok=True)

        for i, mask in enumerate(masks):
            res_img = result.copy()
            bbox, cls_name = boxes[0][i], class_names[0][i]
            res_img, heatmat = self.get_res_img(bbox, mask, res_img)
            color_img = (res_img * 255).astype(np.uint8)
            color_img = self.put_text_box(bbox, cls_name, color_img)
            cv2.imwrite(str(save_path / '{0}_{1}.jpg'.format(img_path.stem, i)), color_img)
程序的入口是 if __name__ == '__main__',表示只有当直接运行该文件时才会执行main函数。
class ObjectDetector: def __init__(self, weights, source, img_size=640, conf_thres=0.25, iou_thres=0.45, device='', view_img=False, plot_label=False, save_txt=False, save_conf=False, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False): self.weights = weights self.source = source self.img_size = img_size self.conf_thres = conf_thres self.iou_thres = iou_thres self.device = device self.view_img = view_img self.plot_label = plot_label self.save_txt = save_txt self.save_conf = save_conf self.nosave = nosave self.classes = classes self.agnostic_nms = agnostic_nms self.augment = augment self.update = update self.project = project self.name = name self.exist_ok = exist_ok def detect(self): source, weights, view_img, plot_label, save_txt, imgsz = self.source, self.weights, self.view_img, \ self.plot_label, self.save_txt, self.img_size save_img = not self.nosave and not source.endswith('.txt') # save inference images webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = Path(increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(self.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) * 2 # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = 
check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=self.augment)[0] # Apply NMS pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = 
(xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if self.save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' # plot_only_box(xyxy, im0, color=colors[int(cls)], line_thickness=3) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') if plot_label: label = f"the num of face is {len(det)}" plot_text_label(im0, label=label, line_thickness=3) # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer if vid
文件路径 | 功能 |
C3RFEM.py | 定义了TridentBlock、RFEM和C3RFEM类,用于实现改进的分割头部网络 |
cam_vis.py | 使用YOLOv5模型和GradCAM方法生成热力图进行可视化 |
detect.py | 使用YOLOv5模型进行目标检测,并计算性能指标 |
export.py | 将YOLOv5模型导出为其他格式 |
hubconf.py | 提供了创建和加载YOLOv5模型的函数 |
test.py | 对目标检测模型进行测试,并保存预测结果 |
train.py | 训练目标检测模型,并保存训练过程中的权重和统计信息 |
ui.py | 用户界面脚本,用于交互式操作和展示结果 |
data/convert.py | 数据转换工具,用于将数据集转换为YOLOv5格式 |
data/voc_label.py | VOC数据集标签处理工具 |
models/common.py | 定义了一些通用的模型组件 |
models/experimental.py | 定义了一些实验性的模型组件 |
models/export.py | 导出模型为ONNX格式的工具函数 |
models/gradcam.py | 实现了GradCAM方法用于可视化模型的注意力区域 |
models/tf.py | TensorFlow模型转换工具函数 |
models/yolo.py | 定义了YOLOv5模型的网络结构 |
models/__init__.py | 模型模块的初始化文件 |
segment/train.py | 训练分割头部网络的脚本 |
segment/val.py | 在验证集上评估分割头部网络的脚本 |
utils/activations.py | 激活函数定义 |
utils/augmentations.py | 数据增强函数定义 |
utils/autoanchor.py | 自动锚框生成工具函数 |
utils/autobatch.py | 自动批处理工具函数 |
utils/callbacks.py | 回调函数定义 |
utils/dataloaders.py | 数据加载器定义 |
utils/datasets.py | 数据集定义 |
utils/downloads.py | 下载工具函数 |
utils/general.py | 通用工具函数 |
utils/google_utils.py | Google云存储工具函数 |
utils/loss.py | 损失函数定义 |
utils/metrics.py | 模型性能评估指标定义 |
utils/plots.py | 绘图工具函数 |
utils/RepulsionLoss.py | Repulsion Loss损失函数定义 |
utils/torch_utils.py | PyTorch工具函数 |
utils/triton.py | Triton Inference Server工具函数 |
utils/__init__.py | 工具模块的初始化文件 |
utils/aws/resume.py | AWS训练恢复工具函数 |
utils/aws/__init__.py | AWS工具模块的初始化文件 |
utils/flask_rest_api/example_request.py | Flask REST API示例请求 |
utils/flask_rest_api/restapi.py | Flask REST API实现 |
utils/loggers/__init__.py | 日志记录器模块的初始化文件 |
utils/loggers/clearml/clearml_utils.py | ClearML日志记录器工具函数 |
utils/loggers/clearml/hpo.py | ClearML超参数优化工具函数 |
utils/loggers/clearml/__init__.py | ClearML日志记录器模块的初始化文件 |
utils/loggers/comet/comet_utils.py | Comet日志记录器工具函数 |
utils/loggers/comet/hpo.py | Comet超参数优化工具函数 |
utils/loggers/comet/__init__.py | Comet日志记录器模块的初始化文件 |
在开始分析分割头部网络之前,让我们先简要了解一下YOLOv5(You Only Look Once,YOLO)模型。YOLO是一种目标检测和分割模型,它在单一前向传递中同时执行目标检测和分割任务,具有高度的效率和准确性。
在分割任务中,IoU(Intersection over Union)是一个重要的指标,用于衡量预测的掩码与真实掩码之间的重叠程度。mask_iou和masks_iou函数用于计算掩码之间的IoU,这有助于评估分割的准确性。
参考 Ziping Yu 等人提出的 RFE 模块:不同大小的感受野意味着捕获长距离依赖的能力不同,因此我们设计了 C3RFE 模块,采用空洞卷积(扩张卷积)来充分利用特征图中感受野的优势。受 TridentNet 的启发,我们使用四个分支、不同的空洞卷积扩张率来捕获多尺度信息和不同范围的依赖关系。所有分支共享权重,唯一的区别在于各自的感受野不同。一方面,这减少了参数量,从而降低了潜在的过拟合风险;另一方面,它可以充分利用每一个样本。所提出的 RFE 模块可分为两部分:基于空洞卷积的多分支部分,以及图2所示的聚集加权层。多分支部分分别以 1、2、3 作为不同空洞卷积的扩张率,并且都使用固定的 3x3 卷积核。此外,我们还增加了一个残差连接,以防止训练过程中出现梯度爆炸和梯度消失问题。聚集加权层用于收集来自不同分支的信息,并对每个分支的特征进行加权,加权操作用于平衡不同分支的表示。
为了说明这一点,我们将 YOLOv5 中 C3 模块的 Bottleneck 替换为 RFE 模块,以增大特征图的感受野,从而提高多尺度目标检测和识别的精度,如图所示。
学习率 (Learning Rate): 初始学习率为 0.01,并在训练过程中进行调整。
动量 (Momentum): 动量值为 0.937,用于加速模型的收敛。
权重衰减 (Weight Decay): 权重衰减系数为 0.0005,用于防止过拟合。
损失函数权重: 包括框损失 (box loss),分类损失 (classification loss),目标损失 (object loss) 等。这些权重用于平衡不同部分的损失。
数据增强: 包括颜色抖动 (HSV),平移、缩放、剪切、透视变换等,用于提升模型的泛化能力。
# Show the first three training-batch preview images
batch_keys = ("train_batch0", "train_batch1", "train_batch2")
display_images([files[key] for key in batch_keys])
裂缝的多样性: 训练数据包括了不同程度、不同形态的裂缝。有的裂缝较为明显,有的则较为细微。这有助于模型学习到裂缝的多样性。
背景的多样性: 背景包括了不同的墙体材质和颜色,这有助于提高模型的泛化能力。
标注: 从图中可以看出,裂缝的位置已经被准确标注出来。
损失函数的变化: 包括训练过程中的框损失 (box loss),分割损失 (segmentation loss),目标损失 (object loss) 和分类损失 (classification loss)。
性能指标的变化: 包括精确度 (Precision),召回率 (Recall) 和 mAP (Mean Average Precision)。
# Normalise the column names: the results file pads them with spaces
results_df.columns = [name.strip() for name in results_df.columns]

# One curve per training-loss column, all against the epoch index
loss_curves = (
    ('train/box_loss', 'Box Loss'),
    ('train/seg_loss', 'Segmentation Loss'),
    ('train/obj_loss', 'Object Loss'),
    ('train/cls_loss', 'Classification Loss'),
)

plt.figure(figsize=(12, 8))
for column, legend_label in loss_curves:
    plt.plot(results_df['epoch'], results_df[column], label=legend_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Functions over Epochs')
plt.legend()
plt.grid(True)
plt.show()
框损失 (Box Loss): 随着训练周期的进行,框损失呈下降趋势。这意味着模型逐渐学习到了正确预测物体边界框的能力。
分割损失 (Segmentation Loss): 分割损失也呈下降趋势,说明模型在学习如何准确分割图像上取得了进步。
目标损失 (Object Loss) 和 分类损失 (Classification Loss): 两者同样呈下降趋势,说明模型在识别目标和分类上的表现也在逐渐提升。
# Performance metrics over epochs, drawn as a 2x2 grid.
# Each panel is (title, ((column, legend label), ...)); panels fill the grid row by row.
metric_panels = (
    ('Precision and Recall for B over Epochs', (
        ('metrics/precision(B)', 'Precision (B)'),
        ('metrics/recall(B)', 'Recall (B)'),
    )),
    ('mAP for B over Epochs', (
        ('metrics/mAP_0.5(B)', 'mAP@0.5 (B)'),
        ('metrics/mAP_0.5:0.95(B)', 'mAP@0.5:0.95 (B)'),
    )),
    ('Precision and Recall for M over Epochs', (
        ('metrics/precision(M)', 'Precision (M)'),
        ('metrics/recall(M)', 'Recall (M)'),
    )),
    ('mAP for M over Epochs', (
        ('metrics/mAP_0.5(M)', 'mAP@0.5 (M)'),
        ('metrics/mAP_0.5:0.95(M)', 'mAP@0.5:0.95 (M)'),
    )),
)

plt.figure(figsize=(15, 10))
for position, (panel_title, curves) in enumerate(metric_panels, start=1):
    plt.subplot(2, 2, position)
    for column, legend_label in curves:
        plt.plot(results_df['epoch'], results_df[column], label=legend_label)
    plt.xlabel('Epoch')
    plt.ylabel('Score')
    plt.title(panel_title)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()
精确度和召回率 (Precision & Recall):
B 类别: 随着训练的进行,B 类别的精确度和召回率都呈上升趋势,这表明模型在检测 B 类别对象时的性能在逐步提升。
M 类别: 同样,M 类别的精确度和召回率也在逐渐提升,说明模型在检测 M 类别对象时也表现良好。
平均精确度 (mAP):
B 类别: mAP@0.5 和 mAP@0.5:0.95 都呈上升趋势,这意味着模型在不同的 IoU 阈值下都能取得较好的性能。
M 类别: M 类别的 mAP 同样呈上升趋势,显示出模型的检测能力在稳步提升。
模型学习能力: 模型在训练过程中的损失逐渐减小,性能指标逐渐提升,这表明模型具有良好的学习能力。
性能表现: 在验证集上的精确度、召回率和 mAP 的提升表明模型在实际任务上的性能表现优秀。
[1]郭磊,王邱龙,薛伟,等.基于注意力机制的光线昏暗条件下口罩佩戴检测[J].电子科技大学学报.2022,51(1).DOI:10.12178/1001-0548.2021222 .
[2]杨其晟,李文宽,杨晓峰,等.改进YOLOv5的苹果花生长状态检测方法[J].计算机工程与应用.2022,58(4).DOI:10.3778/j.issn.1002-8331.2107-0523 .
[3]卢宏涛,罗沐昆.基于深度学习的计算机视觉研究新进展[J].数据采集与处理.2022,37(2).DOI:10.16337/j.1004-9037.2022.02.001 .
[4]岳有军,刘杰琼,王红君,等.基于改进YOLOv3模型的苹果树叶片病斑检测[J].中国科技论文.2021,(11).DOI:10.3969/j.issn.2095-2783.2021.11.008 .
[5]何东,陈金令,王熙.基于改进YOLOv3的红外行人目标检测[J].中国科技论文.2021,(7).DOI:10.3969/j.issn.2095-2783.2021.07.012 .
[7]张宁.基于Faster R-CNN的公路路面病害检测算法的研究[J].华东交通大学.2019.
[9]Yuchuan Du,Ning Pan,Zihao Xu,等.Pavement distress detection and classification based on YOLO network[J].International Journal of Pavement Engineering.2021,22(13).1659-1672.DOI:10.1080/10298436.2020.1714047 .
[10]He, Kaiming,Zhang, Xiangyu,Ren, Shaoqing,等.Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition[J].IEEE Transactions on Pattern Analysis and Machine Intelligence.2015,37(9).1904-1916.DOI:10.1109/TPAMI.2015.2389824 .
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。