数据先从数据集路径读取到数据迭代器 Dataset 中,再送入加载器 DataLoader。随后通过 `for ... in` 循环迭代 DataLoader,使其在内部按批(batch)分配索引:先由索引采样 `index = self._next_index()` 得到当前批的索引列表,再由数据采样 `data = self._dataset_fetcher.fetch(index)` 把这些索引下发给 Dataset;Dataset 的 `def __getitem__(self, index):` 根据每个索引处理出对应样本并返回给 DataLoader。当该批的索引全部下发、对应数据全部返回后,就可以用这一批数据进一步进行模型的训练。
class YoloDataset(Dataset):
    """Dataset holding YOLO training annotations and augmentation settings.

    The constructor only stores its arguments; no file is opened here.

    Attributes:
        annotation_lines: Lines of the training annotation txt file.
        input_shape: Fixed size the model is fed with.
        num_classes: Number of object classes.
        anchors: Anchor box sizes.
        anchors_mask: Stored as given; not used inside this block.
        epoch_length: Total number of epochs, stored as given.
        mosaic, mosaic_prob: Mosaic switch and its probability.
        mixup, mixup_prob: MixUp switch and its probability.
        train: Training-mode flag, stored as given.
        special_aug_ratio: Stored as given (default 0.7).
        epoch_now: Initialised to -1; nothing in this block changes it.
        length: Number of annotation lines.
        bbox_attrs: ``5 + num_classes``.
    """

    def __init__(self, annotation_lines, input_shape, num_classes, anchors,
                 anchors_mask, epoch_length, mosaic, mixup, mosaic_prob,
                 mixup_prob, train, special_aug_ratio=0.7):
        super().__init__()
        self.annotation_lines = annotation_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.anchors = anchors
        self.anchors_mask = anchors_mask
        self.epoch_length = epoch_length
        self.mosaic = mosaic
        self.mosaic_prob = mosaic_prob
        self.mixup = mixup
        self.mixup_prob = mixup_prob
        self.train = train
        self.special_aug_ratio = special_aug_ratio

        self.epoch_now = -1
        self.length = len(self.annotation_lines)

        self.bbox_attrs = 5 + num_classes

    def __len__(self):
        """Return the number of annotation lines."""
        return self.length
self.annotation_lines 是训练集标注 txt 文件的各行内容,每行的格式为:图片绝对路径,后接以空格分隔的该图片的所有真实框(每个真实框写作逗号分隔的整数 x1,y1,x2,y2,类别)。如图所示。
input_shape 为设定输入模型的固定尺寸,该例设为 [300,300]
self.num_classes 类别数量
self.anchors 锚框的尺寸,这里有9个锚框
# Wrap the requested index so it always addresses an existing annotation line.
index = index % self.length
# Load the image and boxes for that annotation line; `random` is taken from the dataset's train flag.
image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
    """Load one annotated image and return a randomly augmented copy with its boxes.

    The image is resized with a jittered aspect ratio and a random scale,
    pasted at a random offset onto a grey canvas of the target size, mirrored
    horizontally depending on a random draw, and finally given a random HSV
    gain. The boxes go through the same geometric steps, are clipped to the
    canvas, and are dropped when their width or height is not above 1.

    Args:
        annotation_line: Whitespace-separated string. The first token is the
            image path; every further token is one box written as
            comma-separated integers.
        input_shape: Target size as ``(height, width)``.
        jitter: Amplitude of the aspect-ratio jitter; both jitter factors are
            drawn with ``self.rand(1 - jitter, 1 + jitter)``.
        hue: Amplitude of the random gain applied to the H channel.
        sat: Amplitude of the random gain applied to the S channel.
        val: Amplitude of the random gain applied to the V channel.
        random: Not consulted by the code visible in this excerpt.
            NOTE(review): confirm whether a non-augmenting branch was elided.

    Returns:
        ``(image_data, box)``: the augmented RGB image as a ``uint8`` array of
        the target size, and the array of boxes that survived clipping.
    """
    line = annotation_line.split()

    # Read the image and convert it to RGB (cvtColor is a project helper).
    image = Image.open(line[0])
    image = cvtColor(image)

    # Source size (PIL order: width, height) and target size (height, width).
    iw, ih = image.size
    h, w = input_shape

    # Ground-truth boxes: one row of integers per box token.
    box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

    # Jitter the aspect ratio and pick a random scale, then derive the resized
    # width/height from whichever side the aspect ratio makes the longer one.
    new_ar = iw/ih * self.rand(1-jitter, 1+jitter) / self.rand(1-jitter, 1+jitter)
    scale = self.rand(.25, 2)
    if new_ar < 1:
        nh = int(scale*h)
        nw = int(nh*new_ar)
    else:
        nw = int(scale*w)
        nh = int(nw/new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Paste the resized image at a random offset onto a grey (128) canvas.
    dx = int(self.rand(0, w-nw))
    dy = int(self.rand(0, h-nh))
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # Random horizontal mirror.
    flip = self.rand() < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    image_data = np.array(image, np.uint8)

    # Colour-space augmentation: an HSV transform is used here.
    # One random gain per channel, each in [1 - amplitude, 1 + amplitude].
    r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    # Channel planes get their own names so the hue/sat/val arguments are not shadowed.
    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
    dtype = image_data.dtype

    # Apply the gains through 256-entry lookup tables; hue wraps at 180,
    # saturation and value are clipped to [0, 255].
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    image_data = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
    image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

    # Move the boxes through the same resize / paste / flip steps.
    if len(box) > 0:
        box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
        box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
        if flip:
            # Columns are swapped so that x1 stays the smaller coordinate.
            box[:, [0, 2]] = w - box[:, [2, 0]]
        # Clip to the canvas.
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        # Discard boxes whose width or height is not above 1 after clipping.
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]

    return image_data, box
调整完后,将图片跟真实框返给image, box 。
# Convert to float32, pass through preprocess_input (defined elsewhere), then reorder the axes to (2, 0, 1), which puts the last axis first.
image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
# Boxes as a float32 array.
box = np.array(box, dtype=np.float32)
通过 self.mosaic、self.mosaic_prob、self.epoch_now、self.special_aug_ratio 来控制是否要对图像进行 Mosaic 处理,并通过 self.mixup、self.mixup_prob 来控制是否再进行 MixUp 处理,具体为:
# NOTE(review): indentation was lost in this excerpt, so the nesting of the statements below cannot be confirmed from here.
# Mosaic condition: the switch is on, a random draw is below mosaic_prob,
# and epoch_now is still below epoch_length * special_aug_ratio.
if self.mosaic and self.rand() < self.mosaic_prob and self.epoch_now < self.epoch_length * self.special_aug_ratio:
# Draw 3 annotation lines for the mosaic (`sample` is presumably random.sample — confirm).
lines = sample(self.annotation_lines, 3)
image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
# MixUp condition: the switch is on and another random draw is below mixup_prob.
if self.mixup and self.rand() < self.mixup_prob:
# Draw one more annotation line and load it through get_random_data.
lines = sample(self.annotation_lines, 1)
image_2, box_2 = self.get_random_data(lines[0], self.input_shape, random = self.train)
# Combine the current image/boxes with the extra sample.
image, box = self.get_random_data_with_MixUp(image, box, image_2, box_2)
# Decide on a horizontal mirror from one random draw; the mirror is skipped when there are no boxes.
flip = self.rand()<.5
if flip and len(box)>0:
image = image.transpose(Image.FLIP_LEFT_RIGHT)
# Mirror the x coordinates about the source image width; the two columns are swapped so x1 stays the smaller one.
box[:, [0,2]] = iw - box[:, [2,0]]
# Jitter the aspect ratio and pick a random scale in the range passed to self.rand.
new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.4, 1)
# Derive the resized size from the height when the jittered ratio is below 1,
# otherwise from the width. Without the `else`, the second pair of
# assignments would always overwrite the first.
if new_ar < 1:
    nh = int(scale*h)
    nw = int(nh*new_ar)
else:
    nw = int(scale*w)
    nh = int(nw/new_ar)
image = image.resize((nw, nh), Image.BICUBIC)
# Pixel position of the mosaic seam inside the (w, h) canvas.
seam_x = int(w*min_offset_x)
seam_y = int(h*min_offset_y)

# Anchor each tile against the seam:
#   0 -> bottom-right corner on the seam (fills the top-left region)
#   1 -> top-right corner on the seam    (fills the bottom-left region)
#   2 -> top-left corner on the seam     (fills the bottom-right region)
#   3 -> bottom-left corner on the seam  (fills the top-right region)
if index == 0:
    dx, dy = seam_x - nw, seam_y - nh
elif index == 1:
    dx, dy = seam_x - nw, seam_y
elif index == 2:
    dx, dy = seam_x, seam_y
elif index == 3:
    dx, dy = seam_x, seam_y - nh

# Paste the tile onto a grey (128) canvas and keep the canvas as an array.
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image)
# NOTE(review): indentation was lost in this excerpt; how far the `if` below extends cannot be confirmed from here.
# Map the boxes into the canvas frame: scale by the resize ratio, then shift by the paste offset.
if len(box)>0:
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
# Clip the coordinates to the canvas bounds.
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
# Keep only boxes whose width and height are both above 1 after clipping.
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
# Copy the surviving boxes into a fresh zero-initialised array with 5 columns.
box_data = np.zeros((len(box),5))
box_data[:len(box)] = box
# Seam position in pixels.
cutx = int(w * min_offset_x)
cuty = int(h * min_offset_y)
# Assemble the mosaic: each of the four regions around (cutx, cuty) is copied
# from the same region of one of the four canvases in image_datas
# (0: top-left, 1: bottom-left, 2: bottom-right, 3: top-right).
new_image = np.zeros([h, w, 3])
new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
# Convert the assembled image to uint8.
new_image = np.array(new_image, np.uint8)
def merge_bboxes(self, bboxes, cutx, cuty):
    """Clip the boxes of each mosaic tile to that tile's region and merge them.

    Tile ``i`` owns one region around the seam ``(cutx, cuty)``:
    0 is top-left, 1 is bottom-left, 2 is bottom-right, 3 is top-right.
    A box lying entirely beyond its tile's region is dropped. A box that
    straddles a seam line is cut back to that line. Boxes of any tile index
    other than 0-3 are passed through unchanged.

    Args:
        bboxes: One iterable of boxes per tile; each box is indexable with
            ``x1, y1, x2, y2`` in positions 0-3 and its last element is
            carried over unchanged.
        cutx: x coordinate of the vertical seam.
        cuty: y coordinate of the horizontal seam.

    Returns:
        A flat list of ``[x1, y1, x2, y2, last]`` lists.
    """
    merge_bbox = []
    for i, tile_boxes in enumerate(bboxes):
        for box in tile_boxes:
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

            # Whether the box straddles the horizontal / vertical seam line.
            # The y and x adjustments are independent, so both can be
            # evaluated on the untouched coordinates.
            crosses_y = y2 >= cuty and y1 <= cuty
            crosses_x = x2 >= cutx and x1 <= cutx

            if i == 0:  # top-left region
                if y1 > cuty or x1 > cutx:
                    continue
                if crosses_y:
                    y2 = cuty
                if crosses_x:
                    x2 = cutx
            elif i == 1:  # bottom-left region
                if y2 < cuty or x1 > cutx:
                    continue
                if crosses_y:
                    y1 = cuty
                if crosses_x:
                    x2 = cutx
            elif i == 2:  # bottom-right region
                if y2 < cuty or x2 < cutx:
                    continue
                if crosses_y:
                    y1 = cuty
                if crosses_x:
                    x1 = cutx
            elif i == 3:  # top-right region
                if y1 > cuty or x2 < cutx:
                    continue
                if crosses_y:
                    y2 = cuty
                if crosses_x:
                    x1 = cutx

            merge_bbox.append([x1, y1, x2, y2, box[-1]])
    return merge_bbox
# NOTE(review): the next two lines repeat the same condition and indentation was lost in this excerpt.
# It is unclear whether the first is a quoted heading or a real nested check (which would draw self.rand() twice) — confirm against the original file.
if self.mixup and self.rand() < self.mixup_prob:
if self.mixup and self.rand() < self.mixup_prob:
# Draw one more annotation line and load it through get_random_data.
lines = sample(self.annotation_lines, 1)
image_2, box_2 = self.get_random_data(lines[0], self.input_shape, random = self.train)
# Combine the current image/boxes with the extra sample.
image, box = self.get_random_data_with_MixUp(image, box, image_2, box_2)
image, box = self.get_random_data_with_MixUp(image, box, image_2, box_2),这个函数将 Mosaic 处理完的图片与随机抽取的另一张图片进行 MixUp 操作:两张图片各按 0.5 的权重逐像素相加,并把两者的真实框合并在一起。
def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
    """Blend two images with equal weights and merge their boxes.

    Args:
        image_1: First image; converted to a float32 array.
        box_1: Boxes of the first image (may be empty).
        image_2: Second image, same shape as ``image_1``.
        box_2: Boxes of the second image (may be empty).

    Returns:
        ``(new_image, new_boxes)``: the float32 average of both images, and
        the boxes of both images. When one side has no boxes the other
        side's boxes are returned as-is; otherwise the two are concatenated
        along axis 0.
    """
    new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5
    if len(box_1) == 0:
        new_boxes = box_2
    elif len(box_2) == 0:
        new_boxes = box_1
    else:
        # Only concatenate when both sides have boxes: an empty side would
        # have a different number of dimensions.
        new_boxes = np.concatenate([box_1, box_2], axis=0)
    return new_image, new_boxes
# NOTE(review): indentation was lost in this excerpt; how far the `if nL:` below extends cannot be confirmed from here.
# One output row per box, 6 columns, zero-initialised.
nL = len(box)
labels_out = np.zeros((nL, 6))
if nL:
# Normalise the ground-truth boxes to the 0-1 range
box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
# Indices 0 and 1 hold the centre of the ground-truth box
# Indices 2 and 3 hold the width and height of the ground-truth box
# Index 4 holds the class of the ground-truth box
box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
# Reorder the columns to match the training format
# Index 0 of labels_out is filled in at collate time
labels_out[:, 1] = box[:, -1]
labels_out[:, 2:] = box[:, :4]
def yolo_dataset_collate(batch):
    """Collate ``(image, labels)`` samples into two float tensors.

    Args:
        batch: Sequence of ``(img, box)`` pairs. ``img`` is an array with the
            same shape for every sample; ``box`` is a 2-D array with one row
            per label. Column 0 of ``box`` is overwritten in place.

    Returns:
        ``(images, bboxes)``: the images stacked along a new first axis, and
        all label rows concatenated along axis 0, both as ``FloatTensor``.
        Column 0 of ``bboxes`` holds the position of the owning image within
        the batch.
    """
    images = []
    bboxes = []
    for i, (img, box) in enumerate(batch):
        images.append(img)
        # Tag every label row with the index of its image inside the batch so
        # the rows can be told apart after concatenation.
        box[:, 0] = i
        bboxes.append(box)

    images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
    bboxes = torch.from_numpy(np.concatenate(bboxes, 0)).type(torch.FloatTensor)
    return images, bboxes
for iteration, batch in enumerate(gen):
if iteration >= epoch_step:
images, targets = batch[0], batch[1]
with torch.no_grad():
if cuda:
images = images.cuda(local_rank)
targets = targets.cuda(local_rank)
