赞
踩
使用labelimg(本文不进行讲解)进行数据标注,直接选择yolo数据集格式,之后将图片与标注好的标签文件(.txt)放在两个不同的文件夹中(默认行为)。
比对两个文件夹中的文件个数。正常来说,txt文件夹中的文件应该比图片文件夹中的多一个,因为labelimg会在txt文件夹中额外生成一个classes.txt文件(类别列表)。
如果文件数量不符合上述,使用代码将具有相同名称的不同类型文件选出(图片对应的标签文件与图片名称相同,例如图片为1.jpg则标签文件为1.txt)。
使用下面代码将相同名称文件放入另一个文件夹:
import os
import shutil

# Folder holding the images (replace with your own path).
img_folder = 'C://Users//Administrator//Desktop//abc//JPEGImages'

# Folder holding the label (.txt) files (replace with your own path).
txt_folder = 'C://Users//Administrator//Desktop//abc//Annotations'

# Folder that receives every image/label pair sharing a base name.
output_folder = 'C://Users//Administrator//Desktop//abc//aaa'


def move_common_files(img_folder, txt_folder, output_folder):
    """Move every image/label pair with the same base name into one folder.

    An image and a label belong together when their file names are equal
    once the extension is dropped (``1.jpg`` <-> ``1.txt``).

    Args:
        img_folder: Directory containing the images.
        txt_folder: Directory containing the ``.txt`` label files.
        output_folder: Destination directory; created if it does not exist.

    Returns:
        The sorted list of base names that were moved.
    """
    # Remember the real file name of each image so that extensions other
    # than ".jpg" (".png", ".JPG", ...) are moved instead of crashing.
    image_by_stem = {}
    for entry in sorted(os.listdir(img_folder)):
        stem = os.path.splitext(entry)[0]
        image_by_stem.setdefault(stem, entry)

    # Only real label files count; anything else in the folder is ignored.
    label_by_stem = {}
    for entry in sorted(os.listdir(txt_folder)):
        stem, ext = os.path.splitext(entry)
        if ext.lower() == '.txt':
            label_by_stem.setdefault(stem, entry)

    common_files = sorted(set(image_by_stem) & set(label_by_stem))

    # Moving into a missing directory would fail, so create it first.
    os.makedirs(output_folder, exist_ok=True)

    for stem in common_files:
        image_name = image_by_stem[stem]
        label_name = label_by_stem[stem]
        # shutil.move also works when source and destination are on
        # different drives, where os.rename raises.
        shutil.move(os.path.join(img_folder, image_name),
                    os.path.join(output_folder, image_name))
        shutil.move(os.path.join(txt_folder, label_name),
                    os.path.join(output_folder, label_name))

    return common_files


if __name__ == "__main__":
    common_files = move_common_files(img_folder, txt_folder, output_folder)
    print(f'共找到{len(common_files)}个相同文件名的文件,并已移动到{output_folder}文件夹中。')

在你的yolov7项目文件夹中建立一个文件夹,我命名为adata
在adata中需要images和labels两个文件夹,分别用来存放划分后的图片和标签文件。这两个文件夹不需要手动创建:下面的脚本会在当前工作目录下自动创建它们,因此请在adata目录中运行脚本。代码如下:
# Split the images and their annotation files into training, validation
# and test sets by ratio.
import shutil
import random
import os

# Source folders.
image_original_path = "E:/yolo/yolov7-main/adata/data/tp"
label_original_path = "E:/yolo/yolov7-main/adata/data/txt"  # keep classes.txt out of this folder

# Every output path below is created relative to the current working
# directory, so run the script from the dataset folder.
cur_path = os.getcwd()

# Training-set folders.
train_image_path = os.path.join(cur_path, "images/train/")
train_label_path = os.path.join(cur_path, "labels/train/")

# Validation-set folders.
val_image_path = os.path.join(cur_path, "images/val/")
val_label_path = os.path.join(cur_path, "labels/val/")

# Test-set folders.
test_image_path = os.path.join(cur_path, "images/test/")
test_label_path = os.path.join(cur_path, "labels/test/")

# Text files listing the image paths of each split.
list_train = os.path.join(cur_path, "train.txt")
list_val = os.path.join(cur_path, "val.txt")
list_test = os.path.join(cur_path, "test.txt")

# Share of the data assigned to each split.
train_percent = 0.8
val_percent = 0.1
test_percent = 0.1
-
-
-
-
-
def del_file(path):
    """Delete every regular file directly inside ``path``.

    Sub-directories are left untouched.

    Args:
        path: Directory whose files are removed.
    """
    for entry in os.listdir(path):
        # os.path.join picks the right separator on every platform; a
        # hard-coded backslash only forms a valid path on Windows.
        target = os.path.join(path, entry)
        # os.remove cannot delete directories, so skip anything that is
        # not a file instead of aborting half-way through.
        if os.path.isfile(target):
            os.remove(target)
-
-
-
-
-
def mkdir():
    """Prepare the six output folders of the split.

    For the image and label folder of each of the train, validation and
    test sets: create the folder when it is missing, otherwise empty it
    with ``del_file`` so files from an earlier run do not linger.
    """
    split_dirs = (
        train_image_path,
        train_label_path,
        val_image_path,
        val_label_path,
        test_image_path,
        test_label_path,
    )
    for directory in split_dirs:
        if os.path.exists(directory):
            del_file(directory)
        else:
            os.makedirs(directory)
-
-
-
-
-
def clearfile():
    """Remove the train/val/test image-list files left by an earlier run."""
    for listing in (list_train, list_val, list_test):
        try:
            os.remove(listing)
        except FileNotFoundError:
            # Nothing to clear: the list file has not been written yet.
            pass
-
-
-
-
-
def main():
    """Randomly split the labelled images into train/val/test sets.

    Reads the label files in ``label_original_path``, assigns
    ``train_percent`` of them to the training set and ``val_percent`` to
    the validation set, and puts the remainder in the test set. For each
    label, the label file and the ``.jpg`` image of the same base name
    (taken from ``image_original_path``) are copied into the folders of
    their split, and the copied image's path is appended to that split's
    list file (``list_train`` / ``list_val`` / ``list_test``).

    Raises:
        FileNotFoundError: If a label has no matching ``.jpg`` image.
    """
    mkdir()
    clearfile()

    # Only annotation files take part in the split. classes.txt is a class
    # list, not an annotation, and has no matching image.
    label_names = sorted(
        entry for entry in os.listdir(label_original_path)
        if entry.lower().endswith('.txt') and entry != 'classes.txt'
    )
    num_txt = len(label_names)
    all_indices = range(num_txt)

    num_train = int(num_txt * train_percent)
    num_val = int(num_txt * val_percent)

    # Draw the training indices first, then draw the validation indices
    # from what is left; whatever remains after that is the test set.
    # Sets keep the membership tests in the copy loop O(1).
    train = set(random.sample(all_indices, num_train))
    val_test = [i for i in all_indices if i not in train]
    val = set(random.sample(val_test, num_val))

    print("训练集数目:{}, 验证集数目:{}, 测试集数目:{}".format(len(train), len(val), len(val_test) - len(val)))

    # The with-block closes all three list files even if a copy fails.
    with open(list_train, 'w') as file_train, \
            open(list_val, 'w') as file_val, \
            open(list_test, 'w') as file_test:
        for index, label_name in enumerate(label_names):
            name = os.path.splitext(label_name)[0]

            src_image = os.path.join(image_original_path, name + '.jpg')
            src_label = os.path.join(label_original_path, label_name)

            if index in train:
                image_dir, label_dir, listing = train_image_path, train_label_path, file_train
            elif index in val:
                image_dir, label_dir, listing = val_image_path, val_label_path, file_val
            else:
                image_dir, label_dir, listing = test_image_path, test_label_path, file_test

            dst_image = os.path.join(image_dir, name + '.jpg')
            dst_label = os.path.join(label_dir, name + '.txt')

            shutil.copyfile(src_image, dst_image)
            shutil.copyfile(src_label, dst_label)
            listing.write(dst_image + '\n')


if __name__ == "__main__":
    main()

Images
每个文件夹中都有图片
Labels
同理每个文件夹中都有txt标签文件
除此之外在adata文件夹中还应该有三个txt文件用来存放图片路径
三个txt文件用来存放图片路径,如下图:
然后新建一个数据集配置文件(.yaml),内容如下:
# Text files listing the image paths of each split
train: E:\yolo\yolov7-main\adata\train.txt
val: E:\yolo\yolov7-main\adata\val.txt
test: E:\yolo\yolov7-main\adata\test.txt

# Number of classes
nc: 4

# Class names, in class-index order (0, 1, 2, 3)
names: ["3A", "2A", "1A", "0A"]
到此可以直接将.yaml文件作为输入开始训练。
如果使用该步骤建立数据集训练过程中遇到什么问题,可以私信博主,看到会及时回的,写作不易,烦请点个赞,点个关注一起学习进步。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。