赞
踩
脚本如下:
import json
import os


def read_json(json_file):
    """Load one JSON annotation file and return the parsed object.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (``json2txt`` expects a
        dict with ``imageWidth``, ``imageHeight`` and ``shapes`` keys).
    """
    # Explicit UTF-8: JSON text is UTF-8 by specification, while the platform
    # default encoding may not be (e.g. on Windows), which breaks non-ASCII labels.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def _shape_to_yolo_line(shape, image_width, image_height):
    """Format one annotated shape as a ``class xc yc w h`` text line.

    Only the first two points of the shape are used; they are treated as
    opposite corners of a rectangle. All four numbers are divided by the
    image size so they are relative to the image.
    """
    # Class mapping: 'Lesions' is class 0, every other label is class 1.
    index = 0 if shape['label'] == 'Lesions' else 1

    first, second = shape['points'][0], shape['points'][1]
    x1, y1 = first[0], first[1]
    x2, y2 = second[0], second[1]

    x_center = (x1 + x2) / 2 / image_width
    y_center = (y1 + y2) / 2 / image_height
    # abs(): the two corners may be stored in either order (box drawn from
    # bottom-right to top-left); width/height must never be negative.
    bbox_w = abs(x2 - x1) / image_width
    bbox_h = abs(y2 - y1) / image_height

    bbox = (x_center, y_center, bbox_w, bbox_h)
    return str(index) + " " + " ".join(str(a) for a in bbox) + '\n'


def json2txt(json_path, txt_path):
    """Convert every ``*.json`` annotation in a folder to a ``.txt`` label file.

    For each JSON file in ``json_path`` a text file with the same base name is
    written to ``txt_path``, containing one line per entry of the file's
    ``shapes`` list. The label of every converted shape is printed.

    Args:
        json_path: Folder containing the JSON annotation files. Entries whose
            extension is not ``.json`` are ignored.
        txt_path: Folder that receives the ``.txt`` files; it is created if
            it does not exist.
    """
    if txt_path:
        os.makedirs(txt_path, exist_ok=True)

    for json_file in os.listdir(json_path):
        stem, ext = os.path.splitext(json_file)
        if ext.lower() != '.json':
            continue

        # Parse first so that an unreadable JSON does not leave an empty txt behind.
        json_data = read_json(os.path.join(json_path, json_file))
        image_width = json_data['imageWidth']
        image_height = json_data['imageHeight']

        txt_name = os.path.join(txt_path, stem + '.txt')
        # Context manager guarantees the label file is flushed and closed.
        with open(txt_name, 'w') as txt_file:
            for shape in json_data['shapes']:
                txt_file.write(
                    _shape_to_yolo_line(shape, image_width, image_height))
                print(shape['label'])


if __name__ == "__main__":
    # Raw string: '\i' is not a valid escape sequence in a normal literal.
    json_path = r'H:\images_json'
    txt_path = 'data/'
    json2txt(json_path, txt_path)
json_path是你放置json文件的文件夹路径
txt_path是转换后生成的txt文件的保存文件夹路径
这里对label的判断要改成你自己标注的类别;我这样写是因为我的数据里只有两个类别('Lesions'记为0,其余记为1)
划分数据集的代码如下,只需要修改img_dir和label_dir,改成你自己存放图片和txt的路径就行
# -*- coding: utf-8 -*-
"""Split a dataset into training, validation and test sets."""
import os
import random
import shutil


def makedir(new_dir):
    """Create ``new_dir`` (including missing parent folders) if it is absent."""
    os.makedirs(new_dir, exist_ok=True)


def split_data(img_dir, label_dir, img_ext='.tif', split_dir=None, seed=1):
    """Copy images and their label files into train/valid/test folders.

    Images are shuffled with a fixed seed and split 80% / 10% / 10%. For each
    subset, ``<split_dir>/<subset>/images`` and ``<split_dir>/<subset>/labels``
    are created and the files are copied (the originals are left in place).
    A summary of the subset sizes is printed at the end.

    Args:
        img_dir: Folder containing the source images.
        label_dir: Folder containing one ``<image stem>.txt`` per image.
        img_ext: File-name suffix that selects the images (case-sensitive).
            Defaults to ``'.tif'``.
        split_dir: Output root folder. Defaults to ``./dataset``.
        seed: Seed for the shuffle, so the split is repeatable.

    Raises:
        FileNotFoundError: If any selected image has no label file. This is
            checked before anything is created or copied, so a failure never
            leaves a partially written dataset.
    """
    if split_dir is None:
        split_dir = os.path.join(".", "dataset")

    # Split ratios; whatever is left after train + valid goes to the test set.
    train_pct = 0.8
    valid_pct = 0.1

    # Sort before shuffling: os.listdir order is arbitrary, so without sorting
    # the same seed could yield different splits on different machines.
    imgs = sorted(
        name for name in os.listdir(img_dir) if name.endswith(img_ext))

    def label_name(img_name):
        # Strip exactly the matched suffix, whatever its length.
        return img_name[:len(img_name) - len(img_ext)] + '.txt'

    missing = [
        name for name in imgs
        if not os.path.isfile(os.path.join(label_dir, label_name(name)))
    ]
    if missing:
        raise FileNotFoundError(
            'no label file in {} for: {}'.format(label_dir, ', '.join(missing)))

    subset_dirs = {
        subset: os.path.join(split_dir, subset)
        for subset in ('train', 'valid', 'test')
    }
    for subset_dir in subset_dirs.values():
        for kind in ('images', 'labels'):
            makedir(os.path.join(subset_dir, kind))

    # Private generator: reproducible shuffle without reseeding the global RNG.
    random.Random(seed).shuffle(imgs)

    img_count = len(imgs)
    train_point = int(img_count * train_pct)                # [0, train_point)
    valid_point = int(img_count * (train_pct + valid_pct))  # [train_point, valid_point)

    for i, img_name in enumerate(imgs):
        if i < train_point:
            subset = 'train'
        elif i < valid_point:
            subset = 'valid'
        else:
            subset = 'test'

        txt_name = label_name(img_name)
        shutil.copy(
            os.path.join(img_dir, img_name),
            os.path.join(subset_dirs[subset], 'images', img_name))
        shutil.copy(
            os.path.join(label_dir, txt_name),
            os.path.join(subset_dirs[subset], 'labels', txt_name))

    print('train:{}, valid:{}, test:{}'.format(
        train_point, valid_point - train_point, img_count - valid_point))


if __name__ == "__main__":
    img_dir = './images'
    label_dir = './data'
    split_data(img_dir, label_dir)
img_dir是原始图片路径,label_dir是原始txt路径。只需要改这俩就能直接划分数据集了,嘎嘎好用。
划分后效果如图所示:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。