I trained with data in ADE20K format. First, a pitfall I ran into: at the beginning I only knew that semantic segmentation requires converting the json annotations to grayscale masks. My dataset, counting the background, is a three-class problem, and to distinguish the classes I set the pixel values to [0,0,0], [34,34,34] and [78,78,78]. Training then went wrong: apart from the background, the metrics for the other two classes were all 0. I later learned from the blog post linked in the references that the grayscale values are constrained: they must be the class indices themselves, i.e. in the range [0, num_classes-1]. With my three classes that means [0, 2]; only then does training produce normal metrics.
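It is worth verifying this directly before training. Below is a minimal sanity-check sketch (my own addition, assuming the converted masks already sit in the masks folder created in the next step); it asserts that every mask only contains class indices below num_classes:

import os
import cv2
import numpy as np

num_classes = 3  # background + belt + coal

# Check every converted mask for out-of-range pixel values.
for fname in os.listdir('masks'):
    mask = cv2.imread(os.path.join('masks', fname), cv2.IMREAD_GRAYSCALE)
    values = np.unique(mask)
    assert values.max() <= num_classes - 1, f'{fname} has out-of-range values: {values}'
    print(fname, values)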
OK, now let's look at the directory layout:
images: the original images
labelme_jsons: the json annotation files
masks: a newly created folder for the converted png masks
import seedir as sd
import os
import json
import numpy as np
import cv2
import shutil
from tqdm import tqdm

# Inspect the directory structure
sd.seedir('coalData', style='emoji', depthlimit=1)

Dataset_Path = 'coalData'

# 0 = background; class indices start at 1.
# One entry per class, drawn in order (large/coarse regions first, small/fine ones last).
# Replace with your own labels; my first class is belt, the second is coal.
class_info = [
    {'label': 'belt', 'type': 'polygon', 'color': 1},  # polygon (multi-point) annotation
    {'label': 'coal', 'type': 'polygon', 'color': 2},
]

# Convert a single image
def labelme2mask_single_img(img_path, labelme_json_path):
    '''Take the paths of an original image and its labelme annotation, return the mask.'''
    img_bgr = cv2.imread(img_path)
    img_mask = np.zeros(img_bgr.shape[:2], dtype=np.uint8)  # blank image, 0 = background

    with open(labelme_json_path, 'r', encoding='utf-8') as f:
        labelme = json.load(f)

    for one_class in class_info:  # iterate over the classes in order
        for each in labelme['shapes']:  # find all annotations belonging to the current class
            if each['label'] == one_class['label']:
                if one_class['type'] == 'polygon':
                    # polygon annotation: fill the closed region on the blank image
                    points = [np.array(each['points'], dtype=np.int32).reshape((-1, 1, 2))]
                    img_mask = cv2.fillPoly(img_mask, points, color=one_class['color'])
                elif one_class['type'] == 'line' or one_class['type'] == 'linestrip':
                    # line / linestrip annotation: draw the open polyline
                    # (these types need a 'thickness' key in class_info)
                    points = [np.array(each['points'], dtype=np.int32).reshape((-1, 1, 2))]
                    img_mask = cv2.polylines(img_mask, points, isClosed=False,
                                             color=one_class['color'],
                                             thickness=one_class['thickness'])
                elif one_class['type'] == 'circle':
                    # circle annotation: first point is the center, second lies on the circle
                    points = np.array(each['points'], dtype=np.int32)
                    center_x, center_y = points[0][0], points[0][1]  # center coordinates
                    edge_x, edge_y = points[1][0], points[1][1]      # a point on the circle
                    radius = np.linalg.norm(np.array([center_x, center_y]) -
                                            np.array([edge_x, edge_y])).astype('uint32')  # radius
                    img_mask = cv2.circle(img_mask, (center_x, center_y), radius,
                                          one_class['color'], one_class['thickness'])
                else:
                    print('Unknown annotation type', one_class['type'])

    return img_mask

# labelme -> mask, batch conversion
os.chdir(Dataset_Path)
# os.mkdir('masks')
os.chdir('images')
for img_path in tqdm(os.listdir()):
    try:
        labelme_json_path = os.path.join('../', 'labelme_jsons',
                                         '.'.join(img_path.split('.')[:-1]) + '.json')
        img_mask = labelme2mask_single_img(img_path, labelme_json_path)
        mask_path = img_path.split('.')[0] + '.png'
        cv2.imwrite(os.path.join('../', 'masks', mask_path), img_mask)
    except Exception as E:
        print(img_path, 'conversion failed', E)
The original image and the converted mask are shown below:
Because there are so few classes, the converted pixel values all fall in [0, 2], so the mask looks solid black to the eye. You can open it with the system's built-in Paint tool to check that the conversion is correct.
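If you prefer to check in code rather than with Paint, here is a small sketch (my own addition; 'example.png' is a placeholder file name) that stretches the values so the regions become visible:

import cv2

mask = cv2.imread('masks/example.png', cv2.IMREAD_GRAYSCALE)
cv2.imwrite('example_vis.png', mask * 100)  # maps values 0/1/2 to 0/100/200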
Next, split the data into training and test sets at an 8:2 ratio:
import os
import shutil
import random
from tqdm import tqdm

# Dataset path
Dataset_Path = 'coalData'

# Inspect the dataset directory structure
import seedir as sd
sd.seedir(Dataset_Path, style='emoji', depthlimit=1)

# Create the training and validation folders
os.chdir(Dataset_Path)
os.mkdir('training')
os.mkdir('validation')

len(os.listdir('images'))
len(os.listdir('masks'))

# Train/test split
test_frac = 0.2   # test-set fraction
random.seed(123)  # random seed, for reproducibility

folder = 'images'
img_paths = os.listdir(folder)
random.shuffle(img_paths)  # shuffle

val_number = int(len(img_paths) * test_frac)  # number of test files
train_files = img_paths[val_number:]  # training file names
val_files = img_paths[:val_number]    # test file names

print('total files', len(img_paths))
print('training files', len(train_files))
print('test files', len(val_files))

# Move the training images into training/
for each in tqdm(train_files):
    src_path = os.path.join(folder, each)
    dst_path = os.path.join('training', each)
    shutil.move(src_path, dst_path)

# Move the test images into validation/
for each in tqdm(val_files):
    src_path = os.path.join(folder, each)
    dst_path = os.path.join('validation', each)
    shutil.move(src_path, dst_path)

# Move training/ and validation/ under images/
len(os.listdir('training')) + len(os.listdir('validation'))
shutil.move('training', 'images/training')
shutil.move('validation', 'images/validation')

# Repeat the same split in the mask folder
folder = 'masks'
os.mkdir('training')
os.mkdir('validation')

# Move the training masks into training/
for each in tqdm(train_files):
    src_path = os.path.join(folder, each.split('.')[0] + '.png')
    dst_path = os.path.join('training', each.split('.')[0] + '.png')
    shutil.move(src_path, dst_path)

# Move the test masks into validation/
for each in tqdm(val_files):
    src_path = os.path.join(folder, each.split('.')[0] + '.png')
    dst_path = os.path.join('validation', each.split('.')[0] + '.png')
    shutil.move(src_path, dst_path)

# Move training/ and validation/ under masks/
len(os.listdir('training')) + len(os.listdir('validation'))
shutil.move('training', 'masks/training')
shutil.move('validation', 'masks/validation')
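After the split it is easy to end up with an image that has no mask, or vice versa, which only surfaces as an error deep into training. A quick pairing check, run from inside coalData (a sketch of my own, not part of the original script):

import os

# For each split, compare image and mask base names.
for split in ['training', 'validation']:
    imgs = {f.split('.')[0] for f in os.listdir(os.path.join('images', split))}
    masks = {f.split('.')[0] for f in os.listdir(os.path.join('masks', split))}
    print(split, 'missing masks:', imgs - masks, 'orphan masks:', masks - imgs)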
Now define the dataset under mmseg/datasets/ inside your virtual environment. Mine lives at /home/amax/anaconda3/envs/internImage/lib/python3.7/site-packages/mmseg/datasets.
In ./mmseg/datasets, copy stare.py, rename the copy my_dataset.py, and modify it for your own dataset.
The following places need to be changed:
STAREDataset: the dataset class name; pick any name for your own dataset
CLASSES: the class names; keep their order consistent with the earlier json-to-mask step
PALETTE: the color palette; these RGB values are unrelated to the grayscale values used during conversion and can be chosen freely, they are only used to color the predicted masks
img_suffix: suffix of the original images
seg_map_suffix: suffix of the mask images
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class CoalDataset(CustomDataset):  # rename to your own dataset
    """Coal dataset.

    In the segmentation maps, 0 stands for background, which is included
    in the 3 categories. ``reduce_zero_label`` is fixed to False. The
    ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed
    to '.png'.
    """
    CLASSES = ('background', 'belt', 'coal')  # replace with your own classes
    # PALETTE = [[0, 0, 0], [38, 38, 38], [75, 75, 75]]
    PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0]]  # RGB values for visualization

    def __init__(self, **kwargs):
        super(CoalDataset, self).__init__(  # use your dataset class name here
            img_suffix='.jpg',      # original-image suffix
            seg_map_suffix='.png',  # mask suffix
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir)
Then open __init__.py in the ./mmseg/datasets folder and register your dataset:
# Copyright (c) OpenMMLab. All rights reserved.
from .ade import ADE20KDataset
from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .chase_db1 import ChaseDB1Dataset
from .cityscapes import CityscapesDataset
from .coco_stuff import COCOStuffDataset
from .custom import CustomDataset
from .dark_zurich import DarkZurichDataset
from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
                               RepeatDataset)
from .drive import DRIVEDataset
from .hrf import HRFDataset
from .isaid import iSAIDDataset
from .isprs import ISPRSDataset
from .loveda import LoveDADataset
from .night_driving import NightDrivingDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59
from .potsdam import PotsdamDataset
from .stare import STAREDataset
from .voc import PascalVOCDataset
from .my_dataset import CoalDataset  # import your own dataset; must match the class name defined above

__all__ = [
    'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
    'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset',
    'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
    'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset',
    'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset',
    'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset',
    'CoalDataset'  # add your dataset class name here
]
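To confirm the registration took effect, a one-off check in the same environment is enough (a sketch); if the class prints instead of raising a KeyError, mmseg can see the dataset:

from mmseg.datasets import DATASETS

# Returns the registered class if 'CoalDataset' is known to the registry.
print(DATASETS.get('CoalDataset'))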
Back in the repo, go to the ./InternImage/segmentation/configs/_base_/datasets/ folder, find stare.py, copy it, rename the copy my_dataset.py, and open it to modify.
# dataset settings
dataset_type = 'CoalDataset'  # your dataset name
data_root = 'data/coalData'   # dataset root path
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (512, 288)  # your image size
crop_size = (128, 128)  # crop size for augmentation; adjust as needed
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,  # equivalent to batch_size
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',  # training-image path
            ann_dir='masks/training',   # training-mask path; matches the masks/training folder created above
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',  # validation-image path
        ann_dir='masks/validation',   # validation-mask path
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',  # test-image path
        ann_dir='masks/validation',   # test-mask path
        pipeline=test_pipeline))
Point the model config at the new dataset.
In the ./segmentation/configs/ade20k/ folder, find upernet_internimage_h_896_160k_ade20k.py, copy it, and rename the copy upernet_internimage_h_896_160k_coal.py. Open it and edit as below; if you start from a different base config, adjust accordingly.
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
    '../_base_/models/upernet_r50.py',   # network skeleton; for single-GPU training change SyncBN to BN inside it
    '../_base_/datasets/my_dataset.py',  # swap in your own dataset config
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py'
]
# pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_jointto22k_384.pth'
pretrained = '/home/amax/PycharmProjects/InternImage-master/segmentation/checkpoint_dir/seg/upernet_internimage_h_896_160k_ade20k.pth'
model = dict(
    backbone=dict(
        _delete_=True,
        type='InternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,         # for InternImage-H/G
        res_post_norm=True,       # for InternImage-H/G
        level2_post_norm=True,    # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=False,
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    decode_head=dict(num_classes=3, in_channels=[320, 640, 1280, 2560]),  # set num_classes to your class count
    auxiliary_head=dict(num_classes=3, in_channels=1280),  # set num_classes to your class count
    test_cfg=dict(mode='whole'))
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (896, 896)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # reduce_zero_label must stay False here: 0 is our background class,
    # not an "ignore" label as in the original ADE20K config
    dict(type='LoadAnnotations', reduce_zero_label=False),
    dict(type='Resize', img_scale=(3584, 896), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(3584, 896),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='ResizeToMultiple', size_divisor=32),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
optimizer = dict(
    _delete_=True,
    type='AdamW',
    lr=0.00002,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=50,
        layer_decay_rate=0.95,
        depths=[6, 6, 32, 6],
        offset_lr_scale=1.0))
lr_config = dict(
    _delete_=True,
    policy='poly',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-6,
    power=1.0,
    min_lr=0.0,
    by_epoch=False)
# By default, models are trained on 16 GPUs with 1 image per GPU
data = dict(
    samples_per_gpu=1,
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
runner = dict(type='IterBasedRunner')
optimizer_config = dict(
    _delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=1600, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
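Before launching a long run, it helps to load the finished config once with mmcv to catch path or syntax errors early. A sketch, run from the segmentation directory:

from mmcv import Config

cfg = Config.fromfile('configs/ade20k/upernet_internimage_h_896_160k_coal.py')
print(cfg.data.train)                      # should show CoalDataset wrapped in RepeatDataset
print(cfg.model.decode_head.num_classes)   # should print 3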
Enter the segmentation directory and launch training with a command of the form:
python train.py {config file} --work-dir={path}
The config file is the .py file you just finished modifying; mine is segmentation/configs/ade20k/upernet_internimage_h_896_160k_coal.py.
work-dir is the directory where results are saved.
python train.py /home/amax/PycharmProjects/InternImage-master/segmentation/configs/ade20k/upernet_internimage_h_896_160k_coal.py --work-dir=/home/amax/PycharmProjects/InternImage-master/segmentation/runs/coal_upernet_h2
You can see that all the metrics now look normal.
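For a standalone evaluation afterwards, assuming the repo's test.py follows the standard mmsegmentation interface, something like the following should work (the checkpoint name is illustrative; save_best='mIoU' writes a best_mIoU_iter_*.pth file into the work dir):

python test.py configs/ade20k/upernet_internimage_h_896_160k_coal.py runs/coal_upernet_h2/best_mIoU_iter_xxx.pth --eval mIoU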
References:
labelme2mask
InternImage segmentation code reproduction and training on a custom dataset (part 2)
mmsegmentation tutorial 1: custom datasets, config file changes, and training
Finally, a quick note: I have set up an image-segmentation group, with members doing instance segmentation with yolov8 as well as semantic segmentation. If you work on this topic or are interested in image segmentation, feel free to message me and I will add you to the group.