赞
踩
仅需要准备数据,适当修改格式等(下面也有相应代码)
应该不挑环境,这里我用的是python3.8+cuda12.1+pytorch2.1.0,显卡是RTX3080
工作文件夹下建立data文件夹、model文件夹、utils文件夹、train.py、predict.py、其他一些代码(可选择性使用)
------------------------------------
data文件夹下建立test文件夹、train文件夹
test文件夹内直接放入要预测的图片,预测后的结果也在此文件夹中
train文件夹下建立image文件夹、label文件夹(都是图片)
要求/注意:图片均为PNG格式、512×512、三通道。image的位深可以不用管;label一般为8位深度,也可以把dataset.py中读取image时额外加的读取参数照搬到读取label的语句上,这样同样不必再考虑位深。推荐label为二值化图像,读取也不会出错。如果image是从遥感影像tif格式转为png,且遥感影像有四个通道(如含近红外波段),要注意查看透明度是否发生变化,因为png支持RGBA四通道;如果发生了变化,可以在ArcGIS中对原始tif图层右键导出数据,选择“使用渲染器”强制输出RGB,格式选png导出。(但目前未发现在ArcGIS中批量导出的方法)
-----------------------------------
model文件夹内有三个.py文件:
__init__.py
unet_model.py
- """ Full assembly of the parts to form the complete network """
- """Refer https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_model.py"""
-
- import torch.nn.functional as F
-
- from .unet_parts import *
-
-
class UNet(nn.Module):
    """U-Net built from the ``DoubleConv`` / ``Down`` / ``Up`` / ``OutConv`` parts.

    Args:
        n_channels: Channel count of the input tensor.
        n_classes: Channel count of the output produced by the final 1x1 conv.
        bilinear: Forwarded unchanged to every ``Up`` stage.
    """

    def __init__(self, n_channels, n_classes, bilinear=True):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        # Contracting path: stem followed by four Down stages (down1..down4).
        self.inc = DoubleConv(n_channels, 64)
        encoder_plan = ((64, 128), (128, 256), (256, 512), (512, 512))
        for stage, (c_in, c_out) in enumerate(encoder_plan, start=1):
            setattr(self, f"down{stage}", Down(c_in, c_out))

        # Expanding path: four Up stages (up1..up4); each input width is the
        # concatenation of the skip tensor and the upsampled tensor.
        decoder_plan = ((1024, 256), (512, 128), (256, 64), (128, 64))
        for stage, (c_in, c_out) in enumerate(decoder_plan, start=1):
            setattr(self, f"up{stage}", Up(c_in, c_out, bilinear))

        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections, and return
        the output of the final conv (no activation is applied here)."""
        # Stack of encoder outputs; the deepest one ends up on top.
        skips = [self.inc(x)]
        for down in (self.down1, self.down2, self.down3, self.down4):
            skips.append(down(skips[-1]))

        out = skips.pop()
        for up in (self.up1, self.up2, self.up3, self.up4):
            # Pair each decoder stage with the matching encoder output.
            out = up(out, skips.pop())

        return self.outc(out)
-
if __name__ == '__main__':
    # Smoke check: build a 3-channel-in / 1-class-out network and print its layout.
    model = UNet(n_channels=3, n_classes=1)
    print(model)
unet_parts.py
- """ Parts of the U-Net model """
- """https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_parts.py"""
-
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
-
class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv with padding 1 -> BatchNorm -> ReLU) stages.

    Args:
        in_channels: Channels entering the first convolution.
        out_channels: Channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # The first stage maps in -> out, the second keeps out -> out.
        for stage_in in (in_channels, out_channels):
            stages += [
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to ``x``."""
        features = self.double_conv(x)
        return features
-
-
class Down(nn.Module):
    """Encoder stage: 2x2 max-pool followed by a ``DoubleConv``.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by the ``DoubleConv``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        convs = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, convs)

    def forward(self, x):
        """Pool ``x`` and run it through the double convolution."""
        reduced = self.maxpool_conv(x)
        return reduced
-
-
class Up(nn.Module):
    """Decoder stage: upsample, align with the skip tensor, concatenate, DoubleConv.

    Args:
        in_channels: Channels of the concatenated tensor fed to the DoubleConv.
        out_channels: Channels produced by the DoubleConv.
        bilinear: If true, upsample with ``nn.Upsample`` (no learned weights);
            otherwise use a stride-2 ``nn.ConvTranspose2d`` on
            ``in_channels // 2`` channels.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2)

        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to the spatial size of ``x2`` and fuse both.

        Args:
            x1: Tensor coming from the deeper stage.
            x2: Skip tensor from the encoder; it is placed first in the
                channel concatenation.
        """
        x1 = self.up(x1)
        # Tensors are indexed as NCHW here: dims 2 and 3 are height and width.
        # Plain Python ints are used for the size differences so that no
        # tensor is allocated per call and F.pad receives real integers.
        diff_y = x2.size(2) - x1.size(2)
        diff_x = x2.size(3) - x1.size(3)

        # F.pad order is (left, right, top, bottom); any odd remainder goes to
        # the right / bottom side.
        x1 = F.pad(x1, [diff_x // 2, diff_x - diff_x // 2,
                        diff_y // 2, diff_y - diff_y // 2])

        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)
-
-
class OutConv(nn.Module):
    """Final 1x1 convolution mapping feature channels to output channels.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels of the produced map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1))

    def forward(self, x):
        """Apply the 1x1 convolution to ``x``."""
        projected = self.conv(x)
        return projected
-----------------------------------
utils文件夹内有一个dataset.py文件:
- import torch
- import cv2
- import os
- import glob
- from torch.utils.data import Dataset
- import random
-
-
class ISBI_Loader(Dataset):
    """Dataset of grayscale image / mask pairs stored as PNG files.

    Images are read from ``<data_path>/image/*.png``; the mask for each image
    is the file with the same name in ``<data_path>/label/``.
    """

    def __init__(self, data_path):
        """Collect the paths of all PNG files under ``<data_path>/image``."""
        self.data_path = data_path
        # Sorted so that an index always refers to the same file, regardless
        # of the order in which the file system lists the directory.
        self.imgs_path = sorted(glob.glob(os.path.join(data_path, 'image/*.png')))

    def augment(self, image, flipCode):
        """Flip ``image`` with ``cv2.flip``.

        ``flipCode`` 1 flips horizontally, 0 vertically, -1 both ways.
        """
        return cv2.flip(image, flipCode)

    def _label_path(self, image_path):
        """Return the mask path that belongs to ``image_path``.

        Only the directory is swapped; the file name is kept as is, so an
        "image" substring in a file name or parent directory is not touched.
        """
        return os.path.join(self.data_path, 'label', os.path.basename(image_path))

    def __getitem__(self, index):
        """Load one pair and return ``(image, label)``, each shaped (1, H, W).

        Raises:
            FileNotFoundError: If the image or its mask cannot be read.
        """
        image_path = self.imgs_path[index]
        label_path = self._label_path(image_path)

        image = cv2.imread(image_path, flags=1)
        label = cv2.imread(label_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError('cannot read image: ' + image_path)
        if label is None:
            raise FileNotFoundError('cannot read label: ' + label_path)

        # Collapse both to a single channel.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        label = cv2.cvtColor(label, cv2.COLOR_BGR2GRAY)

        # Masks stored as 0/255 are rescaled to 0/1.
        if label.max() > 1:
            label = label / 255

        # Random flip; 2 means "leave untouched". The flip is applied while
        # the arrays are still (H, W): on a (1, H, W) array cv2.flip would
        # treat the leading axis as the image rows and flip the wrong axes.
        flipCode = random.choice([-1, 0, 1, 2])
        if flipCode != 2:
            image = self.augment(image, flipCode)
            label = self.augment(label, flipCode)

        # Add the leading channel axis expected by the network.
        image = image.reshape(1, image.shape[0], image.shape[1])
        label = label.reshape(1, label.shape[0], label.shape[1])
        return image, label

    def __len__(self):
        """Return the number of images found."""
        return len(self.imgs_path)
-
-
if __name__ == "__main__":
    # Quick manual check: report the dataset size and the shape of each batch.
    dataset = ISBI_Loader("data/train/")
    print("数据个数:", len(dataset))
    loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=2, shuffle=True)
    for batch_images, _batch_labels in loader:
        print(batch_images.shape)
-----------------------------------
train.py
- from model.unet_model import UNet
- from utils.dataset import ISBI_Loader
- from torch import optim
- import torch.nn as nn
- import torch
-
def train_net(net, device, data_path, epochs=40, batch_size=1, lr=0.00001):
    """Train ``net`` on the dataset at ``data_path`` and keep the best weights.

    After every epoch the sample-weighted mean training loss is compared with
    the best value so far; when it improves, the weights are written to
    ``best_model.pth``.

    Args:
        net: Model to train; it must already live on ``device``.
        device: Device the batches are moved to.
        data_path: Directory handed to ``ISBI_Loader``.
        epochs: Number of passes over the dataset.
        batch_size: Samples per batch.
        lr: Learning rate for RMSprop.

    Returns:
        The lowest epoch-mean loss observed (``inf`` when ``epochs`` is 0).

    Raises:
        ValueError: If no training images are found.
    """
    isbi_dataset = ISBI_Loader(data_path)
    if len(isbi_dataset) == 0:
        raise ValueError('no training images found under ' + str(data_path))
    train_loader = torch.utils.data.DataLoader(dataset=isbi_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    optimizer = optim.RMSprop(net.parameters(), lr=lr, weight_decay=1e-8, momentum=0.9)
    criterion = nn.BCEWithLogitsLoss()
    # Kept as a plain float: storing the loss tensor itself would keep its
    # autograd graph alive between iterations.
    best_loss = float('inf')

    for epoch in range(epochs):
        net.train()
        loss_sum = 0.0
        samples_seen = 0
        for image, label in train_loader:
            optimizer.zero_grad()
            image = image.to(device=device, dtype=torch.float32)
            label = label.to(device=device, dtype=torch.float32)
            pred = net(image)
            loss = criterion(pred, label)
            loss_value = loss.item()
            print('Loss/train', loss_value)
            loss.backward()
            optimizer.step()
            # Weight by batch size so a short final batch does not skew the mean.
            loss_sum += loss_value * image.size(0)
            samples_seen += image.size(0)

        # Judge the checkpoint on the whole epoch rather than on one
        # mini-batch, whose loss mostly reflects how easy that batch was.
        epoch_loss = loss_sum / samples_seen
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(net.state_dict(), 'best_model.pth')

    return best_loss
-
if __name__ == "__main__":
    # Prefer CUDA when it is available, otherwise fall back to the CPU.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)
    # Single-channel input, single output class; move the model to the device.
    net = UNet(n_channels=1, n_classes=1)
    net.to(device=device)
    # Train on the default training directory.
    train_net(net, device, './data/train/')
-----------------------------------
predict.py
- import glob
- import numpy as np
- import torch
- import os
- import cv2
- from model.unet_model import UNet
-
# Suffix appended to an input file's stem to name its prediction.
RESULT_SUFFIX = '_res.png'


def result_path_for(test_path):
    """Return the output path for ``test_path``: same stem plus ``_res.png``.

    Only the final extension is stripped, so dots elsewhere in the path
    (``./data/...``, ``tile.v2.png``) are preserved.
    """
    return os.path.splitext(test_path)[0] + RESULT_SUFFIX


def logits_to_mask(logits):
    """Convert a tensor of raw network outputs into a 0/255 uint8 mask.

    The network is trained with ``BCEWithLogitsLoss``, so its outputs are
    logits; they are passed through a sigmoid before the 0.5 threshold.
    """
    probabilities = torch.sigmoid(logits).cpu().numpy()
    return np.where(probabilities >= 0.5, 255, 0).astype(np.uint8)


if __name__ == "__main__":
    # Prefer CUDA when it is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Single-channel input, single output class.
    net = UNet(n_channels=1, n_classes=1)
    net.to(device=device)
    net.load_state_dict(torch.load('best_model.pth', map_location=device))
    net.eval()

    # Inference only: no gradients need to be recorded.
    with torch.no_grad():
        for test_path in sorted(glob.glob('data/test/*.png')):
            # Results are written next to the inputs; do not feed the output
            # of an earlier run back into the network.
            if test_path.endswith(RESULT_SUFFIX):
                continue
            img = cv2.imread(test_path)
            if img is None:
                print('skip unreadable image:', test_path)
                continue
            # cv2.imread yields BGR; use the same conversion as the training
            # dataset so the gray values match.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Shape (batch=1, channel=1, H, W).
            img = img.reshape(1, 1, img.shape[0], img.shape[1])
            img_tensor = torch.from_numpy(img).to(device=device, dtype=torch.float32)
            pred = net(img_tensor)
            cv2.imwrite(result_path_for(test_path), logits_to_mask(pred[0, 0]))
-----------------------------------
剩下是一些杂七杂八的比较有针对性功能的代码:
格式转换tif变为png,但这个不能解决有透明度的问题,解决办法见文章最上面!
- from PIL import Image
- import os
-
def tif_to_png(input_path, output_path):
    """Convert every TIFF file directly inside ``input_path`` to PNG.

    Files are matched by extension (``.tif`` / ``.tiff``, any letter case) and
    written to ``output_path`` under the same stem. ``output_path`` is created
    when it does not exist yet.

    Returns:
        The list of PNG paths that were written.
    """
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    written = []
    for file in sorted(os.listdir(input_path)):
        # splitext only touches the final extension, so a ".tif" substring
        # elsewhere in the name is left alone.
        stem, ext = os.path.splitext(file)
        if ext.lower() not in ('.tif', '.tiff'):
            continue
        target = os.path.join(output_path, stem + '.png')
        with Image.open(os.path.join(input_path, file)) as im:
            im.save(target)
        written.append(target)
    return written


if __name__ == '__main__':
    # Example; guarded so importing this module does not start a conversion.
    tif_to_png('D:/deeplearning/tif',
               'D:/deeplearning/png')
格式转换jpg变为png
- import os
- from PIL import Image
-
-
def get_all_png_files(dir):
    """Return the paths of all JPEG files below ``dir`` (searched recursively).

    Despite the function name, the files collected are the JPEG inputs of the
    JPEG-to-PNG conversion. The extension check accepts ``.jpg`` and ``.jpeg``
    in any letter case.
    """
    jpeg_extensions = ('.jpg', '.jpeg')
    return [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(dir)
        for file in files
        if os.path.splitext(file)[1].lower() in jpeg_extensions
    ]
-
-
def png2jpg(files_list, output_dir):
    """Save each image in ``files_list`` as an RGB PNG inside ``output_dir``.

    Despite the function name, the output format is PNG: each result keeps the
    source file's stem and gets a ``.png`` extension. ``output_dir`` is created
    when it does not exist yet.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    for file in files_list:
        stem = os.path.splitext(os.path.basename(file))[0]
        output_file = os.path.join(output_dir, stem + '.png')
        # The context manager closes the source file handle after saving.
        with Image.open(file) as img:
            img.convert('RGB').save(output_file)
-
-
if __name__ == '__main__':
    # Source directory that is scanned and the directory the results go to
    # (the same folder here, so the PNG files land next to their sources).
    source_dir = r'D:\deeplearning\test'
    output_dir = r'D:\deeplearning\test'
    png2jpg(get_all_png_files(source_dir), output_dir)
将文件夹内图像批量重命名:
- #coding=gbk
- import os
- import sys
def rename_files(path, name, start, file_type):
    """Rename every regular file in ``path`` to ``<name><number><file_type>``.

    Files are processed in sorted name order and numbered from ``start``.
    Sub-directories are left untouched. The rename runs in two phases (first to
    unique temporary names, then to the final names) so that a new name equal
    to the name of a file that is still waiting to be renamed can never
    overwrite that file.

    Returns:
        The number of files renamed.

    Raises:
        FileExistsError: If a target name is already taken by a directory.
    """
    import uuid

    entries = sorted(os.listdir(path))
    sources = [entry for entry in entries
               if not os.path.isdir(os.path.join(path, entry))]
    targets = [name + str(start + offset) + file_type
               for offset in range(len(sources))]

    # Directories keep their names, so a target equal to one would clash.
    blocked = (set(entries) - set(sources)).intersection(targets)
    if blocked:
        raise FileExistsError('target name already used by a directory: '
                              + ', '.join(sorted(blocked)))

    token = uuid.uuid4().hex
    staged = []
    for position, source in enumerate(sources):
        temp_name = '.rename-%s-%d.tmp' % (token, position)
        os.rename(os.path.join(path, source), os.path.join(path, temp_name))
        staged.append(temp_name)
    for temp_name, target in zip(staged, targets):
        os.rename(os.path.join(path, temp_name), os.path.join(path, target))
    return len(sources)


def rename():
    """Interactively ask for a folder, prefix, start number and suffix, then
    batch-rename the files in that folder.

    Raises:
        ValueError: If the start number entered is not an integer.
    """
    path=input("请输入路径(例如D:\\\\picture):")
    name=input("请输入开头名:")
    startNumber=input("请输入开始数:")
    fileType=input("请输入后缀名(如 .jpg、.txt等等):")
    print("正在生成以"+name+startNumber+fileType+"迭代的文件名")
    # Parse once, before any file is touched.
    start=int(startNumber)
    count=rename_files(path,name,start,fileType)
    print("一共修改了"+str(count)+"个文件")


if __name__ == '__main__':
    # Guarded so importing this module does not start the interactive prompts.
    rename()
参考包括:
Pytorch深度学习实战教程(三):UNet模型训练 (qq.com)
GitHub:Deep-Learning/Tutorial/lesson-2 at master · Jack-Cherish/Deep-Learning · GitHub
B站up主:Bubbliiiing
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。