赞
踩
全卷积网络(Fully Convolutional Networks,FCN)是UC Berkeley的Jonathan Long等人在2015年提出的用于图像语义分割的一种框架。FCN是首个端到端进行像素级预测的全卷积网络。
图像语义分割是图像处理和机器视觉技术中的重要环节,常应用于人脸识别、物体检测、医学影像、卫星图像分析、自动驾驶感知等领域。语义分割的目的是对图像中每个像素点进行分类,与普通的分类任务不同,语义分割任务输出与输入大小相同的图像,每个像素对应了输入图像每个像素的类别。
FCN主要用于图像分割领域,是一种端到端的分割方法。它通过像素级预测直接得出与原图大小相等的label map。由于FCN丢弃了全连接层并以卷积层替代,网络中所有层均为卷积层,故称为全卷积网络。
卷积化(Convolutional)
上采样(Upsample)
跳跃结构(Skip Layer)
实验前需确保本地已经安装Python环境及MindSpore。以下是数据下载和预处理的代码示例:
from download import download
# Location of the FCN-8s tutorial dataset archive.
url = (
    "https://mindspore-website.obs.cn-north-4.myhuaweicloud.com"
    "/notebook/datasets/dataset_fcn8s.tar"
)

# Fetch the archive into ./dataset; kind="tar" and replace=True are passed
# through to `download` unchanged.
download(
    url,
    "./dataset",
    kind="tar",
    replace=True,
)
由于PASCAL VOC 2012数据集中图像的分辨率大多不一致,无法直接组成batch,因此在输入网络前需先做随机缩放、填充、裁剪和归一化等标准化处理。
import numpy as np
import cv2
import mindspore.dataset as ds


class SegDataset:
    """Segmentation data pipeline built on top of a MindRecord file.

    Every sample goes through decode -> random rescale -> normalize ->
    pad up to the crop size -> random crop -> random horizontal flip, and
    the resulting samples are shuffled and batched.
    """

    def __init__(self,
                 image_mean,
                 image_std,
                 data_file='',
                 batch_size=32,
                 crop_size=512,
                 max_scale=2.0,
                 min_scale=0.5,
                 ignore_label=255,
                 num_classes=21,
                 num_readers=2,
                 num_parallel_calls=4):
        """Store the pipeline configuration.

        Args:
            image_mean: Per-channel mean subtracted from the decoded image.
            image_std: Per-channel divisor applied after mean subtraction.
            data_file: Path handed to ``ds.MindDataset``.
            batch_size: Number of samples per batch.
            crop_size: Side length of the square crop returned per sample.
            max_scale: Upper bound of the random rescale factor.
            min_scale: Lower bound of the random rescale factor.
            ignore_label: Value used to pad the label map.
            num_classes: Number of classes (stored only; not used here).
            num_readers: Parallel workers used for reading the file.
            num_parallel_calls: Parallel workers used for the map step.
        """
        # Normalization statistics are kept as float32 arrays so they
        # broadcast against the decoded image.
        self.image_mean = np.array(image_mean, dtype=np.float32)
        self.image_std = np.array(image_std, dtype=np.float32)

        # Data source and batching.
        self.data_file = data_file
        self.batch_size = batch_size
        self.num_readers = num_readers
        self.num_parallel_calls = num_parallel_calls

        # Augmentation settings.
        self.crop_size = crop_size
        self.min_scale = min_scale
        self.max_scale = max_scale

        # Label settings.
        self.ignore_label = ignore_label
        self.num_classes = num_classes

    def preprocess_dataset(self, image, label):
        """Decode and augment one (image, label) pair.

        Args:
            image: Encoded image bytes.
            label: Encoded label-map bytes.

        Returns:
            Tuple ``(image, label)``: the image with its last axis moved to
            the front and cropped to ``crop_size`` x ``crop_size``, and the
            matching label map cast to int32.
        """
        crop = self.crop_size

        img = cv2.imdecode(np.frombuffer(image, dtype=np.uint8), cv2.IMREAD_COLOR)
        mask = cv2.imdecode(np.frombuffer(label, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)

        # Random rescale; the label uses nearest-neighbour interpolation so
        # that no new label values are produced.
        scale = np.random.uniform(self.min_scale, self.max_scale)
        scaled_h = int(scale * img.shape[0])
        scaled_w = int(scale * img.shape[1])
        target_size = (scaled_w, scaled_h)
        img = cv2.resize(img, target_size, interpolation=cv2.INTER_CUBIC)
        mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)

        img = (img - self.image_mean) / self.image_std

        # Pad on the bottom/right so both sides reach at least `crop`.
        padded_h = max(scaled_h, crop)
        padded_w = max(scaled_w, crop)
        extra_h = padded_h - scaled_h
        extra_w = padded_w - scaled_w
        if extra_h > 0 or extra_w > 0:
            img = cv2.copyMakeBorder(img, 0, extra_h, 0, extra_w,
                                     cv2.BORDER_CONSTANT, value=0)
            mask = cv2.copyMakeBorder(mask, 0, extra_h, 0, extra_w,
                                      cv2.BORDER_CONSTANT, value=self.ignore_label)

        # Random crop window, shared by image and label.
        top = np.random.randint(0, padded_h - crop + 1)
        left = np.random.randint(0, padded_w - crop + 1)
        rows = slice(top, top + crop)
        cols = slice(left, left + crop)
        img = img[rows, cols, :]
        mask = mask[rows, cols]

        # Random horizontal mirror.
        if np.random.uniform(0.0, 1.0) > 0.5:
            img = img[:, ::-1, :]
            mask = mask[:, ::-1]

        # Move the last axis first; copy() materializes the reversed views.
        img = img.transpose((2, 0, 1)).copy()
        mask = mask.copy().astype("int32")
        return img, mask

    def get_dataset(self):
        """Build and return the mapped, shuffled and batched dataset."""
        ds.config.set_numa_enable(True)

        pipeline = ds.MindDataset(
            self.data_file,
            columns_list=["data", "label"],
            shuffle=True,
            num_parallel_workers=self.num_readers,
        )
        pipeline = pipeline.map(
            operations=self.preprocess_dataset,
            input_columns=["data", "label"],
            output_columns=["data", "label"],
            num_parallel_workers=self.num_parallel_calls,
        )
        pipeline = pipeline.shuffle(buffer_size=self.batch_size * 10)
        return pipeline.batch(self.batch_size, drop_remainder=True)
以下是构建FCN-8s网络的代码示例:
import mindspore.nn as nn


def _vgg_stage(in_channels, out_channels, num_convs):
    """Return a SequentialCell of `num_convs` x (3x3 Conv2d -> BatchNorm2d -> ReLU)."""
    layers = []
    for index in range(num_convs):
        layers.extend([
            nn.Conv2d(in_channels=in_channels if index == 0 else out_channels,
                      out_channels=out_channels, kernel_size=3,
                      weight_init='xavier_uniform'),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ])
    return nn.SequentialCell(layers)


class FCN8s(nn.Cell):
    """FCN-8s semantic segmentation network on a VGG-16 style backbone.

    The backbone downsamples the input five times by a factor of 2. Class
    scores computed at 1/32 resolution are upsampled x2 and fused with
    scores from pool4 (1/16), upsampled x2 again and fused with scores from
    pool3 (1/8), and finally upsampled x8 back to the input resolution.

    Args:
        n_class: Number of output classes (channels of the returned map).

    Note:
        The skip additions require matching spatial sizes, so the input
        height and width are expected to be multiples of 32 (for example
        the 512x512 crops produced by the data pipeline).
    """

    def __init__(self, n_class):
        super().__init__()
        self.n_class = n_class

        # VGG-16 style feature extractor.
        self.conv1 = _vgg_stage(3, 64, 2)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = _vgg_stage(64, 128, 2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = _vgg_stage(128, 256, 3)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv4 = _vgg_stage(256, 512, 3)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv5 = _vgg_stage(512, 512, 3)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Former fully connected layers expressed as convolutions.
        self.fc6 = nn.SequentialCell(
            nn.Conv2d(in_channels=512, out_channels=4096, kernel_size=7,
                      weight_init='xavier_uniform'),
            nn.BatchNorm2d(4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.fc7 = nn.SequentialCell(
            nn.Conv2d(in_channels=4096, out_channels=4096, kernel_size=1,
                      weight_init='xavier_uniform'),
            nn.BatchNorm2d(4096),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        # 1x1 classifiers on the deepest features and on the two skips.
        self.score_fr = nn.Conv2d(in_channels=4096, out_channels=n_class,
                                  kernel_size=1, weight_init='xavier_uniform')
        self.score_pool3 = nn.Conv2d(in_channels=256, out_channels=n_class,
                                     kernel_size=1, weight_init='xavier_uniform')
        self.score_pool4 = nn.Conv2d(in_channels=512, out_channels=n_class,
                                     kernel_size=1, weight_init='xavier_uniform')

        # Learned upsampling. `padding` only takes effect with
        # pad_mode='pad'; kernel=2*stride with padding=stride/2 yields an
        # output exactly `stride` times larger than the input.
        self.upscore2 = nn.Conv2dTranspose(in_channels=n_class, out_channels=n_class,
                                           kernel_size=4, stride=2,
                                           pad_mode='pad', padding=1,
                                           weight_init='xavier_uniform')
        self.upscore_pool4 = nn.Conv2dTranspose(in_channels=n_class, out_channels=n_class,
                                                kernel_size=4, stride=2,
                                                pad_mode='pad', padding=1,
                                                weight_init='xavier_uniform')
        self.upscore8 = nn.Conv2dTranspose(in_channels=n_class, out_channels=n_class,
                                           kernel_size=16, stride=8,
                                           pad_mode='pad', padding=4,
                                           weight_init='xavier_uniform')

    def construct(self, x):
        """Return per-pixel class scores with the same height/width as `x`."""
        h = self.pool1(self.conv1(x))
        h = self.pool2(self.conv2(h))
        pool3 = self.pool3(self.conv3(h))        # 1/8 resolution, 256 channels
        pool4 = self.pool4(self.conv4(pool3))    # 1/16 resolution, 512 channels
        h = self.pool5(self.conv5(pool4))        # 1/32 resolution

        h = self.fc7(self.fc6(h))
        h = self.score_fr(h)

        # Fuse coarse scores with the pool4 skip at 1/16 resolution.
        h = self.upscore2(h) + self.score_pool4(pool4)
        # Fuse with the pool3 skip at 1/8 resolution.
        h = self.upscore_pool4(h) + self.score_pool3(pool3)
        # Restore the input resolution.
        return self.upscore8(h)
构建完网络后,需要配置训练过程,包括定义损失函数、优化器及训练循环。以下是FCN-8s的训练过程:
import mindspore as ms
from mindspore import nn, Model, dataset as ds
from mindspore import context
from mindspore.common.initializer import XavierUniform
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
from mindspore.nn import SoftmaxCrossEntropyWithLogits

# Runtime environment.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

# Hyper-parameters.
num_classes = 21
batch_size = 8
lr = 0.001
epoch_size = 50
momentum = 0.9
weight_decay = 5e-4

# Training data.
image_mean = [123.68, 116.78, 103.94]
image_std = [58.393, 57.12, 57.375]
data_path = "./dataset/fcn8s_train.mindrecord"
train_dataset = SegDataset(image_mean, image_std, data_file=data_path, batch_size=batch_size)
train_ds = train_dataset.get_dataset()

# FCN-8s model. Every layer already receives weight_init='xavier_uniform'
# in its constructor, so no separate initialization call is needed (FCN8s
# defines no `initialize_weights` method).
net = FCN8s(n_class=num_classes)

# Loss. The network outputs (N, C, H, W) scores and the labels are (N, H, W)
# maps padded with `ignore_label`; CrossEntropyLoss handles that layout and
# skips the ignored pixels, which SoftmaxCrossEntropyWithLogits cannot do.
loss = nn.CrossEntropyLoss(ignore_index=train_dataset.ignore_label)

# Optimizer.
optimizer = nn.Momentum(net.trainable_params(), lr, momentum, weight_decay)

# Model wrapper.
model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'accuracy'})

# Checkpoint callback.
config_ck = CheckpointConfig(save_checkpoint_steps=50, keep_checkpoint_max=5)
ckpoint_cb = ModelCheckpoint(prefix="fcn8s", directory="./checkpoint", config=config_ck)

# Train.
print("Start Training...")
model.train(epoch_size, train_ds, callbacks=[ckpoint_cb, LossMonitor()], dataset_sink_mode=False)
print("Training Completed.")
在训练完模型后,需要对模型进行评估。以下是模型评估的代码示例:
import mindspore as ms
from mindspore import dataset as ds
# Build the validation pipeline from the validation MindRecord file.
val_data_path = "./dataset/fcn8s_val.mindrecord"
val_dataset = SegDataset(
    image_mean,
    image_std,
    data_file=val_data_path,
    batch_size=batch_size,
)
val_ds = val_dataset.get_dataset()

# Evaluate with the metrics configured on `model` and report the result.
print("Start Evaluating...")
result = model.eval(
    val_ds,
    dataset_sink_mode=False,
)
print("Evaluation result:", result)
最后,使用训练好的模型进行图像分割预测。以下是模型推理的代码示例:
import numpy as np
import cv2
import mindspore as ms
from mindspore import Tensor


def infer_image(image_path, model, crop_size=512):
    """Segment one image file and return a class-index map at its original size.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        model: Object exposing ``predict(Tensor)`` that returns per-class
            scores with the class axis at position 1.
        crop_size: Side length the image is resized to before inference.

    Returns:
        2-D uint8 array of shape (original height, original width) holding
        the arg-max class index per pixel (class counts above 255 would not
        fit in uint8).

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    h, w = image.shape[:2]

    # Same normalization statistics as used for training (module-level
    # image_mean / image_std).
    image = cv2.resize(image, (crop_size, crop_size), interpolation=cv2.INTER_CUBIC)
    image = (image - image_mean) / image_std
    image = image.transpose((2, 0, 1))
    image = image[np.newaxis, :]
    image = Tensor(image, ms.float32)

    output = model.predict(image).asnumpy()
    # argmax yields int64, which cv2.resize does not accept; class indices
    # fit in uint8.
    output = np.argmax(output, axis=1)[0].astype(np.uint8)
    output = cv2.resize(output, (w, h), interpolation=cv2.INTER_NEAREST)
    return output


# Restore the trained weights into the network wrapped by `model`
# (Model itself has no load_checkpoint method).
model_file = "./checkpoint/fcn8s-50_500.ckpt"
ms.load_checkpoint(model_file, net=net)

image_path = "./dataset/demo_image.jpg"
segmented_image = infer_image(image_path, model)

# Show the result.
# NOTE(review): the map holds raw class indices, so the window will look
# very dark; scale or colorize it if a readable preview is needed.
cv2.imshow('Segmented Image', segmented_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
全卷积网络(FCN)通过卷积操作代替全连接操作,实现了任意尺寸输入图像的语义分割。其结构中的跳跃连接(Skip Connection)和上采样(Upsample)步骤使得模型能够更好地结合全局和局部信息,提高了分割结果的精确性和细致性。通过MindSpore实现FCN-8s的构建、训练、评估和推理,可以直观地理解语义分割的全过程。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。