1. Transfer Learning
The concept of transfer learning predates deep learning and has long been an active research area within machine learning. In transfer learning, the dataset whose knowledge is already available is called the source domain, and the new dataset to be learned is called the target domain; transfer learning studies how to carry knowledge over from the source domain to the target domain. With the rapid progress of deep learning in recent years, deep neural network models have shown strong transfer performance. The core idea of deep transfer learning is to train a deep neural network on a source dataset to obtain parameters that extract general-purpose features, and then fine-tune the model on the target domain. Deep neural networks typically have a huge number of parameters, and training them well without overfitting requires feeding the model large amounts of data, yet large-scale datasets are often costly to obtain. For small datasets, researchers therefore commonly adopt transfer learning as a solution. In current deep learning practice, this usually means taking a model pretrained on a large general-purpose dataset and fine-tuning it on the dataset of one's own specific domain.
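To make the recipe concrete, here is a minimal PyTorch sketch of fine-tuning, assuming an ImageNet-pretrained ResNet-18 and a hypothetical 7-class target task (the backbone, class count, and learning rate are illustrative; the full pipeline used in this article appears in the code section below):

import torch
from torch import nn, optim
from torchvision import models

net = models.resnet18(pretrained=True)     # parameters learned on the source domain (ImageNet)
net.fc = nn.Linear(net.fc.in_features, 7)  # new, randomly initialized head for the target domain
# One common variant: freeze the transferred feature layers and train only the head.
for name, p in net.named_parameters():
    if not name.startswith('fc'):
        p.requires_grad = False
optimizer = optim.SGD((p for p in net.parameters() if p.requires_grad), lr=0.01)

The code later in this article keeps all layers trainable instead, giving the pretrained layers a smaller learning rate than the new head; both are standard fine-tuning strategies.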
2. InceptionV3
The Inception family of networks was proposed by Google researchers in 2014, inspired mainly by the network-in-network idea. Internally, the network is assembled from many local modules, each containing several parallel branches. In the most common form, each branch starts with a 1x1 convolution, possibly followed by a 3x3 convolution, and the outputs of all branches are concatenated at the end. Each module can thus be viewed as aggregating the results of convolutions at multiple scales, allowing the network to learn richer features. A typical Inception module is structured as follows.
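As an illustration, here is a minimal sketch of such a multi-branch block in PyTorch (simplified relative to the real InceptionV3 modules, which use more branches and factorized convolutions; the channel counts are illustrative):

import torch
from torch import nn

class InceptionBlock(nn.Module):
    def __init__(self, in_ch, c1, c2):
        super().__init__()
        self.b1 = nn.Conv2d(in_ch, c1, kernel_size=1)  # branch 1: a single 1x1 convolution
        self.b2 = nn.Sequential(                       # branch 2: 1x1 convolution, then 3x3
            nn.Conv2d(in_ch, c2, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(c2, c2, kernel_size=3, padding=1),
        )
    def forward(self, x):
        # concatenate the multi-scale branch outputs along the channel dimension
        return torch.cat([torch.relu(self.b1(x)), torch.relu(self.b2(x))], dim=1)

x = torch.randn(1, 64, 32, 32)
print(InceptionBlock(64, 32, 48)(x).shape)  # torch.Size([1, 80, 32, 32])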
3. ResNet
The ResNet model was proposed by Kaiming He et al. in 2015. Its key idea is the identity mapping, which in effect lets the network adapt its own effective depth. The network introduces residual connections, which add the output of an earlier layer directly to a later layer; injecting this residual alleviates the loss of information as the network grows deeper. A local view of a residual connection looks like the following.
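As an illustration, here is a minimal sketch of a residual block (simplified; real ResNet blocks also use batch normalization and a projection shortcut when the shapes change):

import torch
from torch import nn

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
    def forward(self, x):
        y = self.conv2(torch.relu(self.conv1(x)))
        # skip connection: add the input back, so the branch only learns the residual
        return torch.relu(y + x)

x = torch.randn(1, 16, 8, 8)
print(ResidualBlock(16)(x).shape)  # torch.Size([1, 16, 8, 8])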
Code
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision import models
import sys
import os
from matplotlib import pyplot as plt
import numpy
import time
# Helper for displaying a grid of images
def show_images(imgs, num_rows, num_cols, scale=2):
    # num_rows: number of rows, num_cols: number of columns
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    plt.show()
    return axes
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # If no device is specified, use the device of net's parameters
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode; this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:  # custom model (not used after section 3.13; GPU not considered)
                if 'is_training' in net.__code__.co_varnames:  # if the model takes an is_training argument
                    # pass is_training=False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
def train(train_iter, test_iter, net, loss, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
def train_fine_tuning(net, optimizer, batch_size=72, num_epochs=10):
    train_iter = DataLoader(ImageFolder(os.path.join(data_dir, 'CeramicsQingXinxin/train'), transform=train_augs),
                            batch_size, shuffle=True)
    test_iter = DataLoader(ImageFolder(os.path.join(data_dir, 'CeramicsQingXinxin/test'), transform=test_augs),
                           batch_size)
    loss = torch.nn.CrossEntropyLoss()
    train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_dir = '/home/dell/桌面/文物检测/杨沁瑜文物项目'
os.listdir(os.path.join(data_dir, "CeramicsQingXinxin"))  # ['train', 'test']
train_imgs = ImageFolder(os.path.join(data_dir, 'CeramicsQingXinxin/train'))
test_imgs = ImageFolder(os.path.join(data_dir, 'CeramicsQingXinxin/test'))
# Inspect images in the test set
# test_pics = [test_imgs[i][0] for i in range(140)]
# show_images(test_pics, 7, 20)
# Normalize each RGB channel with the given mean and std. When using a
# pretrained model, preprocessing must match what was used in pretraining.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=299),  # InceptionV3 expects 299x299 inputs
    transforms.RandomHorizontalFlip(),       # horizontal flip with probability p (default 0.5)
    transforms.ToTensor(),
    normalize
])
test_augs = transforms.Compose([
    transforms.Resize(size=299),      # scale the shorter side to 299 pixels
    transforms.CenterCrop(size=299),  # then take the 299x299 center crop as input
    transforms.ToTensor(),
    normalize
])
pretrained_net = models.inception_v3(pretrained=True)  # alternative: models.resnet18(pretrained=True)
pretrained_net.fc = nn.Linear(2048, 7)           # replace the main classifier head (7 target classes)
pretrained_net.AuxLogits.fc = nn.Linear(768, 7)  # replace the auxiliary classifier head as well
pretrained_net.aux_logits = False                # disable the auxiliary output so forward() returns one tensor
print(pretrained_net.fc)
output_params = list(map(id, pretrained_net.fc.parameters()))
feature_params = filter(lambda p: id(p) not in output_params, pretrained_net.parameters())
lr = 0.01
# Use a 10x larger learning rate for the freshly initialized output layer
# than for the pretrained feature layers
optimizer = optim.SGD([{'params': feature_params},
                       {'params': pretrained_net.fc.parameters(), 'lr': lr * 10}],
                      lr=lr, weight_decay=0.001)
print('Fine-tuning:')
train_fine_tuning(pretrained_net, optimizer)
print('Training from scratch:')
scratch_net = models.resnet18(pretrained=False, num_classes=7)
lr = 0.1
optimizer = optim.SGD(scratch_net.parameters(), lr=lr, weight_decay=0.001)
train_fine_tuning(scratch_net, optimizer)
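The listing above fine-tunes InceptionV3 and, for comparison, trains a ResNet-18 from scratch. The second listing below runs the same pipeline with an ImageNet-pretrained ResNet-18 as the source model, using 224x224 inputs and the 'CeramicsQing' dataset.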
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision import models
import sys
import os
from matplotlib import pyplot as plt
import numpy
import time
# Helper for displaying a grid of images
def show_images(imgs, num_rows, num_cols, scale=2):
    # num_rows: number of rows, num_cols: number of columns
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    plt.show()
    return axes
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # If no device is specified, use the device of net's parameters
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode; this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:  # custom model (not used after section 3.13; GPU not considered)
                if 'is_training' in net.__code__.co_varnames:  # if the model takes an is_training argument
                    # pass is_training=False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
def train(train_iter, test_iter, net, loss, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
def train_fine_tuning(net, optimizer, batch_size=128, num_epochs=5):
    train_iter = DataLoader(ImageFolder(os.path.join(data_dir, 'CeramicsQing/train'), transform=train_augs),
                            batch_size, shuffle=True)
    test_iter = DataLoader(ImageFolder(os.path.join(data_dir, 'CeramicsQing/test'), transform=test_augs),
                           batch_size)
    loss = torch.nn.CrossEntropyLoss()
    train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_dir = 'D:\\大创文物分类'  # escape the backslash (or use a raw string) in Windows paths
os.listdir(os.path.join(data_dir, "CeramicsQing"))  # ['train', 'test']
train_imgs = ImageFolder(os.path.join(data_dir, 'CeramicsQing/train'))
test_imgs = ImageFolder(os.path.join(data_dir, 'CeramicsQing/test'))
# Inspect images in the test set
# test_pics = [test_imgs[i][0] for i in range(140)]
# show_images(test_pics, 7, 20)
# Normalize each RGB channel with the given mean and std. When using a
# pretrained model, preprocessing must match what was used in pretraining.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),  # horizontal flip with probability p (default 0.5)
    transforms.ToTensor(),
    normalize
])
test_augs = transforms.Compose([
    transforms.Resize(size=256),      # scale the shorter side to 256 pixels
    transforms.CenterCrop(size=224),  # then take the 224x224 center crop as input
    transforms.ToTensor(),
    normalize
])
# We use ResNet-18 pretrained on ImageNet as the source model. Passing
# pretrained=True downloads and loads the pretrained parameters automatically.
pretrained_net = models.resnet18(pretrained=True)
pretrained_net.fc = nn.Linear(512, 7)  # replace the output layer for 7 target classes
print(pretrained_net.fc)
output_params = list(map(id, pretrained_net.fc.parameters()))
feature_params = filter(lambda p: id(p) not in output_params, pretrained_net.parameters())
lr = 0.01
# Use a 10x larger learning rate for the new output layer than for the
# pretrained feature layers
optimizer = optim.SGD([{'params': feature_params},
                       {'params': pretrained_net.fc.parameters(), 'lr': lr * 10}],
                      lr=lr, weight_decay=0.001)
train_fine_tuning(pretrained_net, optimizer)