赞
踩
公众号后台回复关键字:Pytorch,获取项目github地址。
Pytorch没有官方的高阶API。一般通过nn.Module来构建模型并编写自定义训练循环。
为了更加方便地训练模型,作者编写了仿keras的Pytorch模型接口:torchkeras, 作为Pytorch的高阶API。
本章我们主要详细介绍Pytorch的高阶API如下相关的内容。
构建模型的3种方法(继承nn.Module基类,使用nn.Sequential,辅助应用模型容器)
训练模型的3种方法(脚本风格,函数风格,torchkeras.Model类风格)
使用GPU训练模型(单GPU训练,多GPU训练)
本篇我们介绍训练模型的3种方法。
pytorch通常需要用户编写自定义训练循环,训练循环的代码风格因人而异。
有3类典型的训练循环代码风格:脚本形式训练循环,函数形式训练循环,类形式训练循环。
下面以MNIST数据集的分类模型的训练为例,演示这3种训练模型的风格。
- import torch
- from torch import nn
- from torchkeras import summary,Model
-
- import torchvision
- from torchvision import transforms
-
# Only tensor conversion is applied to the images; no augmentation.
transform = transforms.Compose([transforms.ToTensor()])

# Training split and its shuffled loader.
ds_train = torchvision.datasets.MNIST(
    root="./data/minist/",
    train=True,
    download=True,
    transform=transform,
)
dl_train = torch.utils.data.DataLoader(
    ds_train,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

# Held-out split; order is kept fixed so evaluation is repeatable.
ds_valid = torchvision.datasets.MNIST(
    root="./data/minist/",
    train=False,
    download=True,
    transform=transform,
)
dl_valid = torch.utils.data.DataLoader(
    ds_valid,
    batch_size=128,
    shuffle=False,
    num_workers=4,
)

# Report the number of samples in each split.
print(len(ds_train))
print(len(ds_valid))
-
- 60000
- 10000
- %matplotlib inline
- %config InlineBackend.figure_format = 'svg'
-
# Preview the first nine training samples in a 3x3 grid.
from matplotlib import pyplot as plt

plt.figure(figsize=(8, 8))
for i in range(9):
    img, label = ds_train[i]
    # Drop the size-1 dimensions so imshow receives a plain 2-D array.
    img = torch.squeeze(img)
    ax = plt.subplot(3, 3, i + 1)
    ax.imshow(img.numpy())
    ax.set_title("label = %d" % label)
    # Hide both axes' tick marks.
    for clear_ticks in (ax.set_xticks, ax.set_yticks):
        clear_ticks([])
plt.show()
脚本风格的训练循环最为常见。
# Assemble the CNN as a named nn.Sequential. Layers are created in the same
# order as they are registered, so parameter initialisation order is unchanged.
net = nn.Sequential()
for layer_name, layer in [
    ("conv1", nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)),
    ("pool1", nn.MaxPool2d(kernel_size=2, stride=2)),
    ("conv2", nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)),
    ("pool2", nn.MaxPool2d(kernel_size=2, stride=2)),
    ("dropout", nn.Dropout2d(p=0.1)),
    ("adaptive_pool", nn.AdaptiveMaxPool2d((1, 1))),
    ("flatten", nn.Flatten()),
    ("linear1", nn.Linear(64, 32)),
    ("relu", nn.ReLU()),
    ("linear2", nn.Linear(32, 10)),
]:
    net.add_module(layer_name, layer)

print(net)
- Sequential(
- (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
- (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
- (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- (dropout): Dropout2d(p=0.1, inplace=False)
- (adaptive_pool): AdaptiveMaxPool2d(output_size=(1, 1))
- (flatten): Flatten()
- (linear1): Linear(in_features=64, out_features=32, bias=True)
- (relu): ReLU()
- (linear2): Linear(in_features=32, out_features=10, bias=True)
- )
# Print per-layer output shapes and parameter counts for a 1x32x32 input.
# NOTE(review): the MNIST images loaded earlier are presumably 1x28x28; confirm
# that 32x32 is the size intended for this report.
summary(net,input_shape=(1,32,32))
- ----------------------------------------------------------------
- Layer (type) Output Shape Param #
- ================================================================
- Conv2d-1 [-1, 32, 30, 30] 320
- MaxPool2d-2 [-1, 32, 15, 15] 0
- Conv2d-3 [-1, 64, 11, 11] 51,264
- MaxPool2d-4 [-1, 64, 5, 5] 0
- Dropout2d-5 [-1, 64, 5, 5] 0
- AdaptiveMaxPool2d-6 [-1, 64, 1, 1] 0
- Flatten-7 [-1, 64] 0
- Linear-8 [-1, 32] 2,080
- ReLU-9 [-1, 32] 0
- Linear-10 [-1, 10] 330
- ================================================================
- Total params: 53,994
- Trainable params: 53,994
- Non-trainable params: 0
- ----------------------------------------------------------------
- Input size (MB): 0.003906
- Forward/backward pass size (MB): 0.359695
- Params size (MB): 0.205971
- Estimated Total Size (MB): 0.569572
- ----------------------------------------------------------------
- import datetime
- import numpy as np
- import pandas as pd
- from sklearn.metrics import accuracy_score
-
def accuracy(y_pred, y_true):
    """Return the classification accuracy of a batch of predictions.

    Args:
        y_pred: Tensor of per-class scores with the classes along dim 1.
        y_true: Tensor (or array-like) holding the true class index of
            each sample.

    Returns:
        The value produced by ``accuracy_score`` for the true labels and
        the arg-max predicted labels.
    """
    # Softmax is strictly increasing within each row, so taking the arg-max
    # of the raw scores selects the same class without the extra pass.
    y_pred_cls = torch.argmax(y_pred, dim=1).detach().cpu().numpy()
    # Hand sklearn plain NumPy arrays instead of relying on implicit tensor
    # conversion; detach()/cpu() keep this valid for any device.
    if torch.is_tensor(y_true):
        y_true = y_true.detach().cpu().numpy()
    return accuracy_score(y_true, y_pred_cls)
-
# Script-style training: configuration, then one train + validation pass per
# epoch, with running means logged every `log_step_freq` batches.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=0.01)
metric_func = accuracy
metric_name = "accuracy"

epochs = 3
log_step_freq = 100

# One row per epoch: epoch index, mean train loss/metric, mean val loss/metric.
dfhistory = pd.DataFrame(columns=["epoch", "loss", metric_name, "val_loss", "val_" + metric_name])
print("Start Training...")
nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("==========" * 8 + "%s" % nowtime)

for epoch in range(1, epochs + 1):

    # 1. Training loop -------------------------------------------------
    net.train()
    loss_sum = 0.0
    metric_sum = 0.0
    step = 1  # fallback divisor if dl_train yields no batches

    for step, (features, labels) in enumerate(dl_train, 1):

        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass: compute loss and metric
        predictions = net(features)
        loss = loss_func(predictions, labels)
        metric = metric_func(predictions, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Batch-level log of the running means.
        # float() accepts NumPy scalars, 0-d tensors and plain Python floats,
        # whereas .item() is missing on the latter.
        loss_sum += loss.item()
        metric_sum += float(metric)
        if step % log_step_freq == 0:
            print(("[step = %d] loss: %.3f, " + metric_name + ": %.3f") %
                  (step, loss_sum / step, metric_sum / step))

    # 2. Validation loop -----------------------------------------------
    net.eval()
    val_loss_sum = 0.0
    val_metric_sum = 0.0
    val_step = 1  # fallback divisor if dl_valid yields no batches

    # No parameters are updated here, so skip building the autograd graph.
    with torch.no_grad():
        for val_step, (features, labels) in enumerate(dl_valid, 1):

            predictions = net(features)
            val_loss = loss_func(predictions, labels)
            val_metric = metric_func(predictions, labels)

            val_loss_sum += val_loss.item()
            val_metric_sum += float(val_metric)

    # 3. Record the epoch ----------------------------------------------
    info = (epoch, loss_sum / step, metric_sum / step,
            val_loss_sum / val_step, val_metric_sum / val_step)
    dfhistory.loc[epoch - 1] = info

    # Epoch-level log
    print(("\nEPOCH = %d, loss = %.3f," + metric_name +
           " = %.3f, val_loss = %.3f, " + "val_" + metric_name + " = %.3f")
          % info)
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n" + "==========" * 8 + "%s" % nowtime)

print('Finished Training...')
-
- Start Training...
- ================================================================================2020-06-26 12:49:16
- [step = 100] loss: 0.742, accuracy: 0.745
- [step = 200] loss: 0.466, accuracy: 0.843
- [step = 300] loss: 0.363, accuracy: 0.880
- [step = 400] loss: 0.310, accuracy: 0.898
-
- EPOCH = 1, loss = 0.281,accuracy = 0.908, val_loss = 0.087, val_accuracy = 0.972
-
- ================================================================================2020-06-26 12:50:32
- [step = 100] loss: 0.103, accuracy: 0.970
- [step = 200] loss: 0.114, accuracy: 0.966
- [step = 300] loss: 0.112, accuracy: 0.967
- [step = 400] loss: 0.108, accuracy: 0.968
-
- EPOCH = 2, loss = 0.111,accuracy = 0.967, val_loss = 0.082, val_accuracy = 0.976
-
- ================================================================================2020-06-26 12:51:47
- [step = 100] loss: 0.093, accuracy: 0.972
- [step = 200] loss: 0.095, accuracy: 0.971
- [step = 300] loss: 0.092, accuracy: 0.972
- [step = 400] loss: 0.093, accuracy: 0.972
-
- EPOCH = 3, loss = 0.098,accuracy = 0.971, val_loss = 0.113, val_accuracy = 0.970
-
- ================================================================================2020-06-26 12:53:09
- Finished Training...
该风格在脚本形式上作了简单的函数封装。
class Net(nn.Module):
    """CNN classifier whose layers are held in an ``nn.ModuleList``."""

    def __init__(self):
        super().__init__()
        # Layers are listed in the order forward() applies them.
        stack = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.1),
            nn.AdaptiveMaxPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        ]
        self.layers = nn.ModuleList(stack)

    def forward(self, x):
        """Feed ``x`` through every layer in turn and return the result."""
        out = x
        for module in self.layers:
            out = module(out)
        return out


net = Net()
print(net)
- Net(
- (layers): ModuleList(
- (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
- (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- (2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
- (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- (4): Dropout2d(p=0.1, inplace=False)
- (5): AdaptiveMaxPool2d(output_size=(1, 1))
- (6): Flatten()
- (7): Linear(in_features=64, out_features=32, bias=True)
- (8): ReLU()
- (9): Linear(in_features=32, out_features=10, bias=True)
- )
- )
# Print per-layer output shapes and parameter counts for a 1x32x32 input.
# NOTE(review): the MNIST images loaded earlier are presumably 1x28x28; confirm
# that 32x32 is the size intended for this report.
summary(net,input_shape=(1,32,32))
- ----------------------------------------------------------------
- Layer (type) Output Shape Param #
- ================================================================
- Conv2d-1 [-1, 32, 30, 30] 320
- MaxPool2d-2 [-1, 32, 15, 15] 0
- Conv2d-3 [-1, 64, 11, 11] 51,264
- MaxPool2d-4 [-1, 64, 5, 5] 0
- Dropout2d-5 [-1, 64, 5, 5] 0
- AdaptiveMaxPool2d-6 [-1, 64, 1, 1] 0
- Flatten-7 [-1, 64] 0
- Linear-8 [-1, 32] 2,080
- ReLU-9 [-1, 32] 0
- Linear-10 [-1, 10] 330
- ================================================================
- Total params: 53,994
- Trainable params: 53,994
- Non-trainable params: 0
- ----------------------------------------------------------------
- Input size (MB): 0.003906
- Forward/backward pass size (MB): 0.359695
- Params size (MB): 0.205971
- Estimated Total Size (MB): 0.569572
- ----------------------------------------------------------------
- import datetime
- import numpy as np
- import pandas as pd
- from sklearn.metrics import accuracy_score
-
def accuracy(y_pred, y_true):
    """Return the classification accuracy of a batch of predictions.

    Args:
        y_pred: Tensor of per-class scores with the classes along dim 1.
        y_true: Tensor (or array-like) holding the true class index of
            each sample.

    Returns:
        The value produced by ``accuracy_score`` for the true labels and
        the arg-max predicted labels.
    """
    # Softmax is strictly increasing within each row, so taking the arg-max
    # of the raw scores selects the same class without the extra pass.
    y_pred_cls = torch.argmax(y_pred, dim=1).detach().cpu().numpy()
    # Hand sklearn plain NumPy arrays instead of relying on implicit tensor
    # conversion; detach()/cpu() keep this valid for any device.
    if torch.is_tensor(y_true):
        y_true = y_true.detach().cpu().numpy()
    return accuracy_score(y_true, y_pred_cls)
-
# Attach the training configuration to the network object so that the step
# functions can read everything they need from a single `model` argument.
model = net
model.metric_name = "accuracy"
model.metric_func = accuracy
model.loss_func = nn.CrossEntropyLoss()
model.optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
-
def train_step(model, features, labels):
    """Run one optimisation step on a single batch.

    Args:
        model: Module carrying ``optimizer``, ``loss_func`` and
            ``metric_func`` attributes.
        features: Batch of inputs passed to ``model``.
        labels: Targets passed to the loss and metric functions.

    Returns:
        Tuple ``(loss, metric)`` of Python floats for this batch.
    """
    # Training mode, so dropout layers are active
    model.train()

    # Clear gradients left over from the previous step
    model.optimizer.zero_grad()

    # Forward pass: compute loss and metric
    predictions = model(features)
    loss = model.loss_func(predictions, labels)
    metric = model.metric_func(predictions, labels)

    # Backward pass and parameter update
    loss.backward()
    model.optimizer.step()

    # float() accepts NumPy scalars, 0-d tensors and plain Python floats,
    # whereas .item() does not exist on a plain float.
    return loss.item(), float(metric)
-
def valid_step(model, features, labels):
    """Evaluate a single batch without updating the model.

    Args:
        model: Module carrying ``loss_func`` and ``metric_func`` attributes.
        features: Batch of inputs passed to ``model``.
        labels: Targets passed to the loss and metric functions.

    Returns:
        Tuple ``(loss, metric)`` of Python floats for this batch.
    """
    # Evaluation mode, so dropout layers are inactive
    model.eval()

    # Nothing is back-propagated here, so do not record the autograd graph.
    with torch.no_grad():
        predictions = model(features)
        loss = model.loss_func(predictions, labels)
        metric = model.metric_func(predictions, labels)

    # float() accepts NumPy scalars, 0-d tensors and plain Python floats,
    # whereas .item() does not exist on a plain float.
    return loss.item(), float(metric)
-
-
# Try train_step on one batch drawn from the training loader
first_batch = iter(dl_train)
features, labels = next(first_batch)
train_step(model, features, labels)
-
(2.32741117477417, 0.1015625)
def train_model(model, epochs, dl_train, dl_valid, log_step_freq):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch history.

    Args:
        model: Object with a ``metric_name`` attribute, accepted by
            ``train_step`` and ``valid_step``.
        epochs: Number of epochs to run.
        dl_train: Iterable yielding ``(features, labels)`` training batches.
        dl_valid: Iterable yielding ``(features, labels)`` validation batches.
        log_step_freq: Running means are printed every this many batches.

    Returns:
        DataFrame with one row per epoch: epoch number, mean training loss
        and metric, mean validation loss and metric.
    """

    def _stamp():
        # Wall-clock time shown at the end of each separator line.
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    rule = "==========" * 8
    metric_name = model.metric_name

    # Format templates depend only on the metric name; build them once.
    step_template = "[step = %d] loss: %.3f, " + metric_name + ": %.3f"
    epoch_template = ("\nEPOCH = %d, loss = %.3f," + metric_name +
                      " = %.3f, val_loss = %.3f, " + "val_" + metric_name + " = %.3f")

    dfhistory = pd.DataFrame(
        columns=["epoch", "loss", metric_name, "val_loss", "val_" + metric_name]
    )
    print("Start Training...")
    print(rule + "%s" % _stamp())

    for epoch in range(1, epochs + 1):

        # 1. Training pass ---------------------------------------------
        train_loss_total, train_metric_total = 0.0, 0.0
        n_train = 1  # fallback divisor if dl_train yields no batches
        for n_train, batch in enumerate(dl_train, 1):
            features, labels = batch
            batch_loss, batch_metric = train_step(model, features, labels)
            train_loss_total += batch_loss
            train_metric_total += batch_metric

            # Batch-level log of the running means
            if n_train % log_step_freq != 0:
                continue
            print(step_template % (n_train,
                                   train_loss_total / n_train,
                                   train_metric_total / n_train))

        # 2. Validation pass -------------------------------------------
        val_loss_total, val_metric_total = 0.0, 0.0
        n_val = 1  # fallback divisor if dl_valid yields no batches
        for n_val, batch in enumerate(dl_valid, 1):
            features, labels = batch
            batch_loss, batch_metric = valid_step(model, features, labels)
            val_loss_total += batch_loss
            val_metric_total += batch_metric

        # 3. Record and report the epoch -------------------------------
        info = (
            epoch,
            train_loss_total / n_train,
            train_metric_total / n_train,
            val_loss_total / n_val,
            val_metric_total / n_val,
        )
        dfhistory.loc[epoch - 1] = info

        print(epoch_template % info)
        print("\n" + rule + "%s" % _stamp())

    print('Finished Training...')
    return dfhistory
-
# Train for three epochs, logging running means every 100 batches.
epochs = 3
dfhistory = train_model(
    model=model,
    epochs=epochs,
    dl_train=dl_train,
    dl_valid=dl_valid,
    log_step_freq=100,
)
-
- Start Training...
- ================================================================================2020-06-26 13:10:00
- [step = 100] loss: 2.298, accuracy: 0.137
- [step = 200] loss: 2.288, accuracy: 0.145
- [step = 300] loss: 2.278, accuracy: 0.165
- [step = 400] loss: 2.265, accuracy: 0.183
-
- EPOCH = 1, loss = 2.254,accuracy = 0.195, val_loss = 2.158, val_accuracy = 0.301
-
- ================================================================================2020-06-26 13:11:23
- [step = 100] loss: 2.127, accuracy: 0.302
- [step = 200] loss: 2.080, accuracy: 0.338
- [step = 300] loss: 2.025, accuracy: 0.374
- [step = 400] loss: 1.957, accuracy: 0.411
-
- EPOCH = 2, loss = 1.905,accuracy = 0.435, val_loss = 1.469, val_accuracy = 0.710
-
- ================================================================================2020-06-26 13:12:43
- [step = 100] loss: 1.435, accuracy: 0.615
- [step = 200] loss: 1.324, accuracy: 0.647
- [step = 300] loss: 1.221, accuracy: 0.672
- [step = 400] loss: 1.132, accuracy: 0.696
-
- EPOCH = 3, loss = 1.074,accuracy = 0.711, val_loss = 0.582, val_accuracy = 0.878
-
- ================================================================================2020-06-26 13:13:59
- Finished Training...
此处使用torchkeras中定义的模型接口构建模型,并调用compile方法和fit方法训练模型。
使用该形式训练模型非常简洁明了。推荐使用该形式。
class CnnModel(nn.Module):
    """CNN classifier whose layers are held in an ``nn.ModuleList``."""

    def __init__(self):
        super().__init__()
        # Layers are listed in the order forward() applies them.
        stack = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.1),
            nn.AdaptiveMaxPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        ]
        self.layers = nn.ModuleList(stack)

    def forward(self, x):
        """Feed ``x`` through every layer in turn and return the result."""
        out = x
        for module in self.layers:
            out = module(out)
        return out
# Wrap the network in the torchkeras Model interface. The file imports `Model`
# by name (`from torchkeras import summary,Model`), so the bare `torchkeras`
# module name is not bound and `torchkeras.Model` would raise NameError.
model = Model(CnnModel())
print(model)
- CnnModel(
- (layers): ModuleList(
- (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
- (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- (2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
- (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- (4): Dropout2d(p=0.1, inplace=False)
- (5): AdaptiveMaxPool2d(output_size=(1, 1))
- (6): Flatten()
- (7): Linear(in_features=64, out_features=32, bias=True)
- (8): ReLU()
- (9): Linear(in_features=32, out_features=10, bias=True)
- )
- )
# Print per-layer output shapes and parameter counts for a 1x32x32 input.
# NOTE(review): the MNIST images loaded earlier are presumably 1x28x28; confirm
# that 32x32 is the size intended for this report.
model.summary(input_shape=(1,32,32))
- ----------------------------------------------------------------
- Layer (type) Output Shape Param #
- ================================================================
- Conv2d-1 [-1, 32, 30, 30] 320
- MaxPool2d-2 [-1, 32, 15, 15] 0
- Conv2d-3 [-1, 64, 11, 11] 51,264
- MaxPool2d-4 [-1, 64, 5, 5] 0
- Dropout2d-5 [-1, 64, 5, 5] 0
- AdaptiveMaxPool2d-6 [-1, 64, 1, 1] 0
- Flatten-7 [-1, 64] 0
- Linear-8 [-1, 32] 2,080
- ReLU-9 [-1, 32] 0
- Linear-10 [-1, 10] 330
- ================================================================
- Total params: 53,994
- Trainable params: 53,994
- Non-trainable params: 0
- ----------------------------------------------------------------
- Input size (MB): 0.003906
- Forward/backward pass size (MB): 0.359695
- Params size (MB): 0.205971
- Estimated Total Size (MB): 0.569572
- ----------------------------------------------------------------
- from sklearn.metrics import accuracy_score
-
def accuracy(y_pred, y_true):
    """Return the classification accuracy of a batch of predictions.

    Args:
        y_pred: Tensor of per-class scores with the classes along dim 1.
        y_true: Tensor (or array-like) holding the true class index of
            each sample.

    Returns:
        The value produced by ``accuracy_score`` for the true labels and
        the arg-max predicted labels.
    """
    # Softmax is strictly increasing within each row, so taking the arg-max
    # of the raw scores selects the same class without the extra pass.
    y_pred_cls = torch.argmax(y_pred, dim=1).detach().cpu().numpy()
    # A bare .numpy() raises for tensors that live on an accelerator;
    # detach()/cpu() make the conversion valid for any device.
    if torch.is_tensor(y_true):
        y_true = y_true.detach().cpu().numpy()
    return accuracy_score(y_true, y_pred_cls)
-
# Configure loss, optimizer and metrics on the torchkeras model.
model.compile(
    loss_func=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam(model.parameters(), lr=0.02),
    metrics_dict={"accuracy": accuracy},
)

# Fit with 3 as the first positional argument, validating on dl_valid and
# logging every 100 steps.
dfhistory = model.fit(
    3,
    dl_train=dl_train,
    dl_val=dl_valid,
    log_step_freq=100,
)
-
- Start Training ...
-
- ================================================================================2020-06-26 13:22:39
- {'step': 100, 'loss': 0.976, 'accuracy': 0.664}
- {'step': 200, 'loss': 0.611, 'accuracy': 0.795}
- {'step': 300, 'loss': 0.478, 'accuracy': 0.841}
- {'step': 400, 'loss': 0.403, 'accuracy': 0.868}
-
- +-------+-------+----------+----------+--------------+
- | epoch | loss | accuracy | val_loss | val_accuracy |
- +-------+-------+----------+----------+--------------+
- | 1 | 0.371 | 0.879 | 0.087 | 0.972 |
- +-------+-------+----------+----------+--------------+
-
- ================================================================================2020-06-26 13:23:59
- {'step': 100, 'loss': 0.182, 'accuracy': 0.948}
- {'step': 200, 'loss': 0.176, 'accuracy': 0.949}
- {'step': 300, 'loss': 0.173, 'accuracy': 0.95}
- {'step': 400, 'loss': 0.174, 'accuracy': 0.951}
-
- +-------+-------+----------+----------+--------------+
- | epoch | loss | accuracy | val_loss | val_accuracy |
- +-------+-------+----------+----------+--------------+
- | 2 | 0.175 | 0.951 | 0.152 | 0.958 |
- +-------+-------+----------+----------+--------------+
-
- ================================================================================2020-06-26 13:25:22
- {'step': 100, 'loss': 0.143, 'accuracy': 0.961}
- {'step': 200, 'loss': 0.151, 'accuracy': 0.959}
- {'step': 300, 'loss': 0.149, 'accuracy': 0.96}
- {'step': 400, 'loss': 0.152, 'accuracy': 0.959}
-
- +-------+-------+----------+----------+--------------+
- | epoch | loss | accuracy | val_loss | val_accuracy |
- +-------+-------+----------+----------+--------------+
- | 3 | 0.153 | 0.959 | 0.086 | 0.975 |
- +-------+-------+----------+----------+--------------+
-
- ================================================================================2020-06-26 13:26:48
- Finished Training...
如果本书对你有所帮助,想鼓励一下作者,记得给本项目加一颗星星star⭐️,并分享给你的朋友们喔😊!
如果对本书内容理解上有需要进一步和作者交流的地方,可以在公众号后台回复关键字:加群,加入读者交流群和大家讨论。
公众号后台回复关键字:pytorch,获取项目github地址。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。