torch.optim.lr_scheduler
The torch.optim.lr_scheduler module provides several ways to adjust the learning rate according to the number of training epochs. In most cases we let the learning rate decay as the epoch count grows, which usually gives better training results.
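All of the schedulers below are used the same way: wrap an optimizer, then call scheduler.step() once per epoch. A minimal sketch of that pattern (the model, optimizer, and StepLR settings here are placeholders, not part of the original examples):

import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR

model = nn.Linear(1, 10)                                  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)     # any scheduler from this module

for epoch in range(10):
    # ... training steps for this epoch, each ending with optimizer.step() ...
    optimizer.step()
    scheduler.step()   # since PyTorch 1.1, call this after optimizer.step()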
torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda,
    last_epoch=-1)
Update strategy:
The learning rate of each parameter group is set to the initial lr scaled by lr_lambda(epoch), i.e. new_lr = initial_lr * lr_lambda(epoch).
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR
import matplotlib.pyplot as plt

initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.Adam(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = LambdaLR(
    optimizer_1,
    lr_lambda=lambda epoch: 1 / (epoch + 1))

print("Initial learning rate:", optimizer_1.defaults['lr'])

lst = []  # collect the lr at each epoch so it can be plotted below
for epoch in range(1, 11):
    optimizer_1.zero_grad()
    optimizer_1.step()
    print("Learning rate at epoch %d: %f" % (epoch, optimizer_1.param_groups[0]['lr']))
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()

'''
Initial learning rate: 0.1
Learning rate at epoch 1: 0.100000
Learning rate at epoch 2: 0.050000
Learning rate at epoch 3: 0.033333
Learning rate at epoch 4: 0.025000
Learning rate at epoch 5: 0.020000
Learning rate at epoch 6: 0.016667
Learning rate at epoch 7: 0.014286
Learning rate at epoch 8: 0.012500
Learning rate at epoch 9: 0.011111
Learning rate at epoch 10: 0.010000
'''

plt.plot(lst)
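lr_lambda can also be a list of functions, one per parameter group. A short sketch under that assumption (the two groups and the decay factors are made up for illustration):

import torch
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR

backbone = nn.Linear(1, 10)
head = nn.Linear(10, 1)
optimizer = torch.optim.Adam([
    {'params': backbone.parameters(), 'lr': 0.01},
    {'params': head.parameters(), 'lr': 0.1},
])

# one lambda per param group: the first group decays geometrically,
# the second like 1/(epoch+1)
scheduler = LambdaLR(
    optimizer,
    lr_lambda=[lambda epoch: 0.95 ** epoch,
               lambda epoch: 1 / (epoch + 1)])

for epoch in range(3):
    optimizer.step()
    scheduler.step()
    print([group['lr'] for group in optimizer.param_groups])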
torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size,
    gamma=0.1,
    last_epoch=-1)
Update strategy:
Every step_size epochs, the learning rate is multiplied by gamma, i.e. lr = initial_lr * gamma ** (epoch // step_size).
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt

initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.Adam(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = StepLR(
    optimizer_1,
    step_size=3,
    gamma=0.1)

print("Initial learning rate:", optimizer_1.defaults['lr'])
lst = []
for epoch in range(1, 11):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    print("Learning rate at epoch %d: %f" % (epoch, optimizer_1.param_groups[0]['lr']))
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()

'''
Initial learning rate: 0.1
Learning rate at epoch 1: 0.100000
Learning rate at epoch 2: 0.100000
Learning rate at epoch 3: 0.100000
Learning rate at epoch 4: 0.010000
Learning rate at epoch 5: 0.010000
Learning rate at epoch 6: 0.010000
Learning rate at epoch 7: 0.001000
Learning rate at epoch 8: 0.001000
Learning rate at epoch 9: 0.001000
Learning rate at epoch 10: 0.000100
'''

plt.plot(lst)
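The printed sequence matches the closed form initial_lr * gamma ** ((epoch - 1) // step_size); a quick plain-Python check (not the library's implementation, just reproducing the values above):

initial_lr, gamma, step_size = 0.1, 0.1, 3
lrs = [initial_lr * gamma ** ((epoch - 1) // step_size) for epoch in range(1, 11)]
print(lrs)  # [0.1, 0.1, 0.1, 0.01, 0.01, 0.01, 0.001, 0.001, 0.001, 0.0001] up to float rounding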
torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones,
    gamma=0.1,
    last_epoch=-1)
Update strategy:
Each time one of the epochs listed in milestones is reached, the learning rate is multiplied by gamma:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import MultiStepLR
import matplotlib.pyplot as plt
lst = []

initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.Adam(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = MultiStepLR(
    optimizer_1,
    milestones=[3, 9],
    gamma=0.1)

print("Initial learning rate:", optimizer_1.defaults['lr'])

for epoch in range(1, 11):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    print("Learning rate at epoch %d: %f" % (epoch, optimizer_1.param_groups[0]['lr']))
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()
'''
Initial learning rate: 0.1
Learning rate at epoch 1: 0.100000
Learning rate at epoch 2: 0.100000
Learning rate at epoch 3: 0.100000
Learning rate at epoch 4: 0.010000
Learning rate at epoch 5: 0.010000
Learning rate at epoch 6: 0.010000
Learning rate at epoch 7: 0.010000
Learning rate at epoch 8: 0.010000
Learning rate at epoch 9: 0.010000
Learning rate at epoch 10: 0.001000
'''

plt.plot(lst)
The learning rate changes after the 3rd and the 9th epoch.
torch.optim.lr_scheduler.ExponentialLR(
    optimizer,
    gamma,
    last_epoch=-1)
Update strategy:
The learning rate is multiplied by gamma once per epoch, i.e. lr = initial_lr * gamma ** epoch.
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ExponentialLR
import matplotlib.pyplot as plt
lst = []

initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.Adam(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = ExponentialLR(
    optimizer_1,
    gamma=0.1)

print("Initial learning rate:", optimizer_1.defaults['lr'])

for epoch in range(1, 11):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    print("Learning rate at epoch %d: %.9f" % (epoch, optimizer_1.param_groups[0]['lr']))
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()


plt.plot(lst)

'''
Initial learning rate: 0.1
Learning rate at epoch 1: 0.100000000
Learning rate at epoch 2: 0.010000000
Learning rate at epoch 3: 0.001000000
Learning rate at epoch 4: 0.000100000
Learning rate at epoch 5: 0.000010000
Learning rate at epoch 6: 0.000001000
Learning rate at epoch 7: 0.000000100
Learning rate at epoch 8: 0.000000010
Learning rate at epoch 9: 0.000000001
Learning rate at epoch 10: 0.000000000
'''
Adjusting the learning rate with a cyclical schedule can help the model escape the local minima and saddle points it runs into during training.
torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max,
    eta_min=0,
    last_epoch=-1)
Update strategy:
The learning rate follows a cosine curve with half-period T_max, so its value always stays within [eta_min, initial_lr].
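For reference, the closed form given in the PyTorch docs for cosine annealing, with η_max the initial learning rate and T_cur the number of epochs since the schedule started, is:

$$\eta_t = \eta_{\min} + \frac{1}{2}\left(\eta_{\max} - \eta_{\min}\right)\left(1 + \cos\left(\frac{T_{cur}}{T_{\max}}\pi\right)\right)$$

T_max is therefore the number of epochs it takes to go from the maximum down to eta_min, i.e. half of the full cosine period, which is why the curve in the example below (T_max=20 over 100 epochs) completes two and a half full oscillations.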
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingLR
import matplotlib.pyplot as plt

initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.Adam(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = CosineAnnealingLR(
    optimizer_1,
    T_max=20)

print("Initial learning rate:", optimizer_1.defaults['lr'])

lst = []
for epoch in range(1, 101):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    lst.append(optimizer_1.param_groups[0]['lr'])
    # print("Learning rate at epoch %d: %.9f" % (epoch, optimizer_1.param_groups[0]['lr']))
    scheduler_1.step()

plt.plot(lst)
torch.optim.lr_scheduler.MultiplicativeLR(
    optimizer,
    lr_lambda,
    last_epoch=-1,
    verbose=False)
Update strategy:
Each call to scheduler.step() multiplies the current learning rate by lr_lambda(epoch), so the factors accumulate (compare with LambdaLR in the sketch after the example below).
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import MultiplicativeLR
import matplotlib.pyplot as plt

initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.Adam(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = MultiplicativeLR(
    optimizer_1,
    lr_lambda=lambda epoch: 1 / (epoch + 1))

print("Initial learning rate:", optimizer_1.defaults['lr'])

lst = []

for epoch in range(1, 11):
    optimizer_1.zero_grad()
    optimizer_1.step()
    print("Learning rate at epoch %d: %f" % (epoch, optimizer_1.param_groups[0]['lr']))
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()
'''
Initial learning rate: 0.1
Learning rate at epoch 1: 0.100000
Learning rate at epoch 2: 0.050000
Learning rate at epoch 3: 0.016667
Learning rate at epoch 4: 0.004167
Learning rate at epoch 5: 0.000833
Learning rate at epoch 6: 0.000139
Learning rate at epoch 7: 0.000020
Learning rate at epoch 8: 0.000002
Learning rate at epoch 9: 0.000000
Learning rate at epoch 10: 0.000000
'''

plt.plot(lst)
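The contrast with LambdaLR is easy to miss: LambdaLR rescales the initial learning rate by lr_lambda(epoch), while MultiplicativeLR rescales the current one. A plain-Python sketch (no torch needed) that reproduces both printed sequences under the same lr_lambda:

initial_lr = 0.1
lr_lambda = lambda epoch: 1 / (epoch + 1)

# LambdaLR: lr(epoch) = initial_lr * lr_lambda(epoch)
lambda_lrs = [initial_lr * lr_lambda(epoch) for epoch in range(10)]

# MultiplicativeLR: the current lr is multiplied by lr_lambda(epoch) at every step
mult_lrs, lr = [], initial_lr
for epoch in range(1, 11):
    mult_lrs.append(lr)
    lr *= lr_lambda(epoch)

print(lambda_lrs[:4])  # [0.1, 0.05, 0.0333..., 0.025]
print(mult_lrs[:4])    # [0.1, 0.05, 0.01666..., 0.004166...]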
torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr,
    max_lr,
    step_size_up=2000,
    step_size_down=None,
    mode='triangular',
    gamma=1.0,
    last_epoch=-1)
The learning rate varies cyclically. The main parameters are:

| Parameter | Description |
| --- | --- |
| base_lr | lower bound of the learning rate within a cycle |
| max_lr | upper bound of the learning rate within a cycle |
| step_size_up | number of steps in the increasing half of a cycle |
| step_size_down | number of steps in the decreasing half of a cycle |
| mode | one of {triangular, triangular2, exp_range}. Default: 'triangular' |
| gamma (float) | scaling factor gamma**(cycle iterations) used when mode='exp_range'. Default: 1.0 |
triangular mode: the most basic mode; the learning rate moves back and forth linearly and periodically between base_lr (the minimum learning rate) and max_lr (the maximum learning rate).
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CyclicLR
import matplotlib.pyplot as plt


initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.SGD(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = CyclicLR(
    optimizer_1,
    base_lr=0.1,
    max_lr=10,
    step_size_up=10,
    step_size_down=5,
    mode='triangular')


lst = []
for epoch in range(1, 101):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()


plt.plot(lst)
triangular2 mode: the maximum learning rate of each cycle is half that of the previous cycle, so the oscillation range shrinks over time.
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CyclicLR
import matplotlib.pyplot as plt


initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.SGD(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = CyclicLR(
    optimizer_1,
    base_lr=0.1,
    max_lr=10,
    step_size_up=10,
    step_size_down=5,
    mode='triangular2')


lst = []
for epoch in range(1, 101):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()


plt.plot(lst)
exp_range mode: the learning rate still oscillates, but the peak decays exponentially over time according to the gamma parameter.
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CyclicLR
import matplotlib.pyplot as plt


initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.SGD(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = CyclicLR(
    optimizer_1,
    base_lr=0.1,
    max_lr=10,
    step_size_up=10,
    step_size_down=5,
    mode='exp_range',
    gamma=0.9)


lst = []
for epoch in range(1, 101):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()


plt.plot(lst)
torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0,
    T_mult=1,
    eta_min=0,
    last_epoch=-1)
Sets the learning rate of each parameter group using a cosine annealing schedule and restarts it ("warm restart") after every T_i epochs.

| Parameter | Description |
| --- | --- |
| T_0 | number of epochs before the first restart |
| T_mult | factor by which the restart period grows after each restart |
| eta_min | minimum learning rate. Default: 0 |
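Within each run this is the same cosine curve as in CosineAnnealingLR, except that T_max is replaced by the current period T_i, which starts at T_0 and is multiplied by T_mult after every restart; with T_cur counting the epochs since the last restart:

$$\eta_t = \eta_{\min} + \frac{1}{2}\left(\eta_{\max} - \eta_{\min}\right)\left(1 + \cos\left(\frac{T_{cur}}{T_{i}}\pi\right)\right)$$

In the example below (T_0=10, T_mult=2, 100 epochs) the restarts therefore happen after 10, 30, and 70 epochs.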
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import matplotlib.pyplot as plt


initial_lr = 0.1

net_1 = nn.Sequential(
    nn.Linear(1, 10)
)

optimizer_1 = torch.optim.SGD(
    net_1.parameters(),
    lr=initial_lr)

scheduler_1 = CosineAnnealingWarmRestarts(
    optimizer_1,
    T_0=10,
    T_mult=2,
    eta_min=1)


lst = []
for epoch in range(1, 101):
    # train

    optimizer_1.zero_grad()
    optimizer_1.step()
    lst.append(optimizer_1.param_groups[0]['lr'])
    scheduler_1.step()


plt.plot(lst)
Reference: torch.optim.lr_scheduler:调整学习率_qyhaill的博客-CSDN博客_lr_scheduler