This post mainly follows Mu Li's video tutorial https://www.bilibili.com/video/BV1K64y1Q7wu/?spm_id_from=333.788.recommend_more_video.0&vd_source=c7bfc6ce0ea0cbe43aa288ba2713e56d
and the accompanying book https://zh-v2.d2l.ai/
These are personal notes on the parts of the provided code I found unclear. They are not fully rigorous and are for reference only.
APIs covered in this post and the sections where they appear:

| torch.nn | Section |
|---|---|
| Parameter | 3.1 |

| torch | Section |
|---|---|
| randn | 3.1 |
| zeros_like | 3.2 |
XOR example: no single linear classifier separates these four points, but combining two linear classifiers (blue splits along one axis, yellow along the other) via their product does:

| | 1 | 2 | 3 | 4 |
|---|---|---|---|---|
| blue | + | - | + | - |
| yellow | + | + | - | - |
| product | + | - | - | + |
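A quick numeric check of the table (a minimal sketch; the point coordinates, and taking the blue/yellow classifiers to be the signs of the two coordinates, are my assumptions for illustration):

```python
import torch

# Hypothetical coordinates for the four points in the table (columns 1-4).
# Assume blue classifies by sign(x1) and yellow by sign(x2).
pts = torch.tensor([[1., 1.], [-1., 1.], [1., -1.], [-1., -1.]])
blue = torch.sign(pts[:, 0])    # +, -, +, -
yellow = torch.sign(pts[:, 1])  # +, +, -, -
print(blue * yellow)            # tensor([ 1., -1., -1.,  1.]) -> +, -, -, +
```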
Single hidden layer
The hidden layer size is a hyperparameter.
Input: $x \in \mathbb{R}^n$
Hidden layer: $W_1 \in \mathbb{R}^{m \times n}$, $b_1 \in \mathbb{R}^m$
Output layer: $w_2 \in \mathbb{R}^m$, $b_2 \in \mathbb{R}$

$$h = \sigma(W_1 x + b_1)$$
$$o = w_2^T h + b_2$$

where $\sigma$ is an element-wise activation function.
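A minimal sketch of these formulas in PyTorch (the sizes $n$, $m$ and the choice of ReLU for $\sigma$ are illustrative assumptions):

```python
import torch

n, m = 4, 8                    # input and hidden sizes (illustrative)
x = torch.randn(n)
W1, b1 = torch.randn(m, n), torch.zeros(m)
w2, b2 = torch.randn(m), torch.zeros(())
h = torch.relu(W1 @ x + b1)    # h = sigma(W1 x + b1), with sigma = ReLU here
o = w2 @ h + b2                # o = w2^T h + b2, a scalar
print(h.shape, o.shape)        # torch.Size([8]) torch.Size([])
```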
Why do we need a nonlinear activation function?
Input: $x \in \mathbb{R}^n$
Hidden layer: $W_1 \in \mathbb{R}^{m \times n}$, $b_1 \in \mathbb{R}^m$
Output layer: $w_2 \in \mathbb{R}^m$, $b_2 \in \mathbb{R}$

Without an activation function:

$$h = W_1 x + b_1$$
$$o = w_2^T h + b_2$$

so the final output is

$$o = w_2^T W_1 x + b'$$

which is still linear in $x$: stacking affine layers without a nonlinearity collapses into a single affine layer.
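A small numeric check of this collapse (a sketch; the layer sizes are arbitrary):

```python
import torch
from torch import nn

torch.manual_seed(0)
net = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 1))  # no activation in between
W1, b1 = net[0].weight, net[0].bias
W2, b2 = net[1].weight, net[1].bias
W = W2 @ W1           # the equivalent single weight matrix
b = W2 @ b1 + b2      # the equivalent single bias b'
x = torch.randn(5, 4)
print(torch.allclose(net(x), x @ W.T + b, atol=1e-6))  # True
```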
Multiclass classification
$$y_1, y_2, \ldots, y_k = \mathrm{softmax}(o_1, o_2, \ldots, o_k)$$
Input: $x \in \mathbb{R}^n$
Hidden layer: $W_1 \in \mathbb{R}^{m \times n}$, $b_1 \in \mathbb{R}^m$
Output layer: $W_2 \in \mathbb{R}^{m \times k}$, $b_2 \in \mathbb{R}^k$

$$h = \sigma(W_1 x + b_1)$$
$$o = W_2^T h + b_2$$
$$y = \mathrm{softmax}(o)$$
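softmax turns the $k$ logits into a probability vector; a quick check with made-up logits:

```python
import torch

o = torch.tensor([2.0, 0.5, -1.0])
y = torch.softmax(o, dim=0)
print(y, y.sum())  # non-negative entries summing to 1
```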
Multiple hidden layers
$$h_1 = \sigma(W_1 x + b_1)$$
$$h_2 = \sigma(W_2 h_1 + b_2)$$
$$h_3 = \sigma(W_3 h_2 + b_3)$$
$$o = W_4 h_3 + b_4$$

Consistent with the earlier formulas, the output layer applies no activation.
Hyperparameters:

- number of hidden layers
- size of each hidden layer
Activation functions

Sigmoid: projects the input to $(0, 1)$; it is a smooth ("soft") version of the step function

$$\sigma(x) = \begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{otherwise} \end{cases}$$

$$\mathrm{sigmoid}(x) = \frac{1}{1 + \exp(-x)}$$
Tanh: projects the input to $(-1, 1)$.

$$\tanh(x) = \frac{1 - \exp(-2x)}{1 + \exp(-2x)}$$
ReLU (rectified linear unit):

$$\mathrm{ReLU}(x) = \max(x, 0)$$
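A minimal sketch checking these three formulas against PyTorch's built-ins:

```python
import torch

x = torch.linspace(-3, 3, 7)
sigmoid = 1 / (1 + torch.exp(-x))
tanh = (1 - torch.exp(-2 * x)) / (1 + torch.exp(-2 * x))
relu = torch.max(x, torch.zeros_like(x))
print(torch.allclose(sigmoid, torch.sigmoid(x)))  # True
print(torch.allclose(tanh, torch.tanh(x)))        # True
print(torch.equal(relu, torch.relu(x)))           # True
```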
nn.Parameter is a special tensor type in PyTorch for defining learnable parameters. Unlike a plain torch.Tensor, an nn.Parameter is automatically added to the parameter list of the nn.Module it is assigned to, so optimizers can update it.
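A minimal sketch of this registration behavior (the Scale module and its attribute names are made up for illustration):

```python
import torch
from torch import nn

class Scale(nn.Module):
    def __init__(self):
        super().__init__()
        self.w = nn.Parameter(torch.randn(3))  # registered: optimizers will see it
        self.t = torch.randn(3)                # plain tensor: NOT registered

    def forward(self, x):
        return self.w * x

m = Scale()
print([name for name, _ in m.named_parameters()])  # ['w']
```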
torch.randn returns a tensor whose elements are drawn from the normal distribution with mean 0 and variance 1 (the standard normal distribution).
```python
a = torch.randn(2, 3)
a
# tensor([[ 1.2116, -0.8110,  0.6086],
#         [ 0.6724, -0.5165,  0.9684]])
```
```python
# 1. Initialize parameters
num_inputs, num_outputs, num_hiddens = 28*28, 10, 256
W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
params = [W1, b1, W2, b2]
```
torch.zeros_like creates a new all-zero tensor with the same shape and dtype as the input tensor. This is handy for initializing tensors while keeping dimensions and types consistent.

```python
torch.zeros_like(input, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format)
```
```python
a = torch.randn(2, 3)
b = torch.zeros_like(a)
b
# tensor([[0., 0., 0.],
#         [0., 0., 0.]])
```
```python
# 2. Activation function
def relu(x):
    a = torch.zeros_like(x)
    return torch.max(x, a)  # element-wise max(x, 0)
```
```python
# 3. Model
def net(X):
    X = X.reshape(-1, 28*28)
    # @ is matrix multiplication
    H = relu(X @ W1 + b1)
    return H @ W2 + b2
```
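A quick shape check of net (a sketch; the batch size of 2 and the 1x28x28 Fashion-MNIST image shape are assumptions):

```python
X = torch.randn(2, 1, 28, 28)  # hypothetical batch of Fashion-MNIST-shaped images
print(net(X).shape)            # torch.Size([2, 10]): one logit per class
```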
```python
# 4. Loss function
loss = nn.CrossEntropyLoss(reduction='none')
```
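With reduction='none', the loss returns one value per example instead of the batch mean, which is why the training loop below calls l.mean().backward(). A quick check (the shapes are illustrative):

```python
y_hat = torch.randn(3, 10)   # 3 examples, 10 class logits each
y = torch.tensor([0, 2, 9])
print(loss(y_hat, y).shape)  # torch.Size([3]): per-example losses
```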
```python
# 5. Training
num_epochs, lr = 10, 0.1
updater = torch.optim.SGD(params, lr=lr)

class Accumulator:
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n  # a list of n zeros

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def accuracy(y_hat, y):
    """Number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)  # predicted class = largest logit
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())

def evaluate_accuracy(net, data_iter):
    """Accuracy of the model on a dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # evaluation mode
    metric = Accumulator(2)  # (correct predictions, total examples)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

# 6. Training loop
def train_epoch_ch3(net, train_iter, loss, updater):
    if isinstance(net, torch.nn.Module):
        net.train()  # training mode
    metric = Accumulator(3)  # (summed loss, correct predictions, examples)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]

def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    # takes the network, data iterators, loss function, and updater
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print(f"Epoch {epoch + 1}: train loss {train_loss}, train accuracy {train_acc}")
        print(f"Epoch {epoch + 1}: test accuracy {test_acc}")

train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
```
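To see how Accumulator feeds the metrics above, a tiny usage sketch (the values are made up):

```python
metric = Accumulator(2)       # track (correct predictions, total examples)
metric.add(8, 10)             # batch 1: 8 of 10 correct
metric.add(9, 10)             # batch 2: 9 of 10 correct
print(metric[0] / metric[1])  # 0.85
```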
Complete code (implementation from scratch)
```python
import torch
from torch import nn
from d2l import torch as d2l

if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # 1. Initialize parameters
    num_inputs, num_outputs, num_hiddens = 28*28, 10, 256
    W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
    b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
    W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
    b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
    params = [W1, b1, W2, b2]

    # 2. Activation function
    def relu(x):
        a = torch.zeros_like(x)
        return torch.max(x, a)

    # 3. Model
    def net(X):
        X = X.reshape(-1, 28*28)
        # @ is matrix multiplication
        H = relu(X @ W1 + b1)
        return H @ W2 + b2

    # 4. Loss function
    loss = nn.CrossEntropyLoss(reduction='none')

    # 5. Training
    num_epochs, lr = 10, 0.1
    updater = torch.optim.SGD(params, lr=lr)

    class Accumulator:
        """Accumulate sums over n variables."""
        def __init__(self, n):
            self.data = [0.0] * n
        def add(self, *args):
            self.data = [a + float(b) for a, b in zip(self.data, args)]
        def reset(self):
            self.data = [0.0] * len(self.data)
        def __getitem__(self, idx):
            return self.data[idx]

    def accuracy(y_hat, y):
        if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
            y_hat = y_hat.argmax(axis=1)
        cmp = y_hat.type(y.dtype) == y
        return float(cmp.type(y.dtype).sum())

    def evaluate_accuracy(net, data_iter):
        if isinstance(net, torch.nn.Module):
            net.eval()
        metric = Accumulator(2)
        with torch.no_grad():
            for X, y in data_iter:
                metric.add(accuracy(net(X), y), y.numel())
        return metric[0] / metric[1]

    # 6. Training loop
    def train_epoch_ch3(net, train_iter, loss, updater):
        if isinstance(net, torch.nn.Module):
            net.train()
        metric = Accumulator(3)
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y)
            if isinstance(updater, torch.optim.Optimizer):
                updater.zero_grad()
                l.mean().backward()
                updater.step()
            else:
                l.sum().backward()
                updater(X.shape[0])
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
        return metric[0] / metric[2], metric[1] / metric[2]

    def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
        for epoch in range(num_epochs):
            train_loss, train_acc = train_epoch_ch3(net, train_iter, loss, updater)
            test_acc = evaluate_accuracy(net, test_iter)
            print(f"Epoch {epoch + 1}: train loss {train_loss}, train accuracy {train_acc}")
            print(f"Epoch {epoch + 1}: test accuracy {test_acc}")

    train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
```
Concise implementation with high-level APIs (nn.Sequential):

```python
import torch
from torch import nn
from d2l import torch as d2l

if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # 1. Define the model and initialize parameters
    num_inputs, num_outputs, num_hiddens = 28*28, 10, 256
    net = nn.Sequential(nn.Flatten(),
                        nn.Linear(num_inputs, num_hiddens),
                        nn.ReLU(),
                        nn.Linear(num_hiddens, num_outputs))

    def init_weights(m):
        if type(m) == nn.Linear:
            # initialize the weights from a normal (Gaussian) distribution
            nn.init.normal_(m.weight, std=0.01)

    # apply() runs the init function on every submodule
    net.apply(init_weights)

    # 4. Loss function
    loss = nn.CrossEntropyLoss(reduction='none')

    # 5. Training
    num_epochs, lr = 10, 0.1
    updater = torch.optim.SGD(net.parameters(), lr=lr)

    class Accumulator:
        """Accumulate sums over n variables."""
        def __init__(self, n):
            self.data = [0.0] * n
        def add(self, *args):
            self.data = [a + float(b) for a, b in zip(self.data, args)]
        def reset(self):
            self.data = [0.0] * len(self.data)
        def __getitem__(self, idx):
            return self.data[idx]

    def accuracy(y_hat, y):
        if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
            y_hat = y_hat.argmax(axis=1)
        cmp = y_hat.type(y.dtype) == y
        return float(cmp.type(y.dtype).sum())

    def evaluate_accuracy(net, data_iter):
        if isinstance(net, torch.nn.Module):
            net.eval()
        metric = Accumulator(2)
        with torch.no_grad():
            for X, y in data_iter:
                metric.add(accuracy(net(X), y), y.numel())
        return metric[0] / metric[1]

    # 6. Training loop
    def train_epoch_ch3(net, train_iter, loss, updater):
        if isinstance(net, torch.nn.Module):
            net.train()
        metric = Accumulator(3)
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y)
            if isinstance(updater, torch.optim.Optimizer):
                updater.zero_grad()
                l.mean().backward()
                updater.step()
            else:
                l.sum().backward()
                updater(X.shape[0])
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
        return metric[0] / metric[2], metric[1] / metric[2]

    def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
        for epoch in range(num_epochs):
            train_loss, train_acc = train_epoch_ch3(net, train_iter, loss, updater)
            test_acc = evaluate_accuracy(net, test_iter)
            print(f"Epoch {epoch + 1}: train loss {train_loss}, train accuracy {train_acc}")
            print(f"Epoch {epoch + 1}: test accuracy {test_acc}")

    train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
```