import torch
from torch import nn
from torch.nn import functional as F  # the functional API: functions with no stored (learnable) parameters
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))  # a simple network with one hidden layer: Linear -> ReLU -> Linear
X = torch.rand(2, 20)  # a random 2x20 input matrix
net(X)
tensor([[-0.2445, -0.0222, -0.0563, -0.0727, -0.1029, 0.2274, 0.0739, 0.3686,
0.2237, -0.1444],
[-0.1527, -0.0438, 0.0650, 0.0392, -0.0910, 0.2722, 0.2828, 0.3175,
0.2041, -0.1027]], grad_fn=<AddmmBackward0>)
class MLP(nn.Module):
    def __init__(self):  # declare which layers (and hence which parameters) this model needs
super().__init__()
self.hidden=nn.Linear(20,256)
self.out=nn.Linear(256,10)
def forward(self,X):
return self.out(F.relu(self.hidden(X)))
# nn.ReLU() constructs a ReLU module object (a layer), not a function call; F.relu() is the functional, function-call form used in forward above
net=MLP()
net(X)  # the output is a 2x10 matrix
tensor([[-0.0350, -0.0752, 0.1399, 0.0448, 0.1707, -0.0410, 0.1234, 0.3876,
-0.2362, 0.1494],
[-0.0350, -0.0322, 0.0620, 0.1837, 0.1890, -0.0255, -0.0059, 0.4239,
-0.1433, 0.0753]], grad_fn=<AddmmBackward0>)
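To make the comment above concrete, here is a minimal sketch (not part of the original) contrasting the two forms: nn.ReLU() builds a Module object that can sit inside nn.Sequential, while F.relu is a stateless function applied directly to a tensor; both compute the same elementwise max(x, 0).
x = torch.tensor([-1.0, 0.0, 2.0])
relu_module = nn.ReLU()    # a Module, usable as a layer in nn.Sequential
print(relu_module(x))      # tensor([0., 0., 2.])
print(F.relu(x))           # tensor([0., 0., 2.]) -- same result via the functional API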
class MySequential(nn.Module):
def __init__(self,*args):
        super().__init__()  # call the parent class's constructor
        for idx, block in enumerate(args):  # each argument passed in is a layer (block)
            # _modules is an OrderedDict that nn.Module uses to track child modules;
            # registering each block under a string key lets its parameters be found
            self._modules[str(idx)] = block
def forward(self,X):
for block in self._modules.values():
X=block(X)
return X
net=MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)
tensor([[-0.3067, -0.1418, -0.0705, 0.0592, -0.1227, -0.0437, 0.0697, 0.0652,
0.1585, 0.0560],
[-0.3598, -0.0773, -0.1089, 0.1367, -0.1160, 0.0246, 0.1529, 0.0009,
0.0748, 0.0822]], grad_fn=<AddmmBackward0>)
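As a quick illustrative check (assuming the string-keyed registration above), the blocks end up in net._modules, the OrderedDict that nn.Module uses to track child modules so that their parameters are collected automatically:
print(list(net._modules.keys()))    # e.g. ['0', '1', '2']
print(len(list(net.parameters())))  # 4: weight and bias for each of the two Linear layers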
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # fixed (constant) weights: not a Parameter, so never updated by training
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)
    def forward(self, X):
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)  # mm: matrix multiply, then add 1, then ReLU
        X = self.linear(X)  # reuse the same linear layer (its parameters are shared)
        while X.abs().sum() > 1:  # arbitrary control flow is allowed inside forward
            X /= 2
        return X.sum()
net = FixedHiddenMLP()
net(X)
tensor(0.9181, grad_fn=<SumBackward0>)
class NestMLP(nn.Module):
def __init__(self):
super().__init__()
self.net=nn.Sequential(nn.Linear(20,64), nn.ReLU(),
nn.Linear(64,32), nn.ReLU())
self.linear=nn.Linear(32,16)
def forward(self,X):
return self.linear(self.net(X))
chimera=nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
chimera(X)
tensor(0.5094, grad_fn=<SumBackward0>)
To construct a model by subclassing nn.Module, two things must be defined: first __init__ (which layers and parameters the model holds), and second the forward computation forward.
Next, parameter management. We start with a multilayer perceptron that has a single hidden layer.
import torch
from torch import nn
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X=torch.rand(size=(2,4))
net(X)
tensor([[-0.2786],
[-0.2945]], grad_fn=<AddmmBackward0>)
print(net[2].state_dict())  # the state of this layer, i.e. its weights and bias
OrderedDict([('weight', tensor([[-0.1354, 0.1267, 0.2283, -0.2102, 0.1198, -0.1059, -0.0169, -0.0098]])), ('bias', tensor([-0.3447]))])
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)  # .data accesses the raw value (without autograd tracking)
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.3447], requires_grad=True)
tensor([-0.3447])
net[2].weight.grad == None  # .grad accesses the gradient; still None because no backward pass has run yet
True
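A small sketch (added for illustration): .grad stays None only until a backward pass runs; after one, the gradients are populated.
net(X).sum().backward()             # run a (dummy) backward pass
print(net[2].weight.grad is None)   # False: gradients have now been accumulated
net.zero_grad()                     # clear them so the following cells are unaffected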
print(*[(name,param.shape) for name,param in net[0].named_parameters()])
print(*[(name,param.shape) for name,param in net.named_parameters()])  # access all parameters of the whole network
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
net.state_dict()['2.bias'].data
tensor([-0.3447])
def block1():
return nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,4),
nn.ReLU())
def block2():
    net = nn.Sequential()
    for i in range(4):
        # note: f'block{1}' always produces the same name, so each add_module call
        # overwrites the previous one and only a single block1() survives
        # (see the printed structure below); f'block{i}' would keep all four
        net.add_module(f'block{1}', block1())
    return net
rgnet=nn.Sequential(block2(),nn.Linear(4,1))
rgnet(X)
tensor([[-0.3924],
[-0.4124]], grad_fn=<AddmmBackward0>)
print(rgnet)
Sequential(
(0): Sequential(
(block1): Sequential(
(0): Linear(in_features=4, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=4, bias=True)
(3): ReLU()
)
)
(1): Linear(in_features=4, out_features=1, bias=True)
)
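As the comment in block2 notes, f'block{1}' reuses one name, which is why the printout above contains a single nested block. A sketch of the presumably intended version, which keeps all four nested blocks by naming them with the loop variable:
def block2_fixed():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block{i}', block1())  # distinct names: block0 ... block3
    return net
print(nn.Sequential(block2_fixed(), nn.Linear(4, 1)))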
def init_normal(m):  # initialize weights from a normal distribution
    if type(m) == nn.Linear:
        # the trailing underscore means in-place: the function modifies its
        # input tensor directly rather than returning a new one
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)
net.apply(init_normal)  # .apply recursively visits (loops over) every module in net
net[0].weight.data[0], net[0].bias.data[0]
(tensor([-0.0092, 0.0162, -0.0011, -0.0013]), tensor(0.))
# constant init: with identical initial parameters all hidden units behave the
# same (symmetry), so the extra width of the layer is wasted
def init_constant(m):
if type(m)==nn.Linear:
nn.init.constant_(m.weight,1)
nn.init.zeros_(m.bias)
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]
(tensor([1., 1., 1., 1.]), tensor(0.))
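An illustrative check of that symmetry argument (not in the original): with all-ones weights and zero biases, every hidden unit of net[0] computes exactly the same function of the input.
h = net[0](X)                              # hidden pre-activations under the constant init
print(torch.allclose(h[:, 0], h[:, 1]))    # True: unit 0 and unit 1 are indistinguishable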
def xavier(m):  # Xavier initialization (uniform variant)
if type(m)==nn.Linear:
nn.init.xavier_uniform_(m.weight)
def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)  # different layers can use different initializers
net[0].apply(xavier)   # first layer: Xavier-uniform weights
net[2].apply(init_42)  # last layer: all weights set to 42
print(net[0].weight.data[0])
print(net[2].weight.data)  # all entries are 42
tensor([-0.2800, -0.4153, -0.6631, 0.3451])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
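As an illustrative check (the bound below is the standard Xavier-uniform formula, not computed in the original), xavier_uniform_ draws weights from U(-a, a) with a = sqrt(6 / (fan_in + fan_out)); for net[0] with fan_in=4 and fan_out=8 that is about 0.707, and every entry printed above indeed lies inside that range.
import math
a = math.sqrt(6 / (4 + 8))                    # Xavier-uniform bound for a 4 -> 8 layer
print(a)                                      # ~0.7071
print(net[0].weight.data.abs().max() <= a)    # tensor(True)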
def my_init(m):
if type(m)==nn.Linear:
print(
"Init",
*[(name,param.shape) for name,param in m.named_parameters()][0])
nn.init.uniform_(m.weight,-10,10)
        # first evaluate >= 5 to get a boolean mask, then *= multiplies each original
        # entry by the corresponding mask element, zeroing out every weight with |w| < 5
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
net[0].weight[:2]
Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])
tensor([[-5.2818, -7.9384, 6.6614, 0.0000],
[-0.0000, -0.0000, 0.0000, -0.0000]], grad_fn=<SliceBackward0>)
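A small worked example (values made up for illustration) of the masking trick described in the comment: the comparison yields a boolean mask, and multiplying by it keeps only entries whose magnitude is at least 5.
w = torch.tensor([[-7.9, 2.3], [5.1, -0.4]])
mask = w.abs() >= 5      # tensor([[ True, False], [ True, False]])
print(w * mask)          # tensor([[-7.9000, 0.0000], [ 5.1000, -0.0000]])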
net[0].weight.data[:] += 1      # add 1 to every entry
net[0].weight.data[0, 0] = 42   # set the first element to 42
net[0].weight.data[0]
tensor([42.0000, -6.9384, 7.6614, 1.0000])
# parameter tying: two places in the data flow share the same weights (the `shared` layer)
shared = nn.Linear(8, 8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                    shared, nn.ReLU(),  # the 2nd and 4th hidden layers are this same
                    shared, nn.ReLU(),  # shared module, so their weights are identical
                    nn.Linear(8, 1))
net(X)
print(net[2].weight.data[0]==net[4].weight.data[0])
net[2].weight.data[0, 0] = 100  # changing one shared occurrence also changes the other: they are the same object
print(net[2].weight.data[0]==net[4].weight.data[0])
tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
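One illustrative consequence of parameter tying (a sketch, not part of the original): since both occurrences of `shared` are literally the same module, its single weight tensor collects gradient contributions from both positions during backpropagation.
net(X).sum().backward()
print(shared.weight.grad.shape)   # torch.Size([8, 8]): one gradient, summed over both uses
net.zero_grad()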
import torch
import torch.nn.functional as F
from torch import nn
class CenteredLayer(nn.Module):
def __init__(self):
super().__init__()
def forward(self,X):
return X-X.mean()
layer=CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5]))
tensor([-2., -1., 0., 1., 2.])
net=nn.Sequential(nn.Linear(8,128),CenteredLayer())
Y=net(torch.rand(4,8))
Y.mean()  # essentially zero, up to floating-point rounding error
tensor(-1.8626e-09, grad_fn=<MeanBackward0>)
class MyLinear(nn.Module):
    def __init__(self, in_units, units):  # in_units: input dimension, units: output dimension
super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))  # randn: standard normal distribution
self.bias=nn.Parameter(torch.randn(units,))
def forward(self,X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data  # matmul: matrix multiplication
return F.relu(linear)
dense=MyLinear(5,3)
dense.weight
Parameter containing:
tensor([[-0.3036, -0.7583, -0.4838],
[-0.6022, 0.2335, -0.2397],
[ 0.1404, -0.1485, -0.3653],
[-1.1541, -1.6614, 1.1406],
[ 0.6781, -0.3380, 1.2838]], requires_grad=True)
dense(torch.randn(2,5))
tensor([[1.6271, 0.0000, 1.1881],
[0.0965, 1.8006, 0.4168]])
net=nn.Sequential(MyLinear(64,8),MyLinear(8,1))
net(torch.rand(2,64))
tensor([[6.2056],
[8.4571]])
import torch
from torch import nn
from torch.nn import functional as F
x=torch.arange(4)
torch.save(x, 'x-files')
x2=torch.load('x-files')
x2
tensor([0, 1, 2, 3])
y=torch.zeros(4)
torch.save([x,y],'x-files')
x2,y2=torch.load('x-files')
(x2,y2)
(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))
mydict={'x':x, 'y':y}
torch.save(mydict,'mydict')
mydict2=torch.load('mydict')
mydict2
{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}
class MLP(nn.Module):
def __init__(self):
super().__init__()
self.hidden=nn.Linear(20,256)
self.output=nn.Linear(256,10)
def forward(self,x):
return self.output(F.relu(self.hidden(x)))
net=MLP()
X=torch.randn(size=(2,20))
Y=net(X)
torch.save(net.state_dict(), 'mlp.params')  # save all of the MLP's parameters as a dict-like state_dict
clone = MLP()  # required: rebuild the architecture first, since only the parameters were saved
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()
MLP(
(hidden): Linear(in_features=20, out_features=256, bias=True)
(output): Linear(in_features=256, out_features=10, bias=True)
)
Y_clone=clone(X)
Y_clone==Y
tensor([[True, True, True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True, True, True]])