赞
踩
咋说嘞,神经网络就是一个函数,用来拟合线性或非线性的数据。我改了一个小程序(代码修改自:https://blog.csdn.net/weixin_42318554/article/details/121940694),构建了一个两层的ConvNet和一个两层的MLP,看看效果如何。
003-是介绍了SGD的基本原理
004-是介绍深度学习的相关知识
有趣的点:
回顾一下deep learning的历史:
- 1958: Perceptron (linear model)
- 1969: Perceptron has limitation
- 1980s: Multi-layer perceptron
  - Do not have significant difference from DNN today
- 1986: Backpropagation
  - Usually more than 3 hidden layers is not helpful
- 1989: 1 hidden layer is “good enough”, why deep?
- 2006: RBM initialization (breakthrough)
- 2009: GPU
- 2011: Start to be popular in speech recognition
- 2012: win ILSVRC image competition

感知机(Perceptron)非常像我们的逻辑回归(Logistic Regression),只不过没有sigmoid激活函数。2009年GPU的发展是很关键的,使用GPU做矩阵运算节省了很多时间。
深度学习的三个步骤
我们都知道机器学习有三个步骤,对于deep learning其实也是三个步骤:
- Step1:神经网络(Neural network)
- Step2:模型评估(Goodness of function)
- Step3:选择最优函数(Pick best function)
那对于深度学习的Step1就是神经网络(Neural Network)
到task 004 的时候再在模型中添加zfnet,看看结果
添加ZF-Net的类似模型:
import torch.nn as nn
import torch


class ZFNet_like(nn.Module):
    """ZFNet-inspired 1-D convolutional regressor for single-feature inputs.

    ``forward`` takes a ``(batch, 1)`` tensor, appends a length-1 axis, runs
    it through five ``Conv1d`` layers and a three-layer fully connected head,
    and finally squeezes the last axis of the result.

    Args:
        num_classes: Width of the final linear layer.
        init_weights: When true, re-initialise the conv and linear parameters
            with ``_initialize_weights``.
    """

    def __init__(self, num_classes=1, init_weights=True):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv1d(1, 48, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),  # in-place activation avoids an extra buffer
            # MaxPool1d rather than MaxPool2d: the tensors here are
            # (batch, channels, length). MaxPool2d would read such a 3-D
            # tensor as an unbatched (C, H, W) image and pool across the
            # channel axis.
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            nn.Conv1d(48, 128, kernel_size=1, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            # Each padded kernel-1 conv below grows the length by 2: 1 -> 3 -> 5 -> 7.
            nn.Conv1d(128, 192, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(192, 192, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(192, 128, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=1, stride=2),  # length 7 -> 4
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            # 512 = 128 channels * length 4 coming out of ``features``.
            nn.Linear(512, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the network output for ``x`` with its last axis squeezed.

        With the default ``num_classes=1`` the result has shape ``(batch,)``.
        """
        x = x.unsqueeze(2)  # (batch, 1) -> (batch, 1, 1) for Conv1d
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)  # same effect as a view() to (batch, -1)
        x = self.classifier(x)
        return x.squeeze(-1)

    def _initialize_weights(self):
        """Kaiming-initialise conv layers and normal-initialise linear layers."""
        for m in self.modules():
            # The model is built from Conv1d layers; Conv2d is still accepted
            # so the check covers both kinds of convolution.
            if isinstance(m, (nn.Conv1d, nn.Conv2d)):
                # He (Kaiming) initialisation.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


def test():
    """Smoke test: run a random (100, 1) batch through the net and print the output size."""
    net = ZFNet_like()
    y = net(torch.randn(100, 1))
    print(y.size())


test()
上代码:
# Adapted from https://blog.csdn.net/weixin_42318554/article/details/121940694
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Build the dataset: a parabola with uniform noise added.
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # shape (100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())  # shape (100, 1)


class Net(torch.nn.Module):
    """Fully connected net: one hidden linear layer with ReLU, then a linear output."""

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.output = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        return self.output(x)


class ConvNet(torch.nn.Module):
    """Two stacked Conv1d layers applied to each sample as a length-1 sequence."""

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        self.hidden = torch.nn.Conv1d(n_feature, n_hidden, kernel_size=3, padding=1)
        # Output channels follow n_output; the original passed n_feature here,
        # which left the n_output argument unused.
        self.output = torch.nn.Conv1d(n_hidden, n_output, kernel_size=3, padding=1)

    def forward(self, x):
        x = x.unsqueeze(2)  # (batch, n_feature) -> (batch, n_feature, 1)
        x = F.relu(self.hidden(x))
        x = self.output(x)
        return x.squeeze(-1)


net_0 = Net(n_feature=1, n_hidden=100, n_output=1)
net_1 = ConvNet(n_feature=1, n_hidden=100, n_output=1)
net_2 = ZFNet_like()

# Interactive plotting so the figure refreshes while training runs.
plt.ion()
plt.show()

optimizer_0 = torch.optim.SGD(net_0.parameters(), lr=0.2)
optimizer_1 = torch.optim.SGD(net_1.parameters(), lr=0.2)
# NOTE(review): lr=0.1 is large for Adam; try a smaller value if the fit is
# still poor after the shape fix below.
optimizer_2 = torch.optim.Adam(net_2.parameters(), lr=0.1, betas=(0.9, 0.999),
                               eps=1e-08, weight_decay=0)
loss_func = torch.nn.MSELoss()  # mean squared error


def _train_step(net, optimizer):
    """Run one full-batch optimisation step; return (detached prediction, loss value)."""
    # Reshape to y's shape before taking the loss: a (100,) prediction against
    # a (100, 1) target would broadcast to (100, 100) inside MSELoss and train
    # against the wrong objective.
    prediction = net(x).reshape(y.shape)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()
    return prediction.detach(), loss.item()


for t in range(2001):
    pre_y0, loss_0 = _train_step(net_0, optimizer_0)
    pre_y1, loss_1 = _train_step(net_1, optimizer_1)
    pre_y2, loss_2 = _train_step(net_2, optimizer_2)

    # Redraw and save a snapshot every 200 steps.
    if t % 200 == 0:
        plt.cla()
        plt.scatter(x.numpy(), y.numpy())
        plt.plot(x.numpy(), pre_y0.numpy(), 'r_', lw=5)
        plt.plot(x.numpy(), pre_y1.numpy(), 'g_', lw=5)
        plt.plot(x.numpy(), pre_y2.numpy(), 'b_', lw=5)
        plt.text(0.5, 0, 'FC_loss=%.4f ' % loss_0, fontdict={'size': 10, 'color': 'red'})
        plt.text(0.5, 0.1, 'Conv_loss=%.4f' % loss_1, fontdict={'size': 10, 'color': 'green'})
        plt.text(0.5, 0.2, 'ZF_loss=%.4f' % loss_2, fontdict={'size': 10, 'color': 'blue'})
        plt.savefig(str(t) + '.png', bbox_inches='tight')
        plt.pause(0.1)
展示几个结果:
1000轮
2000轮
显然,更深的模型效果很差,需要找寻原因。
看了李宏毅老师的课件代码,里面是用numpy实现的loss函数和梯度下降,仔细看下还是很有必要的:
# Build the loss surface: Z[j][i] is the mean squared error of the line
# y_data = b + w * x_data, with b = x[i] and w = y[j].
for i, b in enumerate(x):
    for j, w in enumerate(y):
        # meshgrid layout: y indexes the rows, x indexes the columns.
        Z[j][i] = 0
        for x_n, y_n in zip(x_data, y_data):
            Z[j][i] += (y_n - b - w * x_n) ** 2
        # Divide once, after the sum over all samples, to get the mean.
        Z[j][i] /= len(x_data)
线性回归过程:
import time

# Plain gradient descent for the linear model y = w * x + b under mean
# squared error, recording every parameter value and loss along the way.
b = -2
w = 0.01
lr = 0.000005
iteration = 1400000

b_history = [b]
w_history = [w]
loss_history = []

# The sample count does not change, so compute it once outside the loop.
m = float(len(x_d))

start = time.time()
for i in range(iteration):
    y_hat = w * x_d + b
    residual = y_d - y_hat  # reused by the loss and by both gradients
    loss = np.dot(residual, residual) / m
    grad_b = -2.0 * np.sum(residual) / m
    grad_w = -2.0 * np.dot(residual, x_d) / m

    # Update the parameters.
    b -= lr * grad_b
    w -= lr * grad_w

    b_history.append(b)
    w_history.append(w)
    loss_history.append(loss)

    if i % 10000 == 0:
        # w and b are printed after this step's update; loss is from before it.
        print("Step %i, w: %0.4f, b: %.4f, Loss: %.4f" % (i, w, b, loss))
end = time.time()
print("大约需要时间:",end-start)
Step 0, w: 1.8648, b: -1.9952, Loss: 413789.3821 Step 10000, w: 2.1484, b: -7.1183, Loss: 19355.2329 Step 20000, w: 2.1627, b: -12.1013, Loss: 18858.5836 Step 30000, w: 2.1766, b: -16.9474, Loss: 18388.8578 Step 40000, w: 2.1902, b: -21.6603, Loss: 17944.5958 Step 50000, w: 2.2034, b: -26.2436, Loss: 17524.4173 Step 60000, w: 2.2162, b: -30.7010, Loss: 17127.0167 Step 70000, w: 2.2286, b: -35.0359, Loss: 16751.1593 Step 80000, w: 2.2407, b: -39.2517, Loss: 16395.6772 Step 90000, w: 2.2525, b: -43.3516, Loss: 16059.4658 Step 100000, w: 2.2640, b: -47.3389, Loss: 15741.4804 Step 110000, w: 2.2751, b: -51.2165, Loss: 15440.7331 Step 120000, w: 2.2860, b: -54.9876, Loss: 15156.2893 Step 130000, w: 2.2965, b: -58.6551, Loss: 14887.2653 Step 140000, w: 2.3067, b: -62.2217, Loss: 14632.8251 Step 150000, w: 2.3167, b: -65.6903, Loss: 14392.1781 Step 160000, w: 2.3264, b: -69.0637, Loss: 14164.5766 Step 170000, w: 2.3358, b: -72.3442, Loss: 13949.3134 Step 180000, w: 2.3450, b: -75.5347, Loss: 13745.7197 Step 190000, w: 2.3539, b: -78.6374, Loss: 13553.1628 Step 200000, w: 2.3626, b: -81.6549, Loss: 13371.0444 Step 210000, w: 2.3710, b: -84.5895, Loss: 13198.7988 Step 220000, w: 2.3792, b: -87.4434, Loss: 13035.8905 Step 230000, w: 2.3872, b: -90.2189, Loss: 12881.8136 Step 240000, w: 2.3950, b: -92.9181, Loss: 12736.0891 ... Step 1370000, w: 2.6577, b: -184.3362, Loss: 10198.3508 Step 1380000, w: 2.6580, b: -184.4488, Loss: 10198.0972 Step 1390000, w: 2.6583, b: -184.5583, Loss: 10197.8574 大约需要时间: 20.32868242263794
出图
# Plot the figure.
# Left panel: filled contours of the loss surface with the descent path on top.
plt.subplot(1, 2, 1)
C = plt.contourf(x, y, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))  # filled contours
# Contour labels can be enabled with: plt.clabel(C, inline=True, fontsize=5)
# Orange cross at (b, w) = (-188.4, 2.67) — presumably the reference optimum; confirm.
plt.plot([-188.4], [2.67], 'x', ms=12, mew=3, color="orange")
plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')
plt.xlim(-200, -100)
plt.ylim(-5, 5)
plt.xlabel(r'$b$')
plt.ylabel(r'$w$')
plt.title("线性回归")

# Right panel: loss per step, skipping the first two steps.
plt.subplot(1, 2, 2)
loss = np.asarray(loss_history[2:iteration])
# Derive the step axis from the number of points actually plotted so the two
# arrays always have the same length.
plt.plot(np.arange(2, 2 + len(loss)), loss)
plt.title("损失")
plt.xlabel('step')
plt.ylabel('loss')
plt.show()
# Prof. Hung-yi Lee's classroom demo needs about 1.4 million iterations to
# converge to the optimum; the result is shown below.
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。