[Datawhale-李宏毅机器学习-39期]-002-回归+003-误差和梯度下降+004-反向传播机制_datawahle 李宏毅机器学习回归

作者：凡人多烦事01 | 2024-03-01 10:46:52

踩

datawahle 李宏毅机器学习回归

简单来说，神经网络就是一个函数，用来拟合线性或非线性的数据。我改了一个小程序（代码修改自：https://blog.csdn.net/weixin_42318554/article/details/121940694），构建了一个两层的 ConvNet 和一个两层的 MLP，看看效果如何。
003-是介绍了SGD的基本原理
004-是介绍深度学习的相关知识
有趣的点：

回顾一下deep learning的历史：

1958: Perceptron (linear model)
1969: Perceptron has limitation
1980s: Multi-layer perceptron
Do not have significant difference from DNN today

1986: Backpropagation
Usually more than 3 hidden layers is not helpful

1989: 1 hidden layer is “good enough”, why deep?
2006: RBM initialization (breakthrough)
2009: GPU
2011: Start to be popular in speech recognition
2012: win ILSVRC image competition

感知机（Perceptron）非常像我们的逻辑回归（Logistic Regression），只不过没有 sigmoid 激活函数。2009 年 GPU 的发展是很关键的，使用 GPU 进行矩阵运算节省了很多时间。
深度学习的三个步骤
我们都知道机器学习有三个step，对于deep learning其实也是3个步骤：
Step1：神经网络（Neural network）
Step2：模型评估（Goodness of function）
Step3：选择最优函数（Pick best function）
那对于深度学习的Step1就是神经网络（Neural Network）

到task 004 的时候再在模型中添加zfnet，看看结果
添加ZF-Net的类似模型：

import torch.nn as nn
import torch


class ZFNet_like(nn.Module):
    """ZFNet-inspired stack of 1-D convolutions followed by an MLP head.

    ``forward`` adds a trailing length axis to its input, so it expects a
    2-D tensor of shape ``(N, 1)`` (one scalar feature per sample).  With
    that input the feature extractor produces 128 channels of length 4,
    i.e. the 512 flattened features the classifier's first ``Linear``
    layer requires.

    Args:
        num_classes: Width of the final ``Linear`` layer.
        init_weights: When true, re-initialise conv and linear layers via
            ``_initialize_weights``.
    """

    def __init__(self, num_classes=1, init_weights=True):
        super(ZFNet_like, self).__init__()
        # Every layer here is 1-D: activations are (N, C, L).  The pooling
        # layers must be MaxPool1d as well; a MaxPool2d fed a 3-D tensor
        # treats it as an unbatched (C, H, W) image and pools across the
        # channel axis.
        self.features = nn.Sequential(
            nn.Conv1d(1, 48, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),  # in-place to save activation memory
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            nn.Conv1d(48, 128, kernel_size=1, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            # kernel_size=1 with padding=1 grows the length by 2 per layer:
            # 1 -> 3 -> 5 -> 7.
            nn.Conv1d(128, 192, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(192, 192, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(192, 128, kernel_size=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=1, stride=2),  # length 7 -> 4
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            # Fully connected head; 512 = 128 channels * length 4.
            nn.Linear(512, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the network on a ``(N, 1)`` batch.

        Returns:
            The classifier output with its last axis squeezed: shape
            ``(N,)`` when ``num_classes == 1``, otherwise
            ``(N, num_classes)``.  Callers comparing against ``(N, 1)``
            targets must reshape, or the loss will broadcast.
        """
        x = x.unsqueeze(2)  # (N, 1) -> (N, 1, 1): channels=1, length=1
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)  # equivalent to x.view(N, -1)
        x = self.classifier(x)
        return x.squeeze(-1)

    def _initialize_weights(self):
        """Kaiming-normal init for conv layers, N(0, 0.01) for linear layers; zero biases."""
        for m in self.modules():
            # The model is built from Conv1d layers, so that is the type to
            # match; checking for Conv2d would skip every convolution.
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # He initialisation
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

def test():
    """Smoke test: feed a random (100, 1) batch through ZFNet_like and print the output size."""
    model = ZFNet_like()
    batch = torch.randn(100, 1)
    prediction = model(batch)
    print(prediction.size())


test()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

上代码：

# https://blog.csdn.net/weixin_42318554/article/details/121940694
import  torch
import  torch.nn.functional as F
import matplotlib.pyplot as plt

# Build the toy data set: a noisy parabola.
x = torch.linspace(-1, 1, 100).unsqueeze(dim=1)  # inputs, shape (100, 1)
y = x ** 2 + torch.rand(x.size()) * 0.2  # targets with uniform noise in [0, 0.2), shape (100, 1)

#建立神经网络
#方法一：
class Net(torch.nn.Module):
    """Fully connected regressor: Linear -> ReLU -> Linear.

    Args:
        n_feature: Input width of the hidden layer.
        n_hidden: Output width of the hidden layer.
        n_output: Output width of the final layer.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # Layer definitions; the activation is applied in forward().
        self.hidden = torch.nn.Linear(in_features=n_feature, out_features=n_hidden)
        self.output = torch.nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self, x):
        """Return the un-activated output of the second linear layer."""
        activated = F.relu(self.hidden(x))
        return self.output(activated)

class ConvNet(torch.nn.Module):
    """Two-layer 1-D convolutional regressor: Conv1d -> ReLU -> Conv1d.

    ``forward`` appends a length-1 axis to its input, so it expects a
    ``(N, n_feature)`` tensor and returns ``(N, n_output)``.

    Args:
        n_feature: Number of input channels.
        n_hidden: Number of channels produced by the hidden convolution.
        n_output: Number of channels produced by the output convolution.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super(ConvNet, self).__init__()
        self.hidden = torch.nn.Conv1d(n_feature, n_hidden, kernel_size=3, padding=1)
        # The output width must come from n_output; using n_feature here
        # silently ignored the requested output size.
        self.output = torch.nn.Conv1d(n_hidden, n_output, kernel_size=3, padding=1)

    def forward(self, x):
        """Map a ``(N, n_feature)`` batch to ``(N, n_output)``."""
        x = x.unsqueeze(2)  # (N, C) -> (N, C, 1): sequence length 1
        x = F.relu(self.hidden(x))
        x = self.output(x)
        return x.squeeze(-1)  # drop the length-1 axis again

# Three models fitted to the same (x, y) data for comparison.
net_0 = Net(n_feature=1, n_hidden=100, n_output=1)
net_1 = ConvNet(n_feature=1, n_hidden=100, n_output=1)
net_2 = ZFNet_like()

# Interactive plotting so the figure refreshes during training.
plt.ion()
plt.show()

# One optimizer per model.
optimizer_0 = torch.optim.SGD(net_0.parameters(), lr=0.2)
optimizer_1 = torch.optim.SGD(net_1.parameters(), lr=0.2)
# NOTE(review): lr=0.1 is 100x Adam's default of 1e-3 and may be too large
# for this model -- confirm.  A previously tried alternative was
# torch.optim.SGD(net_2.parameters(), lr=0.1).
optimizer_2 = torch.optim.Adam(net_2.parameters(), lr=0.1, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

loss_func = torch.nn.MSELoss()  # mean squared error
for t in range(2001):
    # --- fully connected model ---
    pre_y0 = net_0(x)
    loss_0 = loss_func(pre_y0, y)
    optimizer_0.zero_grad()  # clear gradients left over from the previous step
    loss_0.backward()
    optimizer_0.step()
    # --- two-layer conv model ---
    pre_y1 = net_1(x)
    loss_1 = loss_func(pre_y1, y)
    optimizer_1.zero_grad()
    loss_1.backward()
    optimizer_1.step()
    # --- ZFNet-like model ---
    pre_y2 = net_2(x)
    # net_2 squeezes its last axis and returns shape (100,), while y is
    # (100, 1).  Without the reshape MSELoss broadcasts the pair to
    # (100, 100) and compares every prediction with every target.
    loss_2 = loss_func(pre_y2.reshape(y.shape), y)
    optimizer_2.zero_grad()
    loss_2.backward()
    optimizer_2.step()
    # Plot and save a snapshot every 200 steps.
    if t % 200 == 0:
        plt.cla()
        plt.scatter(x.numpy(), y.numpy())
        plt.plot(x.numpy(), pre_y0.detach().numpy(), 'r_', lw=5)
        plt.plot(x.numpy(), pre_y1.detach().numpy(), 'g_', lw=5)
        plt.plot(x.numpy(), pre_y2.detach().numpy(), 'b_', lw=5)
        plt.text(0.5, 0, 'FC_loss=%.4f ' % loss_0.item(), fontdict={'size': 10, 'color': 'red'})
        plt.text(0.5, 0.1, 'Conv_loss=%.4f' % loss_1.item(), fontdict={'size': 10, 'color': 'green'})
        plt.text(0.5, 0.2, 'ZF_loss=%.4f' % loss_2.item(), fontdict={'size': 10, 'color': 'blue'})
        plt.savefig(str(t) + '.png', bbox_inches='tight')
        plt.pause(0.1)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

展示几个结果：

1000轮
在这里插入图片描述

2000轮
在这里插入图片描述
显然，更深的模型效果很差，需要找寻原因。

看了李宏毅老师的课件代码，里面是用numpy实现的loss函数和梯度下降，仔细看下还是很有必要的：

# Build the loss surface: Z[j][i] is the mean squared error of the line
# y = b + w * x over the data, with b taken from x[i] and w from y[j].
sample_count = len(x_data)
for i, b in enumerate(x):
    for j, w in enumerate(y):
        Z[j][i] = 0  # meshgrid convention: rows index y, columns index x
        for n in range(sample_count):
            Z[j][i] += (y_data[n] - b - w * x_data[n]) ** 2
        Z[j][i] /= sample_count
1
2
3
4
5
6
7
8
9

线性回归过程：

import time

# Initial parameters and hyper-parameters for plain gradient descent.
b = -2
w = 0.01
lr = 0.000005
iteration = 1400000

# Parameter and loss trajectories, kept for plotting afterwards.
b_history = [b]
w_history = [w]
loss_history = []

start = time.time()
m = float(len(x_d))  # sample count; does not change between iterations
for i in range(iteration):
    y_hat = w * x_d + b
    residual = y_d - y_hat
    loss = np.dot(residual, residual) / m  # mean squared error
    # Gradients of the mean squared error with respect to b and w.
    grad_b = -2.0 * np.sum(residual) / m
    grad_w = -2.0 * np.dot(residual, x_d) / m
    # Gradient-descent update.
    b -= lr * grad_b
    w -= lr * grad_w

    b_history.append(b)
    w_history.append(w)
    loss_history.append(loss)
    if i % 10000 == 0:
        print("Step %i, w: %0.4f, b: %.4f, Loss: %.4f" % (i, w, b, loss))
end = time.time()
print("大约需要时间：",end-start)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

Step 0, w: 1.8648, b: -1.9952, Loss: 413789.3821
Step 10000, w: 2.1484, b: -7.1183, Loss: 19355.2329
Step 20000, w: 2.1627, b: -12.1013, Loss: 18858.5836
Step 30000, w: 2.1766, b: -16.9474, Loss: 18388.8578
Step 40000, w: 2.1902, b: -21.6603, Loss: 17944.5958
Step 50000, w: 2.2034, b: -26.2436, Loss: 17524.4173
Step 60000, w: 2.2162, b: -30.7010, Loss: 17127.0167
Step 70000, w: 2.2286, b: -35.0359, Loss: 16751.1593
Step 80000, w: 2.2407, b: -39.2517, Loss: 16395.6772
Step 90000, w: 2.2525, b: -43.3516, Loss: 16059.4658
Step 100000, w: 2.2640, b: -47.3389, Loss: 15741.4804
Step 110000, w: 2.2751, b: -51.2165, Loss: 15440.7331
Step 120000, w: 2.2860, b: -54.9876, Loss: 15156.2893
Step 130000, w: 2.2965, b: -58.6551, Loss: 14887.2653
Step 140000, w: 2.3067, b: -62.2217, Loss: 14632.8251
Step 150000, w: 2.3167, b: -65.6903, Loss: 14392.1781
Step 160000, w: 2.3264, b: -69.0637, Loss: 14164.5766
Step 170000, w: 2.3358, b: -72.3442, Loss: 13949.3134
Step 180000, w: 2.3450, b: -75.5347, Loss: 13745.7197
Step 190000, w: 2.3539, b: -78.6374, Loss: 13553.1628
Step 200000, w: 2.3626, b: -81.6549, Loss: 13371.0444
Step 210000, w: 2.3710, b: -84.5895, Loss: 13198.7988
Step 220000, w: 2.3792, b: -87.4434, Loss: 13035.8905
Step 230000, w: 2.3872, b: -90.2189, Loss: 12881.8136
Step 240000, w: 2.3950, b: -92.9181, Loss: 12736.0891
...
Step 1370000, w: 2.6577, b: -184.3362, Loss: 10198.3508
Step 1380000, w: 2.6580, b: -184.4488, Loss: 10198.0972
Step 1390000, w: 2.6583, b: -184.5583, Loss: 10197.8574
大约需要时间： 20.32868242263794
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

出图

# Left panel: parameter trajectory drawn over the filled loss contours.
plt.subplot(1, 2, 1)
C = plt.contourf(x, y, Z, 50, alpha=0.5, cmap='jet')  # filled contour lines
# plt.clabel(C, inline=True, fontsize=5)
# Orange cross at a hard-coded (b, w) point -- presumably the known optimum; confirm.
plt.plot([-188.4], [2.67], 'x', markersize=12, markeredgewidth=3, color="orange")
plt.plot(b_history, w_history, 'o-', markersize=3, linewidth=1.5, color='black')
plt.xlim(-200, -100)
plt.ylim(-5, 5)
plt.xlabel(r'$b$')
plt.ylabel(r'$w$')
plt.title("线性回归")

# Right panel: loss per step, skipping the first two steps.
plt.subplot(1, 2, 2)
loss = np.asarray(loss_history[2:iteration])
steps = np.arange(2, iteration)
plt.plot(steps, loss)
plt.title("损失")
plt.xlabel('step')
plt.ylabel('loss')
plt.show()
# The demo from Prof. Hung-yi Lee's lecture needs 1.4 million steps to converge to the optimum; the result is shown below.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

在这里插入图片描述

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/凡人多烦事01/article/detail/173267

[Datawhale-李宏毅机器学习-39期]-002-回归+003-误差和梯度下降+004-反向传播机制_datawahle 李宏毅机器学习 回归

[Datawhale-李宏毅机器学习-39期]-002-回归+003-误差和梯度下降+004-反向传播机制_datawahle 李宏毅机器学习回归