Linear layer: forward propagation formula
$$WX + b$$
Code:
def forward(self, X):
    return np.matmul(X, self.W) + self.b
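A note on shapes (not spelled out in the original): X holds a whole batch, one sample per row, so the product and the broadcast bias work out as
$$X \in \mathbb{R}^{k \times d_{in}},\quad W \in \mathbb{R}^{d_{in} \times d_{out}},\quad b \in \mathbb{R}^{1 \times d_{out}},\quad XW + b \in \mathbb{R}^{k \times d_{out}},$$
with b broadcast across the k rows by NumPy.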
Backward propagation formulas
$$\frac{\partial f(WX+b)}{\partial W} = f'(WX+b) \cdot X$$
$$\frac{\partial f(WX+b)}{\partial b} = f'(WX+b)$$
$$\frac{\partial f(WX+b)}{\partial X} = f'(WX+b) \cdot W$$
The first formula gives the gradient used to update W, the second the gradient used to update b, and the third is the gradient this layer passes back to the previous layer. Converted to matrix (batched) form, the code is as follows:
def backward(self, X, grad):
    self.W_grad = np.matmul(X.T, grad)                     # accumulate the weight gradient over the k samples in the batch
    self.b_grad = np.matmul(grad.T, np.ones(X.shape[0]))   # accumulate the bias gradient over the batch
    return np.matmul(grad, self.W.T)                       # gradient with respect to the input X, passed to the previous layer
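These batched forms are not written out in the original derivation, but they are exactly what the code above computes. Writing δ for the upstream gradient `grad`,
$$\frac{\partial L}{\partial W} = X^{\top}\delta,\qquad \frac{\partial L}{\partial b} = \mathbf{1}^{\top}\delta,\qquad \frac{\partial L}{\partial X} = \delta\, W^{\top},$$
where X is k×d_in, δ is k×d_out, and 1 is the all-ones vector of length k, so each expression sums the per-sample gradients over the batch.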
Besides forward and backward propagation, the layer also has a parameter-update step:
def update(self, lr):
    self.W = self.W + self.W_grad * lr
    self.b = self.b + self.b_grad * lr
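A note on the sign, which is easy to misread as gradient ascent: in BP_Model further below, the error signal fed into backward is grad = (Y − o2), i.e. the negative of ∂L/∂o for the MSE loss, so W_grad already stores −∂L/∂W and the update
$$W \leftarrow W + \eta\, W_{grad} = W - \eta\,\frac{\partial L}{\partial W}$$
is ordinary gradient descent with learning rate η = lr.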
The complete code for the linear layer:
import numpy as np

class Linear:
    def __init__(self, in_shape, out_shape):
        np.random.seed(10)
        self.W = np.random.rand(in_shape, out_shape)  # must not be initialized to all zeros
        self.b = np.random.rand(1, out_shape)
        self.W_grad = np.zeros((in_shape, out_shape))
        self.b_grad = np.zeros((1, out_shape))

    def forward(self, X):
        return np.matmul(X, self.W) + self.b

    def backward(self, X, grad):
        self.W_grad = np.matmul(X.T, grad)                     # accumulate the weight gradient over the k samples in the batch
        self.b_grad = np.matmul(grad.T, np.ones(X.shape[0]))   # accumulate the bias gradient over the batch
        return np.matmul(grad, self.W.T)                       # gradient with respect to the input X

    def update(self, lr):
        self.W = self.W + self.W_grad * lr
        self.b = self.b + self.b_grad * lr
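A minimal sanity check, not part of the original post: it assumes the Linear class above is already defined in the current scope, and compares the analytic W_grad against a centred finite difference on one weight entry.

import numpy as np

layer = Linear(in_shape=3, out_shape=2)       # small layer just for the check
X = np.random.rand(4, 3)                      # a batch of k = 4 samples
grad_out = np.ones((4, 2))                    # upstream gradient of sum(forward(X))

layer.backward(X, grad_out)                   # fills layer.W_grad analytically

eps = 1e-6
i, j = 1, 0                                   # probe a single weight entry
W_plus, W_minus = layer.W.copy(), layer.W.copy()
W_plus[i, j] += eps
W_minus[i, j] -= eps
layer.W = W_plus
f_plus = np.sum(layer.forward(X))
layer.W = W_minus
f_minus = np.sum(layer.forward(X))
numeric = (f_plus - f_minus) / (2 * eps)
print(numeric, layer.W_grad[i, j])            # the two numbers should match closely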
Sigmoid activation: forward propagation formula
$$\frac{1}{1+e^{-x}}$$
Code:
def forward(self, X):
    return 1 / (1 + np.exp(-X))
Backward propagation formula
$$\frac{\partial f\!\left(\frac{1}{1+e^{-x}}\right)}{\partial x} = f'\!\left(\frac{1}{1+e^{-x}}\right)\left(\frac{1}{1+e^{-x}}\right)\left(1-\frac{1}{1+e^{-x}}\right)$$
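The factor σ(x)(1 − σ(x)) used above comes from differentiating the sigmoid directly (a standard identity, spelled out here for completeness):
$$\frac{d}{dx}\,\frac{1}{1+e^{-x}} = \frac{e^{-x}}{(1+e^{-x})^{2}} = \frac{1}{1+e^{-x}}\left(1-\frac{1}{1+e^{-x}}\right).$$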
Code:
def backward(self, X, grad):
    return self.forward(X) * (1 - self.forward(X)) * grad
The complete code for the Sigmoid layer:
import numpy as np

class Sigmoid:
    def __init__(self):
        pass

    def forward(self, X):
        return 1 / (1 + np.exp(-X))

    def backward(self, X, grad):
        return self.forward(X) * (1 - self.forward(X)) * grad
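An optional variant, not part of the original post: for large negative inputs np.exp(-X) can overflow and emit a RuntimeWarning, so a numerically safer forward pass can branch on the sign of X.

import numpy as np

def stable_sigmoid(X):
    out = np.empty_like(X, dtype=float)
    pos = X >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-X[pos]))   # safe: exponent is non-positive
    exp_x = np.exp(X[~pos])                    # safe: exponent is negative
    out[~pos] = exp_x / (1.0 + exp_x)
    return out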
Tanh activation: forward propagation formula
$$\frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}$$
Code:
def forward(self, X):
    return (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))
Backward propagation formula
$$\frac{\partial f\!\left(\frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}\right)}{\partial x} = f'\!\left(\frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}\right)\left(1-\left(\frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}\right)^{2}\right)$$
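The 1 − tanh²(x) factor follows from the quotient rule (again a standard identity, added for completeness):
$$\frac{d}{dx}\,\frac{e^{x}-e^{-x}}{e^{x}+e^{-x}} = \frac{(e^{x}+e^{-x})^{2}-(e^{x}-e^{-x})^{2}}{(e^{x}+e^{-x})^{2}} = 1-\tanh^{2}(x).$$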
Code:
def backward(self, X, grad):
    return (1 - np.power(self.forward(X), 2)) * grad
The complete code for the Tanh layer:
import numpy as np

class Tanh:
    def __init__(self):
        pass

    def forward(self, X):
        return (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))

    def backward(self, X, grad):
        return (1 - np.power(self.forward(X), 2)) * grad
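A quick check, not in the original post: NumPy already provides np.tanh, so the hand-written forward pass can be verified against it; np.tanh is also safer for large |x|, where np.exp(X) in the formula above can overflow.

import numpy as np

t = Tanh()                                    # the class defined above
X = np.random.randn(4, 5)                     # moderate values, no overflow
assert np.allclose(t.forward(X), np.tanh(X))  # the two implementations agree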
Building the network:
def model_bulider(self):
    self.Linear1 = Linear.Linear(self.size1[0], self.size1[1])
    self.Tanh1 = Tanh.Tanh()
    self.Linear2 = Linear.Linear(self.size2[0], self.size2[1])
    self.Sigmoid1 = Sigmoid.Sigmoid()
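Written out as one expression (not stated explicitly in the original, but this is what predict in BP_Model computes, using the WX+b notation from above), the network is a single hidden layer followed by a sigmoid output:
$$o_2 = \sigma\bigl(W_2\,\tanh(W_1 X + b_1) + b_2\bigr),$$
with W₁, b₁ belonging to Linear1 and W₂, b₂ to Linear2.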
The loss function is MSE; the loss and accuracy are computed as follows:
def MSEloss(self, X, Y):
    return np.sum(np.power(self.predict(X) - Y, 2) / 2)

def acc(self, X, Y):
    count = np.sum(np.argmax(Y, axis=1) == np.argmax(self.predict(X), axis=1))
    return count / X.shape[0]
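In formula form (matching the code above), with one-hot labels y and network outputs ŷ summed over all samples n and classes c,
$$L = \frac{1}{2}\sum_{n}\sum_{c}\bigl(\hat{y}_{nc}-y_{nc}\bigr)^{2},$$
and accuracy is the fraction of samples whose arg-max predicted class matches the arg-max of the one-hot label.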
When batch_size is 1, the weights are updated one sample at a time; otherwise they are updated in mini-batches.
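A tiny illustration, not from the original post, of how the training loop in BP_Model slices mini-batches: with the 24 training samples used in Main.py and batch_size = 4 there are 24 // 4 = 6 weight updates per epoch.

batch_size, n_train = 4, 24
for j in range(n_train // batch_size):                  # 6 updates per epoch
    start, stop = j * batch_size, (j + 1) * batch_size
    print(start, stop)                                   # 0 4, 4 8, ..., 20 24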
[train_loss,train_acc,val_loss,val_acc] = \
model.train(train_X,train_Y,val_X,val_Y,epoch=epoch,batch_size=batch_size,show_epoch=show_epoch)
BP_Model.py
import numpy as np
import Linear
import Sigmoid
import Tanh

class BP_Model:
    def __init__(self, size1, size2, lr):
        self.size1 = size1
        self.size2 = size2
        self.lr = lr

    def model_bulider(self):
        self.Linear1 = Linear.Linear(self.size1[0], self.size1[1])
        self.Tanh1 = Tanh.Tanh()
        self.Linear2 = Linear.Linear(self.size2[0], self.size2[1])
        self.Sigmoid1 = Sigmoid.Sigmoid()

    def MSEloss(self, X, Y):
        return np.sum(np.power(self.predict(X) - Y, 2) / 2)

    def acc(self, X, Y):
        count = np.sum(np.argmax(Y, axis=1) == np.argmax(self.predict(X), axis=1))
        return count / X.shape[0]

    def predict(self, X):
        o0 = X
        a1 = self.Linear1.forward(o0)
        o1 = self.Tanh1.forward(a1)
        a2 = self.Linear2.forward(o1)
        o2 = self.Sigmoid1.forward(a2)
        return o2

    def update(self, X, Y):
        # forward pass, keeping the intermediate activations
        o0 = X
        a1 = self.Linear1.forward(o0)
        o1 = self.Tanh1.forward(a1)
        a2 = self.Linear2.forward(o1)
        o2 = self.Sigmoid1.forward(a2)
        # backward pass: propagate the error signal and collect the gradients
        grad = (Y - o2)
        grad = self.Sigmoid1.backward(a2, grad)
        grad = self.Linear2.backward(o1, grad)
        grad = self.Tanh1.backward(a1, grad)
        grad = self.Linear1.backward(o0, grad)
        # update the parameters
        self.Linear1.update(self.lr)
        self.Linear2.update(self.lr)

    def train(self, X_train, Y_train, X_val, Y_val, epoch, batch_size, show_epoch):
        train_loss = []
        train_acc = []
        val_loss = []
        val_acc = []
        for i in range(epoch):
            for j in range(X_train.shape[0] // batch_size):
                self.update(X_train[j*batch_size:j*batch_size+batch_size, :],
                            Y_train[j*batch_size:j*batch_size+batch_size, :])
            loss = self.MSEloss(X_train, Y_train)
            acc = self.acc(X_train, Y_train)
            loss_ = self.MSEloss(X_val, Y_val)
            acc_ = self.acc(X_val, Y_val)
            if i % show_epoch == 0:
                print('epoch=', i)
                print('loss={},acc={},val_loss={},val_acc={}'.format(loss, acc, loss_, acc_))
                train_loss.append(loss)
                train_acc.append(acc)
                val_acc.append(acc_)
                val_loss.append(loss_)
            if loss <= 0.01:
                break
        return train_loss, train_acc, val_loss, val_acc
Main.py
import numpy as np
from matplotlib import pyplot as plt
import datetime

start_t = datetime.datetime.now()

import BP_Model

class_num = 3
hidden_num = 5
lr = 0.1
epoch = 5000
batch_size = 4
show_epoch = 100
train_rate = 0.8

X = np.array([[1.58, 2.32, -5.8], [0.67, 1.58, -4.78], [1.04, 1.01, -3.63],
              [-1.49, 2.18, -3.39], [-0.41, 1.21, -4.73], [1.39, 3.16, 2.87],
              [1.20, 1.40, -1.89], [-0.92, 1.44, -3.22], [0.45, 1.33, -4.38],
              [-0.76, 0.84, -1.96],
              [0.21, 0.03, -2.21], [0.37, 0.28, -1.8], [0.18, 1.22, 0.16],
              [-0.24, 0.93, -1.01], [-1.18, 0.39, -0.39], [0.74, 0.96, -1.16],
              [-0.38, 1.94, -0.48], [0.02, 0.72, -0.17], [0.44, 1.31, -0.14],
              [0.46, 1.49, 0.68],
              [-1.54, 1.17, 0.64], [5.41, 3.45, -1.33], [1.55, 0.99, 2.69],
              [1.86, 3.19, 1.51], [1.68, 1.79, -0.87], [3.51, -0.22, -1.39],
              [1.40, -0.44, -0.92], [0.44, 0.83, 1.97], [0.25, 0.68, -0.99],
              [0.66, -0.45, 0.08]])

Y = np.zeros([X.shape[0], class_num])
Y[0:10, 0] = 1
Y[10:20, 1] = 1
Y[20:30, 2] = 1

train_X = np.concatenate((X[0:int(train_rate*10), :], X[10:10+int(train_rate*10), :], X[20:20+int(train_rate*10), :]), axis=0)
train_Y = np.concatenate((Y[0:int(train_rate*10), :], Y[10:10+int(train_rate*10), :], Y[20:20+int(train_rate*10), :]))
val_X = np.concatenate((X[int(train_rate*10):10, :], X[10+int(train_rate*10):20, :], X[20+int(train_rate*10):30, :]))
val_Y = np.concatenate((Y[int(train_rate*10):10, :], Y[10+int(train_rate*10):20, :], Y[20+int(train_rate*10):30, :]))

model = BP_Model.BP_Model(size1=[X.shape[1], hidden_num], size2=[hidden_num, Y.shape[1]], lr=lr)
model.model_bulider()

[train_loss, train_acc, val_loss, val_acc] = \
    model.train(train_X, train_Y, val_X, val_Y, epoch=epoch, batch_size=batch_size, show_epoch=show_epoch)

plt.figure()
plt.subplot(1, 2, 1)
plt.plot(range(0, len(train_loss)*show_epoch, show_epoch), train_loss, label='train loss')
plt.plot(range(0, len(val_loss)*show_epoch, show_epoch), val_loss, label='val loss')
plt.legend()
plt.title('Loss')
plt.subplot(1, 2, 2)
plt.plot(range(0, len(train_acc)*show_epoch, show_epoch), train_acc, label='train acc')
plt.plot(range(0, len(val_acc)*show_epoch, show_epoch), val_acc, label='val acc')
plt.legend()
plt.title('Acc')
plt.show()

end_t = datetime.datetime.now()
print((end_t - start_t).seconds, 's')