
Neural Network Initialization: np.random.randn(layers_dims[l], layers_dims[l-1])


Parameter initialization matters a great deal in neural networks: a suitable initialization simplifies training and improves accuracy. There is no single initialization that fits every case; the choice should be made based on the network architecture, the activation function, and the optimization algorithm.

This post records five parameter initialization methods:

1. Initialize all parameters to zero.

2. Random initialization: np.random.randn(layers_dims[l], layers_dims[l-1]) * 0.01. The draws follow a standard normal distribution, i.e. mean 0 and variance 1 (in statistics, the variance is the average of the squared deviations of the samples from the population mean); the factor of 0.01 keeps the initial weights small, which helps avoid vanishing gradients (see the quick variance check after this list).

3. Xavier initialization: np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(1 / layers_dims[l - 1]). This works well when the activation function is tanh.

4. He initialization: np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1]). This works well when the activation function is ReLU.

5. np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / (layers_dims[l] + layers_dims[l - 1])), which scales the weights by both the fan-in and the fan-out of the layer.
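The difference between these rules is just the target standard deviation of the weights. The following minimal sketch (the layer sizes 800 and 500 are chosen only for illustration) checks empirically that each scaling produces the standard deviation its formula prescribes:

import numpy as np

np.random.seed(0)
fan_in, fan_out = 800, 500  # illustrative layer sizes, not taken from the network below

scalings = {
    "randn * 0.01":             0.01,
    "xavier sqrt(1/fan_in)":    np.sqrt(1 / fan_in),
    "he sqrt(2/fan_in)":        np.sqrt(2 / fan_in),
    "method5 sqrt(2/(in+out))": np.sqrt(2 / (fan_in + fan_out)),
}
for name, scale in scalings.items():
    W = np.random.randn(fan_out, fan_in) * scale  # standard normal draws, rescaled
    print("%-26s target std %.4f   empirical std %.4f" % (name, scale, W.std()))

The He rule deliberately doubles the Xavier variance to compensate for ReLU zeroing out roughly half of its inputs.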

The complete code is as follows:

# Compare several initialization methods
import numpy as np
import matplotlib.pyplot as plt


# 1. Initialize all parameters to zero
def initialize_parameters_zeros(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
        W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
        b1 -- bias vector of shape (layers_dims[1], 1)
        ...
        WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
        bL -- bias vector of shape (layers_dims[L], 1)
    """
    parameters = {}
    L = len(layers_dims)  # number of layers in the network
    for l in range(1, L):
        parameters['W' + str(l)] = np.zeros((layers_dims[l], layers_dims[l - 1]))
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


# 2. Random initialization scaled by 0.01
def initialize_parameters_random(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
        W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
        b1 -- bias vector of shape (layers_dims[1], 1)
        ...
        WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
        bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)  # this seed makes sure your "random" numbers will be the same as ours
    parameters = {}
    L = len(layers_dims)  # integer representing the number of layers
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


# 3. Xavier initialization: scale by sqrt(1 / fan_in)
def initialize_parameters_xavier(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
        W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
        b1 -- bias vector of shape (layers_dims[1], 1)
        ...
        WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
        bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims)  # integer representing the number of layers
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(1 / layers_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


# 4. He initialization: scale by sqrt(2 / fan_in)
def initialize_parameters_he(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
        W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
        b1 -- bias vector of shape (layers_dims[1], 1)
        ...
        WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
        bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims)  # integer representing the number of layers
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


# 5. Scale by sqrt(2 / (fan_in + fan_out))
def initialize_parameters_yo(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
        W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
        b1 -- bias vector of shape (layers_dims[1], 1)
        ...
        WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
        bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims)  # integer representing the number of layers
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / (layers_dims[l] + layers_dims[l - 1]))
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters


def relu(Z):
    """
    :param Z: output of the linear layer
    :return:
    A: output of the activation
    """
    A = np.maximum(0, Z)
    return A


def initialize_parameters(layer_dims):
    """
    :param layer_dims: list, the number of units in each layer (its dimension)
    :return: dictionary storing the parameters W1, W2, ..., WL, b1, ..., bL
    """
    np.random.seed(3)
    L = len(layer_dims)  # the number of layers in the network
    parameters = {}
    for l in range(1, L):
        parameters["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * np.sqrt(2 / (layer_dims[l - 1] + layer_dims[l]))
        parameters["b" + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters


def forward_propagation(initialization="yo"):
    np.random.seed(3)
    data = np.random.randn(1000, 100000)  # synthetic input: 1000 features, 100000 samples
    print("data shape : ", data.shape)
    layers_dims = [1000, 800, 500, 300, 200, 100, 10]
    num_layers = len(layers_dims)
    # Initialize parameters dictionary.
    if initialization == "zeros":
        parameters = initialize_parameters_zeros(layers_dims)
    elif initialization == "random":
        parameters = initialize_parameters_random(layers_dims)
    elif initialization == "xavier":
        parameters = initialize_parameters_xavier(layers_dims)
    elif initialization == "he":
        parameters = initialize_parameters_he(layers_dims)
    elif initialization == "yo":
        parameters = initialize_parameters_yo(layers_dims)
    A = data
    for l in range(1, num_layers):
        A_pre = A
        W = parameters["W" + str(l)]
        print("W shape : ", W.shape)
        b = parameters["b" + str(l)]
        # np.dot: for vectors this is the inner product (multiply element-wise, then sum, giving a scalar);
        # for matrices it is matrix multiplication: the first operand's column count must equal the
        # second's row count, (a x b) * (b x c) = (a x c)
        z = np.dot(W, A_pre) + b
        # A = np.tanh(z)  # tanh activation function
        A = relu(z)
        print("A shape : ", A.shape)
        print("A flatten shape: ", A.flatten().shape)
        # Histogram of this layer's activations
        plt.subplot(2, 3, l)
        plt.hist(A.flatten(), facecolor='g')
        plt.xlim([-1, 1])
        plt.yticks([])
    plt.savefig("save_picture/%s.jpg" % initialization, dpi=500)  # the save_picture/ directory must already exist
    plt.show()
    plt.close()


if __name__ == '__main__':
    forward_propagation()
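To compare the five methods side by side, forward_propagation can be called once per accepted initialization string (a small usage sketch; as noted in the code, the save_picture/ directory must exist before the figures can be written):

# Generate one activation-histogram figure per initialization scheme
for init in ["zeros", "random", "xavier", "he", "yo"]:
    forward_propagation(initialization=init)

With the zero initialization every activation is exactly zero, and with the 0.01 scaling one would expect the activations of the deeper layers to collapse towards zero, while the Xavier/He-style scalings keep the activation histograms spread out across all six layers, which is exactly what the variance formulas above are designed to achieve.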
 

Below, method 5 is used to initialize the parameters of LeNet-5:

import torch.nn as nn
import numpy as np
import torch

np.random.seed(1307)


class LeNet(nn.Module):
    def __init__(self, cfg):
        super(LeNet, self).__init__()
        self.features = self.features_layers(cfg)
        self.classifier = self.classifier_layers(cfg)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def features_layers(self, cfg):
        layers = []
        in_channels = 1
        conv1 = nn.Conv2d(in_channels, cfg[0], kernel_size=3, stride=1, padding=1)
        layers += [self.init_weight_bias(conv1, [cfg[0], in_channels, 3, 3], cfg[0])]
        # layers += [conv1]
        layers = self.make_activation(cfg[1], layers)
        layers = self.make_pool(cfg[2], layers)
        conv2 = nn.Conv2d(cfg[0], cfg[3], kernel_size=5)
        layers += [self.init_weight_bias(conv2, [cfg[3], cfg[0], 5, 5], cfg[3])]
        # layers += [conv2]
        layers = self.make_activation(cfg[4], layers)
        layers = self.make_pool(cfg[5], layers)
        return nn.Sequential(*layers)

    def classifier_layers(self, cfg):
        layers = []
        num_classes = 10
        linear1 = nn.Linear(cfg[3] * 5 * 5, cfg[6])
        layers += [self.init_weight_bias(linear1, [cfg[6], cfg[3] * 5 * 5], cfg[6])]
        # layers += [linear1]
        layers = self.make_activation(cfg[7], layers)
        linear2 = nn.Linear(cfg[6], cfg[8])
        layers += [self.init_weight_bias(linear2, [cfg[8], cfg[6]], cfg[8])]
        # layers += [linear2]
        layers = self.make_activation(cfg[9], layers)
        linear3 = nn.Linear(cfg[8], num_classes)
        layers += [self.init_weight_bias(linear3, [num_classes, cfg[8]], num_classes)]
        # layers += [linear3]
        return nn.Sequential(*layers)

    def make_activation(self, activation, layers):
        if activation == "relu":
            layers += [nn.ReLU(inplace=True)]
        elif activation == "sigmoid":
            layers += [nn.Sigmoid()]
        elif activation == "tanh":
            layers += [nn.Tanh()]
        else:
            print("the activation is wrong!")
        return layers

    def make_pool(self, pool, layers):
        if pool == "maxpool":
            layers += [nn.MaxPool2d(2)]
        elif pool == "avgpool":
            layers += [nn.AvgPool2d(2)]
        else:
            print("the convolutional pool is wrong!")
        return layers

    def init_weight_bias(self, layer, weight_size, bias_size):
        # Re-initialize the layer's weights with method 5: randn scaled by sqrt(2 / sum of the weight dimensions)
        length = len(weight_size)
        if length == 2:
            # fully connected layer: weight shape (out_features, in_features)
            init_weights = torch.Tensor(np.random.randn(weight_size[0], weight_size[1]) *
                                        np.sqrt(2 / (weight_size[1] + weight_size[0])))
        else:
            # convolutional layer: weight shape (out_channels, in_channels, kH, kW)
            init_weights = torch.Tensor(np.random.randn(weight_size[0], weight_size[1],
                                                        weight_size[2], weight_size[3]) *
                                        np.sqrt(2 / (weight_size[3] + weight_size[2] + weight_size[1] + weight_size[0])))
        init_bias = torch.Tensor(np.random.uniform(0, 0, bias_size))  # uniform(0, 0) yields an all-zero bias
        layer.weight = nn.Parameter(init_weights)
        layer.bias = nn.Parameter(init_bias)
        return layer


if __name__ == '__main__':
    cfg = [5, "sigmoid", "maxpool", 5, "sigmoid", "maxpool", 50, "relu", 150, "relu"]
    net = LeNet(cfg)
    print(net)
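As a quick sanity check (a sketch assuming 28x28 single-channel MNIST-style input, which is the geometry the cfg[3]*5*5 flatten size implies), a dummy batch pushed through the network should come out with one logit per class:

x = torch.randn(2, 1, 28, 28)  # dummy batch: 2 grayscale 28x28 images
out = net(x)
print(out.shape)               # expected: torch.Size([2, 10])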

 

 
