Reference: https://www.cnblogs.com/adong7639/p/9145911.html (very well written).

'''
This post explains batch normalization as used in CNNs.
'''
import torch
import torch.nn as nn


class Net(nn.Module):
    def __init__(self, dim, pretrained=False):
        super(Net, self).__init__()
        self.bn = nn.BatchNorm2d(dim)  # defaults: eps=1e-5, momentum=0.1
        if pretrained:
            self.pretrained()

    def forward(self, input):
        return self.bn(input)

    def pretrained(self):
        # initialize gamma to 1 and beta to 0
        nn.init.constant_(self.bn.weight, 1)
        nn.init.constant_(self.bn.bias, 0)


def train():
    dim = 3
    model = Net(dim)
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))
    for p in model.parameters():
        print(p, p.requires_grad)
    '''
    For a batch-normalization layer over a CNN feature map with 3 channels, the BN layer has
    6 learnable parameters: gamma and beta (3 values per channel set). During training it is
    gamma and beta that are updated by back-propagation.
    6
    Parameter containing: tensor([0.2322, 0.9405, 0.9887], requires_grad=True) True
    Parameter containing: tensor([0., 0., 0.], requires_grad=True) True
    '''
    # model.eval()
    feature_map = torch.randn((2, 3, 2, 2))
    output1 = model(feature_map)
    state_dict = model.state_dict()
    for k, v in state_dict.items():
        print(k, v)
    '''
    bn.weight tensor([0.2860, 0.5986, 0.0594])
    bn.bias tensor([0., 0., 0.])
    bn.running_mean tensor([-0.2098, 0.1876, -0.3045])
    bn.running_var tensor([0.8099, 1.5140, 0.5880])
    bn.num_batches_tracked tensor(1)
    Printing the state dict shows that the batch-normalization layer holds 5 entries:
    bn.weight corresponds to gamma in the paper,
    bn.bias corresponds to beta in the paper,
    bn.running_mean is the mean estimated from the batches seen so far,
    bn.running_var is the variance estimated from the batches seen so far.
    '''
    print('bn.running_mean', state_dict['bn.running_mean'])
    print('bn.running_var', state_dict['bn.running_var'])
    print(torch.mean(feature_map.permute(1, 0, 2, 3).contiguous().view(dim, -1), 1))
    print(torch.var(feature_map.permute(1, 0, 2, 3).contiguous().view(dim, -1), 1))
    '''
    bn.running_mean tensor([-0.2098, 0.1876, -0.3045])
    bn.running_var tensor([0.8099, 1.5140, 0.5880])
    tensor([-0.2098, 0.1876, -0.3045])
    tensor([0.8099, 1.5140, 0.5880])
    These values match only because momentum was set to 1 for that run, i.e. the current
    running statistics (running_mean and running_var) come entirely from the current batch:
        statistic_t = (1 - momentum) * statistic_{t-1} + momentum * batch_statistic_t
    momentum controls how bn.running_mean and bn.running_var are updated:
    (1) With momentum = 1 the values are determined entirely by the statistics of the
        current batch.
    (2) With momentum = 0 the running statistics never move away from their previous values.
        Since no parameter updates or training iterations are performed here, the previous
        values are still the initial ones, so the model keeps
        bn.running_mean tensor([0., 0., 0.]) and bn.running_var tensor([1., 1., 1.]) forever.
        (In a real training run the previous statistics would no longer be these initial values.)
    (3) With the default momentum = 0.1:
        bn.running_mean tensor([0.0233, 0.0166, 0.0469])
        bn.running_var  tensor([0.9961, 1.0899, 0.9974])
        tensor([0.2329, 0.1663, 0.4691])  # batch statistics computed directly on the tensor
        tensor([0.9615, 1.8986, 0.9738])
        so bn.running_mean is exactly 0.1 x the batch mean (the old running mean is 0), and
        bn.running_var is 0.9 x 1 + 0.1 x the batch variance.
    To recap how BN is computed: for a CNN input (i.e. the BN input is a 4-dimensional tensor),
    normalization is done over the batch, H and W dimensions; this is also called spatial
    batch normalization.
    '''
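
# The following small function is a sketch added here (it is not part of the original post):
# it checks the running-statistics update rule quoted above,
#     running_new = (1 - momentum) * running_old + momentum * batch_statistic,
# against an actual nn.BatchNorm2d layer. Note that PyTorch updates running_var with the
# *unbiased* batch variance, even though the normalization itself uses the biased one.
def check_running_update(momentum=0.1):
    bn = nn.BatchNorm2d(3, momentum=momentum)  # buffers start at running_mean=0, running_var=1
    x = torch.randn(2, 3, 2, 2)
    bn(x)  # one forward pass in training mode updates the running statistics
    batch_mean = torch.mean(x.permute(1, 0, 2, 3).contiguous().view(3, -1), 1)
    batch_var = torch.var(x.permute(1, 0, 2, 3).contiguous().view(3, -1), 1)  # unbiased
    expected_mean = (1 - momentum) * torch.zeros(3) + momentum * batch_mean
    expected_var = (1 - momentum) * torch.ones(3) + momentum * batch_var
    print(torch.allclose(bn.running_mean, expected_mean, atol=1e-6))  # expected: True
    print(torch.allclose(bn.running_var, expected_var, atol=1e-6))    # expected: True
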
if __name__ == '__main__':
    '''
    In a BN layer, bn.weight (gamma) is usually randomly initialized while bn.bias (beta) is
    initialized to all zeros. Suppose the input feature map and the batch-normalization
    parameters are known; the task is to compute the BN output by hand.
    momentum = 0.1 is the default: 0.9 * (statistics at step t-1) + 0.1 * (statistics at step t)
    '''
    dim = 3
    momentum = 0.1
    model = Net(dim, True)
    input = torch.randn((2, 3, 2, 2))
    output1 = model(input)

    def bn_simple_train(input, model):
        '''
        Re-implements the running-statistics update of the BN layer by hand.
        :param input: CNN feature map of shape [batch size, C, H, W]
        :return: feature map normalized with the updated running statistics
        '''
        mean = torch.mean(input.permute(1, 0, 2, 3).contiguous().view(dim, -1), 1)  # shape [dim]
        var = torch.var(input.permute(1, 0, 2, 3).contiguous().view(dim, -1), 1)    # shape [dim]
        init_mean = torch.zeros(dim)
        init_var = torch.ones(dim)
        # moving-average update of mean and variance; computed during training and
        # stored so they can be used on test data
        run_mean = (1 - momentum) * init_mean + momentum * mean
        run_var = (1 - momentum) * init_var + momentum * var
        run_std = torch.sqrt(run_var + 1e-5)
        run_mean_exp = run_mean.view(1, input.shape[1], 1, 1).expand(input.shape)
        run_std_exp = run_std.view(1, input.shape[1], 1, 1).expand(input.shape)
        '''
        Replicating this tensor also took me a while to figure out.
        Given tensor1 = torch.tensor([1, 2, 3]), we need a 2*3*2*2 tensor2 such that
        tensor2[:, 0, :, :] = 1, tensor2[:, 1, :, :] = 2, tensor2[:, 2, :, :] = 3.
        Besides a for loop, built-in functions can do this: first unsqueeze/view to
        shape (1, 3, 1, 1), then expand to (2, 3, 2, 2).
        expand can only replicate along existing dimensions and cannot add new ones, so the
        tensor must first be brought to 4 dimensions; expand keeps the dimensions whose size
        already matches (here the channel dimension) and replicates along the others.
        '''
        # run_mean_exp = torch.zeros((2, 3, 2, 2))
        # for i in range(3):
        #     run_mean_exp[:, i, :, :] = run_mean[i]
        # run_std_exp = torch.zeros((2, 3, 2, 2))
        # for i in range(3):
        #     run_std_exp[:, i, :, :] = run_std[i]
        output2 = input - run_mean_exp
        output2 = output2 / run_std_exp
        init_weights = model.state_dict()['bn.weight']  # gamma
        init_bias = model.state_dict()['bn.bias']       # beta
        init_weights_exp = init_weights.view(1, input.shape[1], 1, 1).expand(input.shape)
        init_bias_exp = init_bias.view(1, input.shape[1], 1, 1).expand(input.shape)
        '''
        gamma and beta are the learnable parameters that keep being updated during training
        (at back-propagation time).
        '''
        # init_weights_exp = torch.zeros((2, 3, 2, 2))
        # for i in range(3):
        #     init_weights_exp[:, i, :, :] = init_weights[i]
        #
        # init_bias_exp = torch.zeros((2, 3, 2, 2))
        # for i in range(3):
        #     init_bias_exp[:, i, :, :] = init_bias[i]
        output2 = output2 * init_weights_exp
        output2 = output2 + init_bias_exp
        return output2

    def bn_for_test(input, model):
        '''
        At test time the BN layer's running mean and running var are fixed values; they are
        no longer the statistics of the new validation data. In model.eval() mode these two
        buffers are frozen, and gamma and beta do not change either.
        :param input: CNN feature map of shape [batch size, C, H, W]
        :param model: the model containing the BN layer
        :return: the BN output computed from the stored statistics and parameters
        '''
        state_dict = model.state_dict()
        init_weights = state_dict['bn.weight']
        init_bias = state_dict['bn.bias']
        running_mean = state_dict['bn.running_mean']
        running_var = state_dict['bn.running_var']
        mean = running_mean.view(1, input.shape[1], 1, 1).expand(input.shape)
        var = running_var.view(1, input.shape[1], 1, 1).expand(input.shape)
        weights = init_weights.view(1, input.shape[1], 1, 1).expand(input.shape)
        bias = init_bias.view(1, input.shape[1], 1, 1).expand(input.shape)
        output = (input - mean) / torch.sqrt(var + 1e-5)
        output = output * weights + bias
        return output
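
    # A minimal check, added here as a sketch (not part of the original post):
    # in eval mode nn.BatchNorm2d normalizes with the stored running statistics, so
    # bn_for_test should reproduce the layer's output.
    model.eval()
    output_eval_ref = model(input)
    output_eval_manual = bn_for_test(input, model)
    print(torch.allclose(output_eval_ref, output_eval_manual, atol=1e-6))  # expected: True

    # In training mode, however, nn.BatchNorm2d normalizes with the *batch* mean and the
    # biased batch variance rather than with the updated running statistics, so
    # bn_simple_train (which normalizes with the running statistics) only approximates
    # output1 instead of matching it exactly.
    output_train_manual = bn_simple_train(input, model)
    print(torch.max(torch.abs(output1 - output_train_manual)))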