>>> input=torch.rand([1, 3, 2, 2])
>>> input
tensor([[[[0.1181, 0.6704],
          [0.7010, 0.8031]],
         [[0.0630, 0.2088],
          [0.2150, 0.6469]],
         [[0.5746, 0.4949],
          [0.3656, 0.7391]]]])
>>> layer_norm=torch.nn.LayerNorm((3, 2, 2), eps=1e-05)
>>> output=layer_norm(input)
>>> output
tensor([[[[-1.3912,  0.8131],
          [ 0.9349,  1.3424]],
         [[-1.6113, -1.0293],
          [-1.0047,  0.7191]],
         [[ 0.4308,  0.1126],
          [-0.4035,  1.0872]]]], grad_fn=<NativeLayerNormBackward0>)
>>> output[0].mean()
tensor(-1.7385e-07, grad_fn=<MeanBackward0>)
>>> output[0].std()
tensor(1.0445, grad_fn=<StdBackward0>)
>>> layer_norm.weight.shape
torch.Size([3, 2, 2])
>>> layer_norm.bias.shape
torch.Size([3, 2, 2])
# Equivalent to normalizing by hand over the last three dimensions:
>>> mean=input.mean(dim=(-1, -2, -3), keepdim=True)
>>> var=input.var(dim=(-1, -2, -3), keepdim=True, unbiased=False)
>>> (input-mean)/torch.sqrt(var+1e-05)
tensor([[[[-1.3912,  0.8131],
          [ 0.9349,  1.3424]],
         [[-1.6113, -1.0293],
          [-1.0047,  0.7191]],
         [[ 0.4308,  0.1126],
          [-0.4035,  1.0872]]]])
>>> input=torch.rand([1, 200, 80])
>>> layer_norm=torch.nn.LayerNorm(80)
>>> layer_norm(input)[0][0].mean()
tensor(8.3447e-08, grad_fn=<MeanBackward0>)
>>> layer_norm(input)[0][1].mean()
tensor(-8.0466e-08, grad_fn=<MeanBackward0>)
>>> layer_norm(input)[0][0].std()
tensor(1.0063, grad_fn=<StdBackward0>)
>>> layer_norm(input)[0][1].std()
tensor(1.0063, grad_fn=<StdBackward0>)