This article only introduces how to use the PyTorch deep learning framework. It does not cover the complex mathematics behind the algorithms; readers interested in those details should consult the relevant papers and literature.
Code | Meaning |
---|---|
float32 / float | 32-bit float |
float64 / double | 64-bit float |
float16 | 16-bit float |
bfloat16 | 16-bit brain float: larger range than float16 but lower precision |
int8 | 8-bit int |
int16 / short | 16-bit int |
int32 / int | 32-bit int |
int64 / long | 64-bit int |
complex32 | 32-bit complex |
complex64 / cfloat | 64-bit complex |
complex128 / cdouble | 128-bit complex |
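A quick sketch of how these names are used (as the table indicates, the short names such as torch.float are aliases for the sized names):
import torch

a = torch.tensor([1, 2, 3], dtype=torch.float32)
b = torch.tensor([1, 2, 3], dtype=torch.int64)
print(a.dtype)                        # torch.float32
print(b.dtype)                        # torch.int64
print(torch.float == torch.float32)  # True: the short name is an alias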
@overload
def empty(size: Sequence[Union[_int, SymInt]], *, memory_format: Optional[memory_format]=None, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def empty(*size: _int, memory_format: Optional[memory_format]=None, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def empty(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], memory_format: Optional[memory_format]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def empty(*size: _int, names: Optional[Sequence[Union[str, ellipsis, None]]], memory_format: Optional[memory_format]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
size: the shape of the tensor, e.g. [rows, columns]
dtype (data type): the element type of the tensor
device: the device on which the tensor is allocated and computed
requires_grad: whether to track the tensor for automatic differentiation; defaults to False
gpu=torch.device("cuda")
empty_tensor=torch.empty(size=[3,4],device=gpu,requires_grad=True)
print(empty_tensor)
Output:
tensor([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]], device='cuda:0', requires_grad=True)
@overload
def ones(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def ones(*size: _int, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def ones(size: Sequence[Union[_int, SymInt]], *, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def ones(*size: _int, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
size: the shape of the tensor, e.g. [rows, columns]
dtype (data type): the element type of the tensor
device: the device on which the tensor is allocated and computed
requires_grad: whether to track the tensor for automatic differentiation; defaults to False
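A minimal usage sketch of torch.ones (assuming the gpu device object created earlier; the output is deterministic):
ones_tensor = torch.ones(size=[2, 3], device=gpu, requires_grad=True)
print(ones_tensor)
# tensor([[1., 1., 1.],
#         [1., 1., 1.]], device='cuda:0', requires_grad=True)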
@overload
def zeros(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def zeros(*size: _int, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def zeros(size: Sequence[Union[_int, SymInt]], *, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def zeros(*size: _int, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(size: _size, *, generator: Optional[Generator], names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(*size: _int, generator: Optional[Generator], names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(size: _size, *, generator: Optional[Generator], out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(*size: _int, generator: Optional[Generator], out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(size: _size, *, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(*size: _int, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def rand(*size: _int, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
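torch.rand draws samples uniformly from [0, 1). A minimal sketch (the printed values are random):
rand_tensor = torch.rand(size=[2, 3])
print(rand_tensor)  # a 2x3 tensor of random values in [0, 1)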
@overload
def randint(low: _int, high: _int, size: _size, *, generator: Optional[Generator]=None, dtype: Optional[_dtype]=None, device: Device=None, requires_grad: _bool=False) -> Tensor: ...
@overload
def randint(high: _int, size: _size, *, generator: Optional[Generator]=None, dtype: Optional[_dtype]=None, device: Device=None, requires_grad: _bool=False) -> Tensor: ...
@overload
def randint(high: _int, size: _size, *, generator: Optional[Generator], out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randint(low: _int, high: _int, size: _size, *, generator: Optional[Generator], out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randint(high: _int, size: _size, *, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randint(low: _int, high: _int, size: _size, *, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
int_tensor=torch.randint(low=0,high=20,size=[5,6],device=gpu)
print(int_tensor)
Output:
tensor([[18, 0, 14, 7, 18, 14],
[17, 0, 2, 0, 0, 3],
[16, 17, 5, 15, 1, 14],
[ 7, 12, 8, 6, 4, 11],
[12, 4, 7, 5, 3, 3]], device='cuda:0')
@overload
def randn(size: _size, *, generator: Optional[Generator], names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(*size: _int, generator: Optional[Generator], names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(size: _size, *, generator: Optional[Generator], out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(*size: _int, generator: Optional[Generator], out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(size: _size, *, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(*size: _int, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload
def randn(*size: _int, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
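torch.randn draws samples from the standard normal distribution N(0, 1). A minimal sketch (again, the values are random; gpu is the device object from earlier):
randn_tensor = torch.randn(size=[2, 3], device=gpu)
print(randn_tensor)  # a 2x3 tensor of samples from N(0, 1), on 'cuda:0'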
def tensor(data: Any, dtype: Optional[_dtype]=None, device: Device=None, requires_grad: _bool=False) -> Tensor: ...
sequence_tensor1=torch.tensor(np.array([[[1,2,3],
[4,5,6]],
[[9,8,7],
[6,5,4]]]),
dtype=torch.float,device=gpu,requires_grad=True)
print(sequence_tensor1)
sequence_tensor2=torch.tensor([[[1,2,3],
[4,5,6]],
[[9,8,7],
[6,5,4]]],
dtype=torch.float,device=gpu,requires_grad=True)
print(sequence_tensor2)
Output:
tensor([[[1., 2., 3.],
[4., 5., 6.]],
[[9., 8., 7.],
[6., 5., 4.]]], device='cuda:0', requires_grad=True)
tensor([[[1., 2., 3.],
[4., 5., 6.]],
[[9., 8., 7.],
[6., 5., 4.]]], device='cuda:0', requires_grad=True)
def numpy(self, *args, **kwargs):  # real signature unknown; NOTE: unreliably restored from __doc__
    """
    numpy(*, force=False) -> numpy.ndarray

    Returns the tensor as a NumPy :class:`ndarray`.

    If :attr:`force` is ``False`` (the default), the conversion is performed
    only if the tensor is on the CPU, does not require grad, does not have its
    conjugate bit set, and is a dtype and layout that NumPy supports. The
    returned ndarray and the tensor will share their storage, so changes to
    the tensor will be reflected in the ndarray and vice versa.

    If :attr:`force` is ``True`` this is equivalent to calling
    ``t.detach().cpu().resolve_conj().resolve_neg().numpy()``. If the tensor
    isn't on the CPU or the conjugate or negative bit is set, the tensor
    won't share its storage with the returned ndarray. Setting :attr:`force`
    to ``True`` can be a useful shorthand.

    Args:
        force (bool): if ``True``, the ndarray may be a copy of the tensor
            instead of always sharing memory, defaults to ``False``.
    """
    pass
def item(self):  # real signature unknown; restored from __doc__
    """
    item() -> number

    Returns the value of this tensor as a standard Python number.
    This only works for tensors with one element. For other cases,
    see :meth:`~Tensor.tolist`.

    This operation is not differentiable.

    Example::

        >>> x = torch.tensor([1.0])
        >>> x.item()
        1.0
    """
    return 0
def dim(self):  # real signature unknown; restored from __doc__
    """
    dim() -> int
    Returns the number of dimensions of :attr:`self` tensor.
    """
    return 0
dtype = property(lambda self: object(), lambda self, v: None, lambda self: None) # default
def size(self, dim=None):  # real signature unknown; restored from __doc__
    """
    size(dim=None) -> torch.Size or int

    Returns the size of the :attr:`self` tensor. If ``dim`` is not specified,
    the returned value is a :class:`torch.Size`, a subclass of :class:`tuple`.
    If ``dim`` is specified, returns an int holding the size of that dimension.

    Args:
        dim (int, optional): The dimension for which to retrieve the size.

    Example::

        >>> t = torch.empty(3, 4, 5)
        >>> t.size()
        torch.Size([3, 4, 5])
        >>> t.size(dim=1)
        4
    """
    pass
def view(self, *shape):  # real signature unknown; restored from __doc__
    """
    Example::

        >>> x = torch.randn(4, 4)
        >>> x.size()
        torch.Size([4, 4])
        >>> y = x.view(16)
        >>> y.size()
        torch.Size([16])
        >>> z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
        >>> z.size()
        torch.Size([2, 8])

        >>> a = torch.randn(1, 2, 3, 4)
        >>> a.size()
        torch.Size([1, 2, 3, 4])
        >>> b = a.transpose(1, 2)  # Swaps 2nd and 3rd dimension
        >>> b.size()
        torch.Size([1, 3, 2, 4])
        >>> c = a.view(1, 3, 2, 4)  # Does not change tensor layout in memory
        >>> c.size()
        torch.Size([1, 3, 2, 4])
        >>> torch.equal(b, c)
        False
    """
    return _te.Tensor(*(), **{})
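Putting these inspection methods together, a minimal sketch (every printed value follows directly from the definitions above):
import torch

t = torch.ones(3, 4)
print(t.dim())              # 2
print(t.size())             # torch.Size([3, 4])
print(t.dtype)              # torch.float32
print(t.view(2, 6).size())  # torch.Size([2, 6]) -- 12 elements either way
print(t[0, 0].item())       # 1.0
print(t.numpy())            # OK: t is on the CPU and does not require grad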
def t(self): # real signature unknown; restored from __doc__
"""
t() -> Tensor
See :func:`torch.t`
"""
return _te.Tensor(*(), **{})
def transpose(self, dim0, dim1): # real signature unknown; restored from __doc__
"""
transpose(dim0, dim1) -> Tensor
See :func:`torch.transpose`
"""
return _te.Tensor(*(), **{})
def permute(self, *dims): # real signature unknown; restored from __doc__
"""
permute(*dims) -> Tensor
See :func:`torch.permute`
"""
return _te.Tensor(*(), **{})
sequence_tensor=torch.tensor(np.array([[[1,2,3],
[4,5,6]],
[[9,8,7],
[6,5,4]]]),
dtype=torch.float,device=gpu,requires_grad=True)
print(sequence_tensor)
sequence_tensor_permute=sequence_tensor.permute(2,1,0)
print(sequence_tensor_permute)
sequence_tensor_transpose=sequence_tensor.transpose(0,2)
print(sequence_tensor_transpose)
Output:
tensor([[[1., 2., 3.],
         [4., 5., 6.]],
        [[9., 8., 7.],
         [6., 5., 4.]]], device='cuda:0', requires_grad=True)
tensor([[[1., 9.],
         [4., 6.]],
        [[2., 8.],
         [5., 5.]],
        [[3., 7.],
         [6., 4.]]], device='cuda:0', grad_fn=<PermuteBackward0>)
tensor([[[1., 9.],
         [4., 6.]],
        [[2., 8.],
         [5., 5.]],
        [[3., 7.],
         [6., 4.]]], device='cuda:0', grad_fn=<TransposeBackward0>)
As you can see, the two methods produce exactly the same result.
Suppose we have model A and model B, and we need A's output as B's input, but during training we only want to train model B. We can do this:
input_B = output_A.detach()
This breaks gradient propagation between the two computation graphs, which achieves exactly what we need.
detach() returns a new tensor that shares the same data memory as the original but takes no part in gradient computation, i.e. requires_grad=False. Because the memory is shared, modifying the values of one tensor also changes the other.
sequence_tensor=torch.tensor(np.array([[[1,2,3],
[4,5,6]],
[[9,8,7],
[6,5,4]]]),
dtype=torch.float,device=gpu,)
sequence_tensor_shallowCp=sequence_tensor.detach()
sequence_tensor_shallowCp+=1
print(sequence_tensor)
print(sequence_tensor_shallowCp.requires_grad)
Output:
tensor([[[ 2., 3., 4.],
[ 5., 6., 7.]],
[[10., 9., 8.],
[ 7., 6., 5.]]], device='cuda:0')
False
sequence_tensor=torch.tensor(np.array([[[1,2,3],
[4,5,6]],
[[9,8,7],
[6,5,4]]]),
dtype=torch.float,requires_grad=True,device=gpu)
sequence_tensor_deepCp=torch.tensor(sequence_tensor.to(cpu).detach().numpy())
sequence_tensor_deepCp+=1
print(sequence_tensor)
print(sequence_tensor_deepCp)
Output:
tensor([[[1., 2., 3.],
[4., 5., 6.]],
[[9., 8., 7.],
[6., 5., 4.]]], device='cuda:0', requires_grad=True)
tensor([[[ 2., 3., 4.],
[ 5., 6., 7.]],
[[10., 9., 8.],
[ 7., 6., 5.]]])
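For reference, an equivalent and more direct way to make an independent copy, without the detour through NumPy and the CPU, is clone() followed by detach() (a sketch: clone copies the underlying storage, detach drops the result from the graph):
sequence_tensor_deepCp = sequence_tensor.clone().detach()  # independent storage, requires_grad=False
sequence_tensor_deepCp += 1                                # leaves sequence_tensor unchanged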
def mean(self, dim=None, keepdim=False, *args, **kwargs):  # real signature unknown; NOTE: unreliably restored from __doc__
    """
    mean(dim=None, keepdim=False, *, dtype=None) -> Tensor
    See :func:`torch.mean`
    """
    pass

def sum(self, dim=None, keepdim=False, dtype=None):  # real signature unknown; restored from __doc__
    """
    sum(dim=None, keepdim=False, dtype=None) -> Tensor
    See :func:`torch.sum`
    """
    return _te.Tensor(*(), **{})

def median(self, dim=None, keepdim=False):  # real signature unknown; restored from __doc__
    """
    median(dim=None, keepdim=False) -> (Tensor, LongTensor)
    See :func:`torch.median`
    """
    pass

def mode(self, dim=None, keepdim=False):  # real signature unknown; restored from __doc__
    """
    mode(dim=None, keepdim=False) -> (Tensor, LongTensor)
    See :func:`torch.mode`
    """
    pass

def norm(self, p="fro", dim=None, keepdim=False, dtype=None):
    r"""See :func:`torch.norm`"""
    if has_torch_function_unary(self):
        return handle_torch_function(
            Tensor.norm, (self,), self, p=p, dim=dim, keepdim=keepdim, dtype=dtype
        )
    return torch.norm(self, p, dim, keepdim, dtype=dtype)

def dist(self, other, p=2):  # real signature unknown; restored from __doc__
    """
    dist(other, p=2) -> Tensor
    See :func:`torch.dist`
    """
    return _te.Tensor(*(), **{})

def std(self, dim, unbiased=True, keepdim=False):  # real signature unknown; restored from __doc__
    """
    std(dim, unbiased=True, keepdim=False) -> Tensor
    See :func:`torch.std`

    .. function:: std(unbiased=True) -> Tensor
       :noindex:

    See :func:`torch.std`
    """
    return _te.Tensor(*(), **{})

def var(self, dim, unbiased=True, keepdim=False):  # real signature unknown; restored from __doc__
    """
    var(dim, unbiased=True, keepdim=False) -> Tensor
    See :func:`torch.var`

    .. function:: var(unbiased=True) -> Tensor
       :noindex:

    See :func:`torch.var`
    """
    return _te.Tensor(*(), **{})

def cumsum(self, dim, dtype=None):  # real signature unknown; restored from __doc__
    """
    cumsum(dim, dtype=None) -> Tensor
    See :func:`torch.cumsum`
    """
    return _te.Tensor(*(), **{})

def cumprod(self, dim, dtype=None):  # real signature unknown; restored from __doc__
    """
    cumprod(dim, dtype=None) -> Tensor
    See :func:`torch.cumprod`
    """
    return _te.Tensor(*(), **{})
def to(self, *args, **kwargs):  # real signature unknown; restored from __doc__
    """
    Example::

        >>> tensor = torch.randn(2, 2)  # Initially dtype=float32, device=cpu
        >>> tensor.to(torch.float64)
        tensor([[-0.5044,  0.0005],
                [ 0.3310, -0.0584]], dtype=torch.float64)

        >>> cuda0 = torch.device('cuda:0')
        >>> tensor.to(cuda0)
        tensor([[-0.5044,  0.0005],
                [ 0.3310, -0.0584]], device='cuda:0')

        >>> tensor.to(cuda0, dtype=torch.float64)
        tensor([[-0.5044,  0.0005],
                [ 0.3310, -0.0584]], dtype=torch.float64, device='cuda:0')

        >>> other = torch.randn((), dtype=torch.float64, device=cuda0)
        >>> tensor.to(other, non_blocking=True)
        tensor([[-0.5044,  0.0005],
                [ 0.3310, -0.0584]], dtype=torch.float64, device='cuda:0')
    """
    return _te.Tensor(*(), **{})
The following example demonstrates automatic differentiation. Since multi_tensor = 3 * sequence_tensor + 1 and mean() averages over all 12 elements, every entry of sequence_tensor.grad is 3/12 = 0.25:
sequence_tensor=torch.tensor(np.array([[[1,2,3],
[4,5,6]],
[[9,8,7],
[6,5,4]]]),
dtype=torch.float,device=gpu,requires_grad=True)
multi_tensor=sequence_tensor*3+1
multi_tensor_mean=multi_tensor.mean()
multi_tensor_mean.backward()
print(sequence_tensor.grad)
Output:
tensor([[[0.2500, 0.2500, 0.2500],
[0.2500, 0.2500, 0.2500]],
[[0.2500, 0.2500, 0.2500],
[0.2500, 0.2500, 0.2500]]], device='cuda:0')
nn.Module is a class provided by torch.nn and the base class every network definition in PyTorch must inherit from. It implements many useful methods that make computation very convenient. When defining a network there are two things to pay particular attention to: the constructor must call super().__init__(), and the forward computation must be implemented in forward():
class lr(nn.Module):
def __init__(self):
super(lr,self).__init__()
self.linear=nn.Linear(1,1)
def forward(self,x):
y_predict=self.linear(x)
return y_predict
Here, nn.Linear takes two arguments: the number of input features and the number of output features.
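A minimal sketch of nn.Linear in isolation (the batch size of 5 is arbitrary):
import torch
from torch import nn

layer = nn.Linear(1, 1)  # in_features=1, out_features=1
x = torch.rand(5, 1)     # a batch of 5 samples with 1 feature each
y = layer(x)             # affine map: y = x @ W.T + b
print(y.shape)           # torch.Size([5, 1])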
An optimizer is the method used to update the parameters, for example the common stochastic gradient descent (SGD).
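For example, constructing an SGD optimizer over the parameters of the lr model defined above (a sketch; the learning rate 0.02 matches the full example later):
from torch import optim

model = lr()
optimizer = optim.SGD(model.parameters(), lr=0.02)
# The canonical update step per iteration is:
#   optimizer.zero_grad() -> cost.backward() -> optimizer.step()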
torch.nn already defines many cost functions; simply instantiate one and pass in the predicted and true values to get the loss, for example nn.MSELoss:
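A minimal sketch (the numbers are made up):
import torch
from torch import nn

cost_fn = nn.MSELoss()
y_true = torch.tensor([1.0, 2.0, 3.0])
y_pred = torch.tensor([1.5, 2.0, 2.0])
print(cost_fn(y_pred, y_true))  # (0.25 + 0 + 1) / 3 = tensor(0.4167)
The complete regression example below also plots its results with matplotlib; for reference, the pyplot helpers it uses have the following definitions: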
def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
return gca().plot(
*args, scalex=scalex, scaley=scaley,
**({"data": data} if data is not None else {}), **kwargs)
def scatter(
x, y, s=None, c=None, marker=None, cmap=None, norm=None,
vmin=None, vmax=None, alpha=None, linewidths=None, *,
edgecolors=None, plotnonfinite=False, data=None, **kwargs):
__ret = gca().scatter(
x, y, s=s, c=c, marker=marker, cmap=cmap, norm=norm,
vmin=vmin, vmax=vmax, alpha=alpha, linewidths=linewidths,
edgecolors=edgecolors, plotnonfinite=plotnonfinite,
**({"data": data} if data is not None else {}), **kwargs)
sci(__ret)
return __ret
def savefig(*args, **kwargs):
fig = gcf()
res = fig.savefig(*args, **kwargs)
fig.canvas.draw_idle() # Need this if 'transparent=True', to reset colors.
return res
def show(*args, **kwargs):
_warn_if_gui_out_of_main_thread()
return _get_backend_mod().show(*args, **kwargs)
if __name__ == "__main__":
    import torch
    import numpy as np
    from torch import nn
    from torch import optim
    from matplotlib import pyplot

    gpu = torch.device("cuda")
    cpu = "cpu"

    # Define the network
    class lr(nn.Module):
        def __init__(self):
            # Inherit member variables
            super(lr, self).__init__()
            self.linear = nn.Linear(1, 1)

        # Define the forward pass
        def forward(self, x):
            y_predict = self.linear(x)
            return y_predict

    # Prepare the data
    x_train = torch.rand([200, 1], device=gpu)
    y_train = torch.matmul(x_train, torch.tensor([[3]], dtype=torch.float32, requires_grad=True, device=gpu)) + 8

    # Instantiate
    model_lr = lr().to(gpu)
    optimizer = optim.SGD(model_lr.parameters(), 0.02)
    cost_fn = nn.MSELoss()

    # Start training
    for i in range(1000):
        y_predict = model_lr.forward(x_train)
        cost = cost_fn(y_predict, y_train)
        optimizer.zero_grad()
        cost.backward(retain_graph=True)
        optimizer.step()
        if i % 20 == 0:
            print(cost.item())
    print(list(model_lr.parameters()))

    # Predict and evaluate
    model_lr.eval()
    y_predict_numpy = model_lr.forward(x_train).to(cpu).detach().numpy()
    x_train_numpy = x_train.to(cpu).detach().numpy()
    y_train_numpy = y_train.to(cpu).detach().numpy()
    pyplot.scatter(x_train_numpy, y_predict_numpy, c="r")
    pyplot.plot(x_train_numpy, y_train_numpy)
    pyplot.show()
Output:
4.7310328227467835e-05
[Parameter containing:
tensor([[3.0237]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([7.9876], device='cuda:0', requires_grad=True)]
(Figure: the red predicted points plotted over the true regression line.)
PyTorch provides torch.utils.data.Dataset as a base class for datasets. By subclassing it we can implement data loading very quickly; as with nn.Module, we must define a few required member functions.
SMSData_path = r"D:\Desktop\PycharmProjects\exercise\SMSSpamCollection"
# Data source: http://archive.ics.uci.edu/ml/machine-learning-databases/00228/

class SMSData(Dataset):
    def __init__(self):
        self.data = open(SMSData_path, "r", encoding="utf-8").readlines()

    def __getitem__(self, index):
        current_line = self.data[index].strip()
        label = current_line[:4].strip()
        content = current_line[4:].strip()
        return [label, content]

    def __len__(self):
        return len(self.data)

SMSex = SMSData()
print(SMSex.__getitem__(5))
print(SMSex.__len__())
Output:
['spam', "FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv"]
5574
class DataLoader(Generic[T_co]):
def __init__(self, dataset: Dataset[T_co], batch_size: Optional[int] = 1,
shuffle: Optional[bool] = None, sampler: Union[Sampler, Iterable, None] = None,
batch_sampler: Union[Sampler[Sequence], Iterable[Sequence], None] = None,
num_workers: int = 0, collate_fn: Optional[_collate_fn_t] = None,
pin_memory: bool = False, drop_last: bool = False,
timeout: float = 0, worker_init_fn: Optional[_worker_init_fn_t] = None,
multiprocessing_context=None, generator=None,
*, prefetch_factor: int = 2,
persistent_workers: bool = False,
pin_memory_device: str = ""):
# Only the parameter list is shown; the body is omitted.
dataset: an instance of a custom class that subclasses Dataset
batch_size: the number of samples per batch
shuffle: bool; if True, the data are shuffled beforehand
num_workers: the number of worker processes used to load data
drop_last: bool; applies to the final incomplete batch. For example, with batch_size=64 and only 100 samples in the training set, if True the last 36 samples are dropped during training. If False (the default), training proceeds normally and the final batch is simply smaller.
timeout: if positive, the time to wait for collecting a batch from a worker; if no batch is collected within this time, that batch is abandoned. This value should always be >= 0; the default is 0.
import torch
from torch.utils.data import Dataset, DataLoader
import chardet

gpu = torch.device("cuda")
cpu = "cpu"

try:
    SMSData_path = "SMSSpamCollection"
    # Detect the file encoding
    with open(SMSData_path, "rb") as file:
        file_format = chardet.detect(file.read())["encoding"]

    class SMSData(Dataset):
        def __init__(self):
            self.data = open(SMSData_path, "r", encoding=file_format).readlines()

        def __getitem__(self, index):
            current_line = self.data[index].strip()
            origin = current_line[:4].strip()
            content = current_line[4:].strip()
            return [origin, content]

        def __len__(self):
            return len(self.data)

    SMSex = SMSData()
    SMSData_loader = DataLoader(dataset=SMSex, batch_size=2, shuffle=False, num_workers=2)

    if __name__ == '__main__':  # Required when using multiple workers, otherwise an error is raised
        for i in SMSData_loader:
            print("Iteration style 1:", i)
            break
        for i in enumerate(SMSData_loader):
            print("Iteration style 2:", i)
            break
        for batch_index, (label, content) in enumerate(SMSData_loader):
            print("Iteration style 3:", batch_index, label, content)
            break
except BaseException as error:
    print(error)
Output:
Iteration style 1: [('ham', 'ham'), ('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...', 'Ok lar... Joking wif u oni...')]
Iteration style 2: (0, [('ham', 'ham'), ('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...', 'Ok lar... Joking wif u oni...')])
Iteration style 3: 0 ('ham', 'ham') ('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...', 'Ok lar... Joking wif u oni...')
class ToTensor:
def __init__(self) -> None:
_log_api_usage_once(self)
class Normalize(torch.nn.Module):
def __init__(self, mean, std, inplace=False):
super().__init__()
_log_api_usage_once(self)
self.mean = mean
self.std = std
self.inplace = inplace
mean: a tuple whose length equals the number of channels
std: a tuple whose length equals the number of channels
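A minimal sketch of Normalize applied to a random 3-channel tensor (the mean/std values here are the common ImageNet statistics, used purely as an illustration):
import torch
import torchvision.transforms as transforms

normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
img = torch.rand(3, 224, 224)  # a fake image tensor with values in [0, 1)
out = normalize(img)           # per channel: (img - mean) / std
print(out.shape)               # torch.Size([3, 224, 224])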
class Compose:
def __init__(self, transforms):
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(self)
self.transforms = transforms
transforms: a list in which every element is an instance of a class from the transforms module, such as ToTensor and Normalize.
import torchvision
if __name__ == '__main__':
MNIST=torchvision.datasets.MNIST(root="./data",train=True,download=False,transform=None)
MNIST_normalize=torchvision.transforms.Compose([torchvision.transforms.ToTensor(),torchvision.transforms.Normalize((0),(1))])(MNIST[0][0])
print(MNIST_normalize)
import torchvision
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
from torch.nn import functional as Activate
from matplotlib import pyplot

# Define the network
class ExNet(nn.Module):
    def __init__(self):
        super(ExNet, self).__init__()
        # Convolutional layer 1: 1 input channel (required for MNIST), 15 output
        # channels (your choice), kernel size 5
        self.conv1 = nn.Conv2d(1, 15, 5)
        # Convolutional layer 2: input channels must match the previous layer's
        # output (15); 30 output channels (your choice), kernel size 3
        self.conv2 = nn.Conv2d(15, 30, 3)
        # Fully connected layer 1.
        # MNIST images are 1*28*28, so the input is batch_size*1*28*28.
        # After conv1: batch_size*15*24*24; after pooling: batch_size*15*12*12;
        # after conv2: batch_size*30*10*10 -- hence 30*10*10 input features here.
        self.fully_connected_1 = nn.Linear(30 * 10 * 10, 40)
        # Fully connected layer 2: input matches the previous layer's output.
        # Since we classify ten digits, the output layer must have 10 neurons.
        self.fully_connected_2 = nn.Linear(40, 10)

    # Define the forward pass
    def forward(self, x):
        in_size = x.size(0)  # in_size is BATCH_SIZE; x is a batch_size*1*28*28 tensor
        out = self.conv1(x)                   # batch*1*28*28 -> batch*15*24*24
        out = Activate.relu(out)              # ReLU activation
        out = Activate.max_pool2d(out, 2, 2)  # batch*15*24*24 -> batch*15*12*12 (2*2 pooling halves H and W)
        out = self.conv2(out)                 # batch*15*12*12 -> batch*30*10*10
        out = Activate.relu(out)              # ReLU activation
        out = out.view(in_size, -1)           # flatten
        out = self.fully_connected_1(out)
        out = Activate.relu(out)
        out = self.fully_connected_2(out)
        # log-softmax normalization so the cross-entropy-style NLL loss can be applied
        out = Activate.log_softmax(out, dim=1)
        return out

# Training
def train(the_model, the_device, train_loader, the_optimizer, the_epoch):
    # Model setup
    the_model = the_model.to(device=the_device)
    the_model.train(mode=True)
    # Variables used to plot the cost curve
    list_times = []
    list_cost = []
    # Per-batch loop
    for batch_idx, (data, target) in enumerate(train_loader):
        # Move to the chosen device
        data = data.to(the_device)
        target = target.to(the_device)
        # Reset optimizer gradients
        the_optimizer.zero_grad()
        # Forward pass
        output = the_model.forward(data)
        # Compute the cost
        cost = Activate.nll_loss(output, target)
        # Backward pass
        cost.backward()
        # Parameter update
        the_optimizer.step()
        # Logging
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                the_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), cost.item()))
            print(batch_idx, cost.item())
            list_times.append(batch_idx)
            list_cost.append(cost.item())
    # Plot the cost curve
    pyplot.scatter(list_times, list_cost)
    pyplot.savefig("costImage.jpg")
    pyplot.show()
    return

def test(the_model, the_device, the_test_loader):
    # Set evaluation mode
    the_model = the_model.to(device=the_device)
    the_model.eval()
    # Result accumulators
    acc_vector = []
    cost_vector = []
    # Start testing
    with torch.no_grad():
        for index, (data, target) in enumerate(the_test_loader):
            # Move to the chosen device
            data = data.to(the_device)
            target = target.to(the_device)
            # Forward pass
            output = the_model.forward(data)
            # Compute the cost
            cost = Activate.nll_loss(output, target)
            cost_vector.append(cost)
            # output has shape [batch_size, 10]; the argmax of each row is the
            # model's predicted digit
            pred = output.max(dim=1)[-1]
            # the mean gives the accuracy within this batch
            cur_acc = pred.eq(target).float().mean()
            acc_vector.append(cur_acc)
    # Print the results
    print("Average accuracy:{}".format(sum(acc_vector) / len(acc_vector)))
    print("average cost:{}".format(sum(cost_vector) / len(cost_vector)))
    return

if __name__ == '__main__':
    gpu = torch.device("cuda")
    cpu = "cpu"
    # Prepare the data
    transAndNorm = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0), (1))])
    MNISTData = torchvision.datasets.MNIST(root="./data", train=True, download=False, transform=transAndNorm)
    MNISTtest = torchvision.datasets.MNIST(root="./data", train=False, download=False, transform=transAndNorm)
    MNISTData_loader = DataLoader(dataset=MNISTData, batch_size=10, shuffle=True)
    MNISTtest_loader = DataLoader(dataset=MNISTtest, batch_size=10, shuffle=True)
    # Instantiate the network and the optimizer
    MNISTnet_Ex = ExNet()
    MNIST_optimizer = optim.Adam(MNISTnet_Ex.parameters(), lr=0.001)  # lr is the learning rate
    for i in range(1, 2):
        train(the_model=MNISTnet_Ex, the_device=gpu, train_loader=MNISTData_loader,
              the_optimizer=MNIST_optimizer, the_epoch=i)
    test(the_model=MNISTnet_Ex, the_device=gpu, the_test_loader=MNISTtest_loader)
Output:
Average accuracy:0.9804015159606934
average cost:0.061943911015987396
(Figure: scatter plot of the training cost per batch, as saved to costImage.jpg.)
In the MNIST handwritten-digit classification task above, we used an image dataset built into PyTorch. Now we will try to build our own dataset from scratch.
Oxford 102 Flower is an image classification dataset consisting of 102 flower categories, chosen from flowers commonly found in the United Kingdom. Each category contains between 40 and 258 images, with large variations in scale, pose, and lighting. Some categories also vary greatly within the class, and several categories are very similar to each other. The flower102 dataset can be downloaded from its official page; the directory layout after extraction is as follows:
Set it up just as in Chapter 3, as follows:
import torch
from torch.utils.data import Dataset
import os
gpu = torch.device("cuda")
cpu = "cpu"
class flower102(Dataset):
def __init__(self,root,resize,mode):
super(flower102,self).__init__()
pass
def __len__(self):
pass
def __getitem__(self, item):
pass
In the training set, the 102 flower class names are as shown above (I have renamed them here). We map each name, e.g. flower1, to an integer label, which establishes a name-to-label mapping. A small change to the constructor then builds the full mapping, as follows:
import csv
import glob
import random
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

gpu = torch.device("cuda")
cpu = "cpu"

class flower102(Dataset):
    def __init__(self, root, resize, mode):
        super(flower102, self).__init__()
        self.root = root
        self.train_root = os.path.join(self.root, "train")
        self.val_root = os.path.join(self.root, "valid")
        self.test_root = os.path.join(self.root, "test")
        self.resize = resize
        self.mode = mode
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        self.cat2label = {}  # empty dict that stores the name-to-label mapping
        # Walk the entries of the training directory, sorted by name
        for name in sorted(os.listdir(os.path.join(self.train_root))):
            # Skip plain files; only folders denote classes
            if not os.path.isdir(os.path.join(self.train_root, name)):
                continue
            elif not (name in self.cat2label):
                # Map the folder name to a label equal to the current dict size,
                # so the label grows by one per class
                self.cat2label[name] = len(self.cat2label.keys())
        print(self.cat2label)  # print the mapping

    def __len__(self):
        pass

    def __getitem__(self, idx):
        pass

# Create a dataset instance
db = flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=224, mode="train")
The result is as follows:
{'flower1': 0, 'flower10': 1, 'flower100': 2, 'flower101': 3, 'flower102': 4, 'flower11': 5, 'flower12': 6, 'flower13': 7, 'flower14': 8, 'flower15': 9, 'flower16': 10, 'flower17': 11, 'flower18': 12, 'flower19': 13, 'flower2': 14, 'flower20': 15, 'flower21': 16, 'flower22': 17, 'flower23': 18, 'flower24': 19, 'flower25': 20, 'flower26': 21, 'flower27': 22, 'flower28': 23, 'flower29': 24, 'flower3': 25, 'flower30': 26, 'flower31': 27, 'flower32': 28, 'flower33': 29, 'flower34': 30, 'flower35': 31, 'flower36': 32, 'flower37': 33, 'flower38': 34, 'flower39': 35, 'flower4': 36, 'flower40': 37, 'flower41': 38, 'flower42': 39, 'flower43': 40, 'flower44': 41, 'flower45': 42, 'flower46': 43, 'flower47': 44, 'flower48': 45, 'flower49': 46, 'flower5': 47, 'flower50': 48, 'flower51': 49, 'flower52': 50, 'flower53': 51, 'flower54': 52, 'flower55': 53, 'flower56': 54, 'flower57': 55, 'flower58': 56, 'flower59': 57, 'flower6': 58, 'flower60': 59, 'flower61': 60, 'flower62': 61, 'flower63': 62, 'flower64': 63, 'flower65': 64, 'flower66': 65, 'flower67': 66, 'flower68': 67, 'flower69': 68, 'flower7': 69, 'flower70': 70, 'flower71': 71, 'flower72': 72, 'flower73': 73, 'flower74': 74, 'flower75': 75, 'flower76': 76, 'flower77': 77, 'flower78': 78, 'flower79': 79, 'flower8': 80, 'flower80': 81, 'flower81': 82, 'flower82': 83, 'flower83': 84, 'flower84': 85, 'flower85': 86, 'flower86': 87, 'flower87': 88, 'flower88': 89, 'flower89': 90, 'flower9': 91, 'flower90': 92, 'flower91': 93, 'flower92': 94, 'flower93': 95, 'flower94': 96, 'flower95': 97, 'flower96': 98, 'flower97': 99, 'flower98': 100, 'flower99': 101}
Having built the name-to-label mapping, we now want a CSV file storing every image path together with its numeric label. Next we define a load_csv function to do this, as follows:
import csv
import glob
import random
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

gpu = torch.device("cuda")
cpu = "cpu"

class flower102(Dataset):
    def __init__(self, root, resize, mode):
        super(flower102, self).__init__()
        self.root = root
        self.train_root = os.path.join(self.root, "train")
        self.val_root = os.path.join(self.root, "valid")
        self.test_root = os.path.join(self.root, "test")
        self.resize = resize
        self.mode = mode
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        self.cat2label = {}  # empty dict that stores the name-to-label mapping
        # Walk the entries of the training directory, sorted by name
        for name in sorted(os.listdir(os.path.join(self.train_root))):
            # Skip plain files; only folders denote classes
            if not os.path.isdir(os.path.join(self.train_root, name)):
                continue
            elif not (name in self.cat2label):
                # The label is the current dict size, so it grows by one per class
                self.cat2label[name] = len(self.cat2label.keys())
        print(self.cat2label)  # print the mapping

        if mode == "train":
            self.images, self.labels = self.load_csv("images_train.csv")
        elif mode == "valid":
            self.images, self.labels = self.load_csv("images_valid.csv")
        else:
            raise Exception("invalid mode!", self.mode)

    # Load the CSV file and return the image paths and labels
    def load_csv(self, filename):
        # If the CSV does not exist yet, generate it from the directory tree and the mapping
        if not os.path.exists(os.path.join(self.root, filename)):
            images = []
            for name in self.cat2label.keys():
                images += glob.glob(os.path.join(self.root, self.mode, name, "*.png"))
                images += glob.glob(os.path.join(self.root, self.mode, name, "*.jpg"))
                images += glob.glob(os.path.join(self.root, self.mode, name, "*.jpeg"))
            random.shuffle(images)
            with open(os.path.join(self.root, filename), mode="w", newline="") as f:
                writer = csv.writer(f)
                for img in images:
                    label = self.cat2label[img.split(os.sep)[-2]]
                    writer.writerow([img, label])
                print("written into csv file:", filename)
        # Read the image paths and labels back from the CSV
        images = []
        labels = []
        with open(os.path.join(self.root, filename)) as f:
            reader = csv.reader(f)
            for row in reader:
                img, label = row
                label = int(label)
                images.append(img)
                labels.append(label)
        assert len(images) == len(labels)
        return images, labels

    # Undo the normalization
    def denormalize(self, x_hat):
        pass

    def __len__(self):
        pass

    def __getitem__(self, idx):
        pass

# Create a dataset instance
db = flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=224, mode="train")
This produces a CSV file like the following:
With load_csv done, the dataset is essentially complete; all that remains is to fill in the __len__ and __getitem__ functions and define the transform pipeline.
import csv
import glob
import random
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

gpu = torch.device("cuda")
cpu = "cpu"

class flower102(Dataset):
    def __init__(self, root, resize, mode):
        super(flower102, self).__init__()
        self.root = root
        self.train_root = os.path.join(self.root, "train")
        self.val_root = os.path.join(self.root, "valid")
        self.test_root = os.path.join(self.root, "test")
        self.resize = resize
        self.mode = mode
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        self.cat2label = {}  # empty dict that stores the name-to-label mapping
        # Walk the entries of the training directory, sorted by name
        for name in sorted(os.listdir(os.path.join(self.train_root))):
            # Skip plain files; only folders denote classes
            if not os.path.isdir(os.path.join(self.train_root, name)):
                continue
            elif not (name in self.cat2label):
                # The label is the current dict size, so it grows by one per class
                self.cat2label[name] = len(self.cat2label.keys())
        print(self.cat2label)  # print the mapping

        if mode == "train":
            self.images, self.labels = self.load_csv("images_train.csv")
        elif mode == "valid":
            self.images, self.labels = self.load_csv("images_valid.csv")
        else:
            raise Exception("invalid mode!", self.mode)

    # Load the CSV file and return the image paths and labels
    def load_csv(self, filename):
        # If the CSV does not exist yet, generate it from the directory tree and the mapping
        if not os.path.exists(os.path.join(self.root, filename)):
            images = []
            for name in self.cat2label.keys():
                images += glob.glob(os.path.join(self.root, self.mode, name, "*.png"))
                images += glob.glob(os.path.join(self.root, self.mode, name, "*.jpg"))
                images += glob.glob(os.path.join(self.root, self.mode, name, "*.jpeg"))
            random.shuffle(images)
            with open(os.path.join(self.root, filename), mode="w", newline="") as f:
                writer = csv.writer(f)
                for img in images:
                    label = self.cat2label[img.split(os.sep)[-2]]
                    writer.writerow([img, label])
                print("written into csv file:", filename)
        # Read the image paths and labels back from the CSV
        images = []
        labels = []
        with open(os.path.join(self.root, filename)) as f:
            reader = csv.reader(f)
            for row in reader:
                img, label = row
                label = int(label)
                images.append(img)
                labels.append(label)
        assert len(images) == len(labels)
        return images, labels

    # Undo the normalization
    def denormalize(self, x_hat):
        # x_hat = (x - mean) / std  =>  x = x_hat * std + mean
        # x.size(): [c, h, w]; mean.size(): [3] => [3, 1, 1]
        mean = torch.tensor(self.mean).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(self.std).unsqueeze(1).unsqueeze(1)
        x = x_hat * std + mean
        return x

    def __len__(self):
        # Number of samples in the dataset
        return len(self.images)

    def __getitem__(self, idx):
        # Fetch the image path and label for this index
        img, label = self.images[idx], self.labels[idx]
        # Define the preprocessing pipeline
        tf = transforms.Compose([
            lambda x: Image.open(x).convert("RGB"),                                 # open the image as RGB
            transforms.Resize((int(self.resize * 1.25), int(self.resize * 1.25))),  # resize to 1.25x the target size
            transforms.RandomRotation(15),                                          # random rotation of up to 15 degrees
            transforms.CenterCrop(self.resize),                                     # center-crop to the target size
            transforms.ToTensor(),                                                  # convert to a tensor
            transforms.Normalize(mean=self.mean, std=self.std),                     # normalize
        ])
        # Apply the preprocessing
        img = tf(img)
        label = torch.tensor(label)
        return img, label

# Create a dataset instance
db = flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=224, mode="train")
if __name__=='__main__' :
loader = DataLoader(dataset=db, shuffle=True,num_workers=1,batch_size=8)
import matplotlib.pyplot as plt
data,target=next(iter(db))
print(data.shape)
plt.imshow(transforms.ToPILImage()(db.denormalize(data)))
plt.show()
The image displays successfully:
What we save is the model's state; torch.save has the following signature:
def save(
obj: object,
f: FILE_LIKE,
pickle_module: Any = pickle,
pickle_protocol: int = DEFAULT_PROTOCOL,
_use_new_zipfile_serialization: bool = True
) -> None:...
torch.save(MNISTnet_Ex.state_dict(),"MNIST.pt")
torch.save(MNIST_optimizer.state_dict(),"optimizer.pt")
MNISTnet_Ex.load_state_dict(torch.load("MNIST.pt"))
MNIST_optimizer.load_state_dict(torch.load("optimizer.pt"))
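Note that torch.save can also pickle the entire model object rather than just its state_dict; loading it back then requires the original class definition to be importable (a sketch, not part of the original training script):
torch.save(MNISTnet_Ex, "MNIST_full.pt")   # serializes the whole module via pickle
MNISTnet_Ex = torch.load("MNIST_full.pt")  # the ExNet class must be importable here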
PyTorch officially provides quite a few pretrained models, as listed below:
For details on what these models are for, consult the PyTorch website; this article does not cover the underlying principles.
When using a pretrained model, the most important step is deciding which of its parameters need training, which do not, and which need to be replaced.
First, inspect the structure of resnet50:
import torchvision.models as models
print(models.resnet50(pretrained=True))
ResNet(
...
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=2048, out_features=1000, bias=True)
)
The last layer is a 1000-way fully connected classifier, but the dataset we built in Chapter 5 needs only 102 classes. We therefore choose to replace and train only the last layer, as shown below:
import torchvision.models as models
import torch.nn as nn

def set_parameter_requires_grad(model, need_train):
    if not need_train:
        for para in model.parameters():
            para.requires_grad = False
    return

def initalize_resnet50(num_classes, need_train=False, pretrained=True):
    trained_model = models.resnet50(pretrained=pretrained)
    input_size = 224
    set_parameter_requires_grad(trained_model, need_train)
    trained_model.fc = nn.Sequential(
        nn.Linear(trained_model.fc.in_features, num_classes),
        nn.LogSoftmax(dim=1),
    )
    # trained_model.fc = nn.Sequential(
    #     nn.Linear(trained_model.fc.in_features, num_classes),
    #     nn.Flatten(),
    # )
    return trained_model, input_size

resnet50, input_size = initalize_resnet50(num_classes=102, need_train=False, pretrained=True)
The training procedure and logging follow Chapter 4, as shown below:
import copy      # used to deep-copy objects
import os.path   # used for file paths
import time      # used for timing

def train(model, dataLoader, criterion, optimzer, num_epoch, device, filename):
    """
    Training function.

    Args:
        model: the model object
        dataLoader: the data loader
        criterion: the loss function
        optimzer: the optimizer
        num_epoch: number of epochs
        device: compute device
        filename: file name under which to save the model

    Returns:
        model: the trained model
        train_acc_history: history of training-set accuracy
        train_losses: history of training-set loss
        l_rs: history of the optimizer's learning rate
    """
    since = time.time()  # start time
    best_epoch = {"epoch": -1, "acc": 0}  # epoch and accuracy of the best model
    model.to(device)  # move the model to the compute device
    train_acc_history = []  # training accuracy history
    train_losses = []       # training loss history
    l_rs = [optimzer.param_groups[0]['lr']]  # learning-rate history
    best_model_wts = copy.deepcopy(model.state_dict())  # deep copy of the current weights as the best so far
    for epoch in range(num_epoch):
        print("Epoch {}/{}".format(epoch, num_epoch - 1))
        print('*' * 10)
        running_loss = 0.0     # running loss total
        running_correct = 0.0  # running count of correct predictions
        for data, target in dataLoader:
            data = data.to(device)      # move inputs to the device
            target = target.to(device)  # move targets to the device
            optimzer.zero_grad()        # reset gradients
            output = model.forward(data)      # forward pass
            loss = criterion(output, target)  # compute the loss
            pred = output.argmax(dim=1)       # predictions
            loss.backward()  # backward pass
            optimzer.step()  # parameter update
            running_loss += loss.item() * data.size(0)  # accumulate the loss
            running_correct += torch.eq(pred, target).sum().float().item()  # accumulate correct predictions
        epoch_loss = running_loss / len(dataLoader.dataset)    # average loss
        epoch_acc = running_correct / len(dataLoader.dataset)  # accuracy
        time_elapsed = time.time() - since  # elapsed training time
        print("Time elapsed {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))
        print("Loss: {:4f} Acc:{:.4f}".format(epoch_loss, epoch_acc))
        train_acc_history.append(epoch_acc)  # record accuracy
        train_losses.append(epoch_loss)      # record loss
        if (epoch_acc > best_epoch["acc"]):  # update the best-model record
            best_epoch = {"epoch": epoch, "acc": epoch_acc}
            best_model_wts = copy.deepcopy(model.state_dict())  # deep copy the best weights
            state = {
                "state_dict": model.state_dict(),
                "best_acc": best_epoch["acc"],
                "optimzer": optimzer.state_dict(),
            }
            torch.save(state, filename)  # save the best model's state dict to disk
        print("Optimzer learning rate : {:.7f}".format(optimzer.param_groups[0]['lr']))  # current learning rate
        l_rs.append(optimzer.param_groups[0]['lr'])  # record the learning rate
        print()
    time_elapsed = time.time() - since  # total training time
    print("Training complete in {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))
    print("Best epoch:", best_epoch)
    model.load_state_dict(best_model_wts)  # load the best weights
    return model, train_acc_history, train_losses, l_rs

if __name__ == "__main__":
    import torch
    import Net
    import torch.nn as nn
    import torch.optim as optim

    optimzer = optim.Adam(params=Net.resnet50.parameters(), lr=1e-2)  # Adam optimizer
    sche = optim.lr_scheduler.StepLR(optimizer=optimzer, step_size=10, gamma=0.5)  # learning-rate scheduler
    criterion = nn.NLLLoss()  # negative log-likelihood loss
    # criterion = nn.CrossEntropyLoss()

    import flower102
    from torch.utils.data import DataLoader
    db = flower102.flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=Net.input_size, mode="train")  # dataset
    loader = DataLoader(dataset=db, shuffle=True, num_workers=1, batch_size=5)  # data loader
    model = Net.resnet50  # model object
    filename = "checkpoint.pth"  # checkpoint file name
    if os.path.exists(filename):  # resume from an existing checkpoint
        checkpoint = torch.load(filename)
        model.load_state_dict(checkpoint["state_dict"])
    model, train_acc_history, train_loss, LRS = train(model=model, dataLoader=loader, criterion=criterion,
                                                      optimzer=optimzer, num_epoch=5,
                                                      device=torch.device("cuda"), filename=filename)
Here are my results after training for 5 epochs:
Epoch 0/4
**********
Time elapsed 0m 37s
Loss: 11.229704 Acc:0.3515
Optimzer learning rate : 0.0100000

Epoch 1/4
**********
Time elapsed 1m 12s
Loss: 8.165128 Acc:0.5697
Optimzer learning rate : 0.0100000

Epoch 2/4
**********
Time elapsed 2m 4s
Loss: 7.410833 Acc:0.6363
Optimzer learning rate : 0.0100000

Epoch 3/4
**********
Time elapsed 2m 60s
Loss: 6.991850 Acc:0.6822
Optimzer learning rate : 0.0100000

Epoch 4/4
**********
Time elapsed 3m 44s
Loss: 6.482804 Acc:0.7128
Optimzer learning rate : 0.0100000

Training complete in 3m 44s
Best epoch: {'epoch': 4, 'acc': 0.7127594627594628}