当前位置:   article > 正文

Pytorch_Learn_def forward(self, preds: tensor, labels: tensor) -

def forward(self, preds: tensor, labels: tensor) -> dict(str, tensor): typee

Pytorch

题外话:Pycharm的快捷操作

  1. Ctrl+O:可以选择重写类的方法
  2. Ctrl+I:可以选择实现类的方法
  3. Alt+insert:可以直接选择重写还是实现类的方法
  4. Ctrl+P:提示函数的参数
  5. Tab:对段落进行统一的 缩进
  6. Ctrl+/:对选中的部分进行注释,再点一下就会取消注释
  7. Ctrl+d:光标在上一行的末尾,按下Ctrl+d,会复制上一行

一、加载数据

Dataset:提供一种方式去获取数据及其label

①如何获取每一个数据及其label

②告诉我们有多少的数据

Dataloader:为后面的网络提供不同的数据形式

from torch.utils.data import Dataset
from PIL import Image  # 读取图片的库,可以对图片进行可视化
import os  # 对文件路径进行操作


#  help(Dataset)

class MyData(Dataset):
    """Dataset over a single class folder.

    Every file found in ``root_dir/label_dir`` is one sample, and the folder
    name ``label_dir`` is used as the label of all of them.
    """

    def __init__(self, root_dir, label_dir):
        """Remember the folder location and index the files it contains.

        Args:
            root_dir: Directory that contains the class sub-folder.
            label_dir: Name of the class sub-folder; also used as the label.
        """
        self.root_dir = root_dir
        self.label_dir = label_dir
        # Full path of the class folder: root_dir joined with label_dir.
        self.path = os.path.join(self.root_dir, self.label_dir)
        # os.listdir() returns entries in arbitrary order; sort them so that a
        # given index refers to the same file on every run and platform.
        self.image_path = sorted(os.listdir(self.path))

    def __getitem__(self, index):
        """Return ``(image, label)`` for the file at position ``index``."""
        image_name = self.image_path[index]  # file name of the requested sample
        image_item_path = os.path.join(self.path, image_name)  # full path of that file
        image = Image.open(image_item_path)  # open the file with PIL
        label = self.label_dir  # the containing folder name is the label
        return image, label

    def __len__(self):
        """Return the number of files indexed in the class folder."""
        return len(self.image_path)


# Build one MyData dataset per class folder and join them into a single
# training set.
root_dir = "hymenoptera_data/train"  # folder that contains the class sub-folders
ants_dir = "ants"
bees_dir = "bees"
ants_dataset = MyData(root_dir, ants_dir)  # dataset of the ant images
bees_dataset = MyData(root_dir, bees_dir)  # dataset of the bee images
train_dataset=ants_dataset + bees_dataset  # concatenate the ants and bees datasets


  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33

二、TensorBoard

SummaryWriter

作用:允许训练程序调用方法以直接从训练循环将数据添加到文件中,而不会减慢训练速度。

1.初始化函数__init__()

#init,SummaryWriter初始化函数
def __init__(self,log_dir=None,comment='',purge_step=None,max_queue=10,flush_secs=120,filename_suffix='')
#都是默认值

log_dir:用以保存summary的位置,SummaryWriter生成的writer实例的第一个参数都是 logs ,那么,我的当前代码所在文件夹下方就会出现一个名为 logs 的文件夹里面装的就是summary,
如果无参数,则会生成默认的存储文件夹:runs/时间日期

comment:在存储文件夹后面加上后缀,默认不指定 log_dir时的文件后缀。

filename_suffix:文件名后缀

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11

2.生成文件

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("logs")#生成存储文件logs

#消除拟合现象:不同模型的曲线相互纠缠
#1.就是新建一个tag
#2.删除之前的文件,退出终端,再运行
当我们在同一个tag下进行可视化展示时,函数的形状都会出现在同一个表格中。
如果我们想要分开进行展示的话,可以更换一个tag;如果想要删除数据,可以直接删除writer.add_scalar()生成的文件即可

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
#在终端中打开,启用tensorboard
tensorboard --logdir="logs"#在网页中打开,启用默认端口
tensorboard --logdir="logs" --port=number#人为控制端口,启用number端口
  • 1
  • 2
  • 3

3.add_scalar():标量可视化

   #函数原型
    def add_scalar(
        self, tag, scalar_value, global_step=None, walltime=None, new_style=False
    ):

		Args:
            tag (string): Data identifier            #标题
                
            scalar_value (float or string/blobname): Value to save          #Y轴
                
            global_step (int): Global step value to record          #X轴
                
            walltime (float): Optional override default walltime (time.time())
              with seconds after epoch of event 
            #可选参数,记录发生的时间,默认为time.time()
            
            new_style (boolean): Whether to use new style (tensor field) or old
              style (simple_value field). New style could lead to faster data loading.

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
writer.add_scalar("y=x",y,x)#y为Y轴的变量,x为X轴的变量,"y=x",为名称
  • 1

4.add_image():打开图像

从PIL到numpy,需要在add_image()中指定shape中每一个数组/维表示的含义,即加上dataformats

#函数原型
add_image(self, tag, img_tensor, global_step=None, walltime=None,dataformats='CHW'):
        
        Add image data to summary.

        Note that this requires the ``pillow`` package.

        Args:
            tag (string): Data identifier        #标题,相当于图片的一级标题,改变tag相当于新建一个图相册
            
            img_tensor (torch.Tensor, numpy.array, or string/blobname): 
            Image data   图像数据类型
            
            global_step (int): Global step value to record       #记录的步骤,也就是在同一个title/tag下面,根据不同的step有不同的图片,相当于二级标题
            
            walltime (float): Optional override default walltime (time.time())
              seconds after epoch of event

            
#注意:add_image(np.array)此时pillow的版本要低于10.0.0,可以安装9.5.0            
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image
# Log one image and a y=x scalar curve to TensorBoard.
writer = SummaryWriter("logs")
image_path = "data/train/ants_image/0013035.jpg"  # relative path of the image
# np.array(image_path) would only wrap the path *string* in a 0-d array, not
# the pixels. Decode the file with PIL first, then convert it to a numpy
# array (for an RGB JPEG this is an (H, W, 3) array).
with Image.open(image_path) as image_pil:
    image_array = np.array(image_pil)

# "test" is the tag, image_array the pixel data, 1 the global step.
# dataformats='HWC' states the axis order of the array:
# H = height, W = width, C = channels.
writer.add_image("test", image_array, 1, dataformats='HWC')

for i in range(100):
    writer.add_scalar("y=x", i, i)

writer.close()

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16

5.add_graph():模型结构图可视化

writer.add_graph(net,input_to_model = torch.rand(batch_size,channels,H,W))
  • 1

三、Transforms

图像变换

1.ToTensor():变为tensor类型

将PIL Image / numpy.ndarray转化为tensor类型

#函数原型:
class ToTensor:  # a class: create an instance before using it
    """Turn a ``PIL Image`` or ``numpy.ndarray`` into a tensor (no torchscript support).

    An (H x W x C) input with values in [0, 255] becomes a torch.FloatTensor of
    shape (C x H x W) with values in [0.0, 1.0], provided that either

    * the PIL Image mode is one of L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1, or
    * the numpy.ndarray has dtype = np.uint8.

    Any other input is returned as a tensor without scaling.

    .. note::
        Because the input is rescaled to [0.0, 1.0], do not apply this transform
        to target image masks. See the `references`_ for mask transforms.

    .. _references: https://github.com/pytorch/vision/tree/main/references/segmentation
    """

    def __init__(self) -> None:
        _log_api_usage_once(self)

    def __call__(self, pic):
        """Convert ``pic`` to a tensor.

        Args:
            pic (PIL Image or numpy.ndarray): Image to convert.

        Returns:
            Tensor: The converted image.
        """
        converted = F.to_tensor(pic)
        return converted

    def __repr__(self) -> str:
        return self.__class__.__name__ + "()"


  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
from  PIL import Image

# Convert a PIL image to a tensor and log it to TensorBoard.
writer =SummaryWriter('logs')
image_PIL = Image.open("data/train/bees_image/17209602_fe5a5a746f.jpg")

trans_totensor =transforms.ToTensor()  # ToTensor is a class: create an instance first
image_tensor=trans_totensor(image_PIL)  # calling the instance performs the conversion
writer.add_image("Totensor",image_tensor)
writer.close()

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12

2.Normalize():归一化

#函数原型:
class Normalize(torch.nn.Module):
    """Normalize a tensor image channel by channel using a mean and a standard deviation.

    PIL Images are not supported. For ``n`` channels with
    mean ``(mean[1],...,mean[n])`` and std ``(std[1],..,std[n])``, every channel
    of the input ``torch.*Tensor`` is transformed as
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``

    .. note::
        This transform acts out of place, i.e., it does not mutate the input tensor.

    Args:
        mean (sequence): Per-channel means.
        std (sequence): Per-channel standard deviations.
        inplace(bool,optional): Whether to perform the operation in-place.

    """

    def __init__(self, mean, std, inplace=False):
        super().__init__()
        _log_api_usage_once(self)
        self.mean, self.std, self.inplace = mean, std, inplace

    def forward(self, tensor: Tensor) -> Tensor:
        """Normalize ``tensor`` with the stored statistics.

        Args:
            tensor (Tensor): Tensor image to normalize.

        Returns:
            Tensor: The normalized tensor image.
        """
        normalized = F.normalize(tensor, self.mean, self.std, self.inplace)
        return normalized

    def __repr__(self) -> str:
        return "{}(mean={}, std={})".format(self.__class__.__name__, self.mean, self.std)

    
#Normalize a tensor image with mean and standard deviation.就是要带着tensor image的平均值和标准差
#计算公式:output[channel] = (input[channel] - mean[channel]) / std[channel],

#channel是通道/维度,通常jpg图片是由RGB三色组成,所以一般channel一般为三维,而png图片是由四通道组成的,多了一个透明度的参数,所以要设置4 channels
(input-0.5)/0.5==2*input-1
如果input的取值范围是[0,1],那么output的取值范围是[-1,1]
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
# Normalize all three channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5,
# then log the result under the tag "Normalize".
trans_norm=transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
image_normal=trans_norm(image_tensor)
writer.add_image("Normalize",image_normal)
  • 1
  • 2
  • 3

3.Resize():变换图片的大小

#函数原型:
class Resize(torch.nn.Module):
    """Resize the input image to the given size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions

    .. warning::
        The output image might be different depending on its type: when downsampling, the interpolation of PIL images
        and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
        in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
        types. See also below the ``antialias`` parameter, which can help making the output of PIL images and tensors
        closer.

    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), output size will be matched to this. If size is an int,
            smaller edge of the image will be matched to this number.
            i.e, if height > width, then image will be rescaled to
            (size * height / width, size).

            .. note::
                In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
            but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
        max_size (int, optional): The maximum allowed for the longer edge of
            the resized image: if the longer edge of the image is greater
            than ``max_size`` after being resized according to ``size``, then
            the image is resized again so that the longer edge is equal to
            ``max_size``. As a result, ``size`` might be overruled, i.e the
            smaller edge may be shorter than ``size``. This is only supported
            if ``size`` is an int (or a sequence of length 1 in torchscript
            mode).
        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
            ``InterpolationMode.BILINEAR`` only mode. This can help making the output for PIL images and tensors
            closer.

            .. warning::
                There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor.

    """

    def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
        """Validate ``size`` and store the resize settings.

        Raises:
            TypeError: If ``size`` is neither an int nor a Sequence.
            ValueError: If ``size`` is a Sequence whose length is not 1 or 2.
        """
        super().__init__()
        _log_api_usage_once(self)
        # Reject unsupported size arguments up front.
        if not isinstance(size, (int, Sequence)):
            raise TypeError(f"Size should be int or sequence. Got {type(size)}")
        if isinstance(size, Sequence) and len(size) not in (1, 2):
            raise ValueError("If size is a sequence, it should have 1 or 2 values")
        self.size = size
        self.max_size = max_size

        # Backward compatibility with integer value
        if isinstance(interpolation, int):
            warnings.warn(
                "Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. "
                "Please use InterpolationMode enum."
            )
            # Map the legacy integer code to its InterpolationMode member.
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias

    def forward(self, img):
        """
        Args:
            img (PIL Image or Tensor): Image to be scaled.

        Returns:
            PIL Image or Tensor: Rescaled image.
        """
        return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias)

    def __repr__(self) -> str:
        """Return the class name followed by the four stored settings."""
        detail = f"(size={self.size}, interpolation={self.interpolation.value}, max_size={self.max_size}, antialias={self.antialias})"
        return f"{self.__class__.__name__}{detail}"

# 定义一个resize函数  
resize = transforms.Resize((224, 224))#将图像调整为224X224
resize= transforms.Resize(224)#如果只是指定一个参数,会保留原图像的纵横比

image_resize=resize(img)  # img 可以是 PIL Image,也可以是 tensor
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
# Resize: scale the tensor image to exactly 512 x 512 and log it.
trans_resize=transforms.Resize((512,512))
image_resize=trans_resize(image_tensor)
writer.add_image("Resize",image_resize)
  • 1
  • 2
  • 3
  • 4

4.Compose():组合不同的函数

#函数原型:
class Compose:
    """Chain several transforms into one callable (not torchscript-compatible).

    See the note below for the scriptable alternative.

    Args:
        transforms (list of ``Transform`` objects): transforms to apply, in order.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.PILToTensor(),
        >>>     transforms.ConvertImageDtype(torch.float),
        >>> ])

    .. note::
        To script the transformations, use ``torch.nn.Sequential`` instead:

        >>> transforms = torch.nn.Sequential(
        >>>     transforms.CenterCrop(10),
        >>>     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        >>> )
        >>> scripted_transforms = torch.jit.script(transforms)

        Only scriptable transformations may be used there, i.e. ones that work with
        ``torch.Tensor`` and need neither `lambda` functions nor ``PIL.Image``.

    """

    def __init__(self, transforms):
        # Usage logging is skipped while scripting or tracing.
        scripting_or_tracing = torch.jit.is_scripting() or torch.jit.is_tracing()
        if not scripting_or_tracing:
            _log_api_usage_once(self)
        self.transforms = transforms

    def __call__(self, img):
        """Feed ``img`` through every transform in order and return the final result."""
        result = img
        for transform in self.transforms:
            result = transform(result)
        return result

    def __repr__(self) -> str:
        # One indented line per transform, wrapped in "ClassName(" ... ")".
        body = "".join(f"\n    {transform}" for transform in self.transforms)
        return self.__class__.__name__ + "(" + body + "\n)"

Compose() 中的参数需要是一个列表,Python中列表的表示形式为[数据1,数据2...]
在Compose中,数据需要是transforms类型,所以得到 Compose([transforms参数1,transforms参数2...])    
通过List的形式,组合多个函数/操作
Compose()类会将transforms列表里面的transform操作进行遍历。
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
trans_resize_2=transforms.Resize(512)  # single int: resize while keeping the aspect ratio

# Combine ToTensor and Resize into a single transform with Compose.
# In some PyTorch versions Resize has to come before ToTensor.
trans_compose=transforms.Compose([trans_totensor,trans_resize_2])

image_resize_2=trans_compose(image_PIL)  # the input is a PIL image
writer.add_image("Resize_2",image_resize_2,1)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7

5.RandomCrop():随机切割

#函数原型:
class RandomCrop(torch.nn.Module):
    """Crop the given image at a random location.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions,
    but if non-constant padding is used, the input is expected to have at most 2 leading dimensions

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
        padding (int or sequence, optional): Optional padding on each border
            of the image. Default is None. If a single int is provided this
            is used to pad all borders. If sequence of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a sequence of length 4 is provided
            this is the padding for the left, top, right and bottom borders respectively.

            .. note::
                In torchscript mode padding as single int is not supported, use a sequence of
                length 1: ``[padding, ]``.
        pad_if_needed (boolean): It will pad the image if smaller than the
            desired size to avoid raising an exception. Since cropping is done
            after padding, the padding seems to be done at a random offset.
        fill (number or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant.
            Only number is supported for torch Tensor.
            Only int or tuple value is supported for PIL Image.
        padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
            Default is constant.

            - constant: pads with a constant value, this value is specified with fill

            - edge: pads with the last value at the edge of the image.
              If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2

            - reflect: pads with reflection of image without repeating the last value on the edge.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
              will result in [3, 2, 1, 2, 3, 4, 3, 2]

            - symmetric: pads with reflection of image repeating the last value on the edge.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
              will result in [2, 1, 1, 2, 3, 4, 4, 3]
    """

    @staticmethod
    def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]:
        """Get parameters for ``crop`` for a random crop.

        Args:
            img (PIL Image or Tensor): Image to be cropped.
            output_size (tuple): Expected output size of the crop.

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
        """
        _, h, w = F.get_dimensions(img)
        th, tw = output_size

        # NOTE(review): the "+ 1" lets a crop that is exactly one pixel larger
        # than the image pass this check; the randint upper bound below is then
        # 0 — confirm the intended behaviour against upstream.
        if h + 1 < th or w + 1 < tw:
            raise ValueError(f"Required crop size {(th, tw)} is larger then input image size {(h, w)}")

        # Crop equals the whole image: nothing to randomize.
        if w == tw and h == th:
            return 0, 0, h, w

        # Draw the top (i) and left (j) offsets; the row offset is drawn first.
        i = torch.randint(0, h - th + 1, size=(1,)).item()
        j = torch.randint(0, w - tw + 1, size=(1,)).item()
        return i, j, th, tw

    def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"):
        """Normalize ``size`` to a tuple and store the padding settings."""
        super().__init__()
        _log_api_usage_once(self)

        self.size = tuple(_setup_size(size, error_msg="Please provide only two dimensions (h, w) for size."))

        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    def forward(self, img):
        """
        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        # Explicit padding, when configured, is applied before anything else.
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        _, height, width = F.get_dimensions(img)
        # pad the width if needed
        if self.pad_if_needed and width < self.size[1]:
            # Length-2 padding is [left/right, top/bottom]: widen only.
            padding = [self.size[1] - width, 0]
            img = F.pad(img, padding, self.fill, self.padding_mode)
        # pad the height if needed
        if self.pad_if_needed and height < self.size[0]:
            padding = [0, self.size[0] - height]
            img = F.pad(img, padding, self.fill, self.padding_mode)

        # Pick the random crop window on the (possibly padded) image.
        i, j, h, w = self.get_params(img, self.size)

        return F.crop(img, i, j, h, w)

    def __repr__(self) -> str:
        """Return the class name with ``size`` and ``padding`` only."""
        return f"{self.__class__.__name__}(size={self.size}, padding={self.padding})"

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108

(1)传入一个参数,不改变纵横比

注:注意图片原本的大小,切割大小不能超过图片原本的大小

#RandomCrop()
trans_random=transforms.RandomCrop(200)  # single int: crop a 200 x 200 square
trans_compose_3=transforms.Compose([trans_random,trans_totensor])  # crop first, then convert to tensor
for i in range(10):  # produce ten crops, steps 0..9
    image_crop=trans_compose_3(image_PIL)
    writer.add_image("RandomCrop",image_crop,i)  # log each crop under its own step
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6

(2)传入横纵参数

trans_random=transforms.RandomCrop((200,100))  # (h, w) sequence: crop to height 200, width 100
  • 1

6.总结

关注输入和输出类型
多看官方文档
关注方法需要什么参数:参数如果设置了默认值,保留默认值即可,没有默认值的需要指定(看一下要求传入什么类型的参数)
不知道变量的输出类型可以
直接print该变量
print(type()),看结果里显示什么类型
断点调试 debug
最后要 totensor,在 tensorboard 看一下结果(tensorboard需要tensor数据类型进行显示)

四、CIFAR10数据集

分为train_set训练数据集和test_set测试数据集

import torchvision

# Transform applied to every sample: convert the image to a tensor.
dataset_transform= transforms.Compose([
    transforms.ToTensor()])

# root: where the dataset is stored; train=True selects the training split,
# train=False the test split; download=True fetches the data if needed.
train_set= torchvision.datasets.CIFAR10( root="./dataset",train=True,transform=dataset_transform,download=True)  # training set
test_set= torchvision.datasets.CIFAR10(root="./dataset",train=False,transform=dataset_transform,download=True)  # test set
'''
root为数据集的位置
train = True 下载的是train
train = False 下载的是test
transform 为变换方式
download=True 为到官网上进行下载,如果下载较慢,可以将网址复制到迅雷上进行下载,对CIFAR10进行Ctrl+点击,找到源代码中的下载网址

'''

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16

五、Dataloader

DataLoader(test_data,batch_size=4,shuffle=True,num_workers=0,drop_last=False)

test_data:为传入的测试数据

batch_size=4:每次从数据集中取4张图片打包成一个batch(是否随机抓取由shuffle决定)

drop_last=False:当最后的图片不足batch_size时,不会舍弃最后的图片,如果为True则会舍弃最后的图片

shuffle=True:每一轮(epoch)都会重新打乱数据顺序,所以两轮读取到的顺序不同;=False则每一轮的顺序相同

num_workers:加载数据所使用的子进程数,默认为0,表示不使用多进程,只在主进程中加载数据

#函数原型:
    def __init__(self, dataset: Dataset[T_co], batch_size: Optional[int] = 1,
                 shuffle: Optional[bool] = None, sampler: Union[Sampler, Iterable, None] = None,
                 batch_sampler: Union[Sampler[Sequence], Iterable[Sequence], None] = None,
                 num_workers: int = 0, collate_fn: Optional[_collate_fn_t] = None,
                 pin_memory: bool = False, drop_last: bool = False,
                 timeout: float = 0, worker_init_fn: Optional[_worker_init_fn_t] = None,
                 multiprocessing_context=None, generator=None,
                 *, prefetch_factor: int = 2,
                 persistent_workers: bool = False,
                 pin_memory_device: str = ""):
        torch._C._log_api_usage_once("python.data_loader")

        if num_workers < 0:
            raise ValueError('num_workers option should be non-negative; '
                             'use num_workers=0 to disable multiprocessing.')

        if timeout < 0:
            raise ValueError('timeout option should be non-negative')

        if num_workers == 0 and prefetch_factor != 2:
            raise ValueError('prefetch_factor option could only be specified in multiprocessing.'
                             'let num_workers > 0 to enable multiprocessing.')
        assert prefetch_factor > 0

        if persistent_workers and num_workers == 0:
            raise ValueError('persistent_workers option needs num_workers > 0')

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
import torchvision
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader

# Prepare the test set and wrap it in a DataLoader.
data_transforms= transforms.ToTensor()
test_data = torchvision.datasets.CIFAR10("./dataset",train=False,transform=data_transforms,download=True)
# Batches of 4, reshuffled, loaded without worker processes; the last
# incomplete batch (if any) is kept.
test_load = DataLoader(test_data,batch_size=4,shuffle=True,num_workers=0,drop_last=False)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10

六、nn.Module:神经网络的基本骨架

from torch import nn
import torch

class Tudui(nn.Module):
    """Minimal ``nn.Module`` example whose forward pass adds one to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        """Forward pass: return ``input + 1``."""
        return input + 1

tudui = Tudui()  # create the module instance
x = torch.tensor(1.0)  # x is a tensor
output = tudui(x)  # calling the module runs its forward()
print(output)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16

七、卷积操作

灰度图用2维矩阵表示,所以通道数channel为1 。彩色图用3维矩阵表示,通道数为3

torch.nn.functional.conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) → Tensor

input:为输入,input tensor of shape (minibatch, in_channels, iH, iW)

weight:为卷积核,也就是权重

stride:为步长,就是移动的距离,默认为1

padding:为填充,将input周围进行填充指定的数,padding就是在input上下左右进行填充0,padding一般为卷积核大小的一半向下取整

这里面conv2d(N,C,H,W)里面的四个是 N就是batch size也就是输入图片的数量,C就是通道数这只是一个二维张量所以通道为1,H就是高,W就是宽,所以是1 1 5 5

import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],  # input image (5 x 5)
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],  # convolution kernel (3 x 3)
                       [0, 1, 0],
                       [2, 1, 0]])
# conv2d takes 4-D tensors, so reshape both operands to 4 dimensions.
input = torch.reshape(input, (1, 1, 5, 5))  # batch 1, 1 channel, 5 x 5
kernel = torch.reshape(kernel, (1, 1, 3, 3))  # 1 x 1 x 3 x 3 kernel

output = F.conv2d(input, kernel, stride=1)
print(output)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17

八、矩阵操作

1.reshape():改变矩阵的大小

reshape()函数中 -1 表示该维度的大小 n 由程序自动推断:n = tensor的元素总数 / 其余各维度大小的乘积

t= torch.tensor([[1,1,1,1],
                [2,2,2,2],
                [3,3,3,3]])
#t为3X4的张量,一共12个元素
reshape(-1,1)之后变成:
    12 x 1的矩阵
    #-1就代表着n
    
reshape(1,-1)之后变成:
    1x 12的矩阵
reshape(2,-1)之后变成:
    2 x 6的矩阵
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12

2.flatten():将矩阵转换为一行

Flatten和flatten的区别

(首先:Flatten来自nn模块下这点和卷积池化是一样的,flatten是来自torch模块下和nn模块是平级的)

(其次:Flatten进行展平的时候只是将每张图片进行分别展平,没有将展平后的向量进行拼接,但是flatten是将所有图片进行展平后还进行拼接)

# Compare the two flatten variants on every batch by printing the shapes.
for data in test_loader:
    imgs,targets = data
    print(imgs.shape)
    # imgs = flatten(imgs)  # flatten everything, the batch dimension included
    # print(imgs.shape)
    output = flatten_(imgs)  # presumably an nn.Flatten instance defined outside this snippet — TODO confirm
    print(output.shape)
    output1 = flatten(imgs)  # presumably torch.flatten imported outside this snippet — TODO confirm
    print(output1.shape)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9

img

3.矩阵的比较

argmax(0):将横向数据作为整体进行上下比较

argmax(1):将纵向的数据作为整体进行左右比较

import torch

outputs = torch.tensor([[0.1, 0.5],
                        [0.4, 0.3]])

print(outputs.argmax(0))  # index of the max in each column -> [1, 0]
print(outputs.argmax(1))  # index of the max in each row -> [1, 0]

preds = outputs.argmax(1)  # predicted class per row
targets = torch.tensor([1, 1])  # expected classes

print(preds == targets)  # element-wise comparison of prediction and target
print((preds == targets).sum())  # number of predictions that match the target

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14

image-20231009220013587

九、卷积层nn.Conv2d

torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')

Parameters

  • in_channels (int) – Number of channels in the input image

    输入图像的通道数,彩色图像一般是3通道

  • out_channels (int) – Number of channels produced by the convolution

    输出通道数

  • kernel_size (int or tuple) – Size of the convolving kernel

    卷积核的大小 int x int ,tuple用于不规则的大小

  • stride (int or tuple, optional) – Stride of the convolution. Default: 1

    移动的步数,默认为1

  • padding (int or tuple, optional) – Zero-padding added to both sides of the input. Default: 0

    对于input的上下左右进行填充

  • padding_mode (string, optional) – 'zeros', 'reflect', 'replicate' or 'circular'. Default: 'zeros'

    填充的格式,默认为0,也就是zeros

  • dilation (int or tuple, optional) – Spacing between kernel elements. Default: 1

  • groups (int, optional) – Number of blocked connections from input channels to output channels. Default: 1

  • bias (bool, optional) – If True, adds a learnable bias to the output. Default: True

Shape

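  • Input: $(N, C_{in}, H_{in}, W_{in})$

  • Output: $(N, C_{out}, H_{out}, W_{out})$ where

    $$H_{out} = \left\lfloor \frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1 \right\rfloor$$

    $$W_{out} = \left\lfloor \frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1 \right\rfloor$$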

import torch
import torchvision

from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR-10 test split, converted to tensors and served in batches of 64.
dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)  # test dataset

dataloader = DataLoader(dataset, batch_size=64)


class Tudui(nn.Module):
    """Single-layer network: one 3x3 ``Conv2d`` mapping 3 input channels to 6."""

    def __init__(self):
        super().__init__()  # run nn.Module's own initialisation first
        # 2-D convolution layer (Conv2d) with stride 1 and no padding.
        self.conv1 = Conv2d(3, 6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        return self.conv1(x)


# Run every batch through the network and log inputs and outputs as images.
tudui = Tudui()
writer = SummaryWriter("./logs")
step = 0  # global step shared by the "input" and "output" image groups
for data in dataloader:
    imgs, target = data
    output = tudui(imgs)
    writer.add_images("input", imgs, step)

    # conv1 produces 6 channels; fold them into 3-channel images before
    # logging. -1 lets reshape work out the resulting batch size.
    output = torch.reshape(output, (-1, 3, 30, 30))  # reshape output to 3 channels
    writer.add_images("output", output, step)
    step = step + 1
writer.close()

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37

十、池化层

最大池化:MAXPOOL2D

torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

Parameters

  • kernel_size – the size of the window to take a max over

    池化核的大小,int x int

  • stride – the stride of the window. Default value is kernel_size

    步径大小,默认为池化核的大小

  • padding – implicit zero padding to be added on both sides

  • dilation – a parameter that controls the stride of elements in the window

  • return_indices – if True, will return the max indices along with the outputs. Useful for torch.nn.MaxUnpool2d later

  • ceil_mode – when True, will use ceil instead of floor to compute the output shape

    为True时,采用ceil模式,边缘不足一个池化核的部分会保留;为False时,采用floor模式,不足的部分不会保留。默认为False

Shape:

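  • Input: $(N, C, H_{in}, W_{in})$
  • Output: $(N, C, H_{out}, W_{out})$, where

    $$H_{out} = \left\lfloor \frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1 \right\rfloor$$

    $$W_{out} = \left\lfloor \frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1 \right\rfloor$$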

十一、非线性激活

ReLU

CLASS torch.nn.ReLU(inplace=False)

Applies the rectified linear unit function element-wise:

$\text{ReLU}(x) = (x)^+ = \max(0, x)$

  • Parameters

    inplace – can optionally do the operation in-place. Default: False

    inplace=True时会直接将output赋值给input

    inplace=False时会保留原来的input,重新生成一个output,默认为False

  • Shape:

    Input: (N,∗) where * means, any number of additional dimensions

    Output: (N,∗) , same shape as the input

image-20231007163619066

十二、归一化层(Normalization Layers)

BatchNorm2d

CLASS torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

Parameters

  • num_features – C from an expected input of size (N,C,H,W)
  • eps – a value added to the denominator for numerical stability. Default: 1e-5
  • momentum – the value used for the running_mean and running_var computation. Can be set to None for cumulative moving average (i.e. simple average). Default: 0.1
  • affine – a boolean value that when set to True, this module has learnable affine parameters. Default: True
  • track_running_stats – a boolean value that when set to True, this module tracks the running mean and variance, and when set to False, this module does not track such statistics, and initializes statistics buffers running_mean and running_var as None. When these buffers are None, this module always uses batch statistics. in both training and eval modes. Default: True

Shape:

  • Input:(N,C,H,W)
  • Output: (same shape as input)

十三、线性层

LINEAR

CLASS torch.nn.Linear(in_features, out_features, bias=True)

Applies a linear transformation to the incoming data: $y = xA^T + b$

This module supports TensorFloat32.

Parameters

  • in_features – size of each input sample
  • out_features – size of each output sample
  • bias – If set to False, the layer will not learn an additive bias. Default: True 决定b是否存在,默认是存在的

Shape:

  • Input: $(N, *, H_{in})$ where $*$ means any number of additional dimensions and $H_{in} = \text{in\_features}$
  • Output: $(N, *, H_{out})$ where all but the last dimension are the same shape as the input and $H_{out} = \text{out\_features}$.
import torch
import torchvision.datasets
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

# CIFAR10 test split converted to tensors; downloaded into ./dataset when missing.
dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

# drop_last=True discards the final, smaller batch.  The loop below flattens a
# whole batch into one vector of 64 * 3 * 32 * 32 = 196608 values, so a partial
# last batch would not match Linear(196608, 10) and would raise a shape error.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

class Tudui(nn.Module):
    """Single fully connected layer mapping a 196608-value vector to 10 outputs."""

    def __init__(self):
        super().__init__()
        # 196608 = 64 * 3 * 32 * 32, i.e. one batch of 64 RGB 32x32 images
        # flattened into a single vector.
        self.linear1 = Linear(in_features=196608, out_features=10)

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        return self.linear1(input)


tudui = Tudui()
for imgs, targets in dataloader:
    print(imgs.shape)
    # Collapse the whole batch into one 1-D vector.  The alternative
    # torch.reshape(imgs, (1, 1, 1, -1)) keeps the same values in a 4-D tensor.
    input = imgs.flatten()
    print(input.shape)
    output = tudui(input)
    print(output.shape)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32

十四、Sequential

CLASS torch.nn.Sequential(*args)

A sequential container. Modules will be added to it in the order they are passed in the constructor. Alternatively, an ordered dict of modules can also be passed in.

# Both names are needed for the examples to run on their own.
from collections import OrderedDict

from torch import nn

# Example of using Sequential: modules run in the order they are passed in
# and are addressed by position (model[0], model[1], ...).
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
)

# Example of using Sequential with OrderedDict: the same layers, but each one
# gets an explicit name (model.conv1, model.relu1, ...).
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU()),
]))
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15

十五、CIFAR 10 model 结构搭建

img

卷积后长宽不变的话(stride=1、kernel_size为奇数),应该取padding=(kernel_size-1)/2,例如kernel_size=5时padding=2

import torch
import torchvision.datasets
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Tudui(nn.Module):
    """CIFAR10 classifier: three conv + max-pool stages followed by two linear layers.

    Takes a batch shaped (N, 3, 32, 32) and returns (N, 10) class scores.
    """

    def __init__(self):
        super().__init__()
        # All layers are chained in one Sequential container.  The equivalent
        # long-hand version stores conv1, maxpool1, ..., linear2 as separate
        # attributes and calls them one after another in forward().
        layers = [
            Conv2d(3, 32, kernel_size=5, padding=2),   # -> (N, 32, 32, 32)
            MaxPool2d(kernel_size=2),                  # -> (N, 32, 16, 16)
            Conv2d(32, 32, kernel_size=5, padding=2),  # -> (N, 32, 16, 16)
            MaxPool2d(kernel_size=2),                  # -> (N, 32, 8, 8)
            Conv2d(32, 64, kernel_size=5, padding=2),  # -> (N, 64, 8, 8)
            MaxPool2d(kernel_size=2),                  # -> (N, 64, 4, 4)
            Flatten(),                                 # -> (N, 1024)
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.module = Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the whole layer stack and return the class scores."""
        return self.module(x)


# Build the network.
tudui = Tudui()
# print(tudui)

# Sanity check with a dummy batch: 64 images, 3 channels, 32x32 pixels.
input = torch.ones(size=(64, 3, 32, 32))
output = tudui(input)
print(output.shape)

# Record the model graph under ./logs so it can be inspected in TensorBoard.
writer = SummaryWriter(log_dir="logs")
writer.add_graph(tudui, input_to_model=input)
writer.close()

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63

十六、损失函数与反向传播

1.L1Loss

CLASS torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')

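$$\ell(x, y) = \begin{cases} \operatorname{mean}(L), & \text{if reduction} = \text{'mean'} \\ \operatorname{sum}(L), & \text{if reduction} = \text{'sum'} \end{cases}$$

其中 $L = \{l_1, \dots, l_N\}^{\top}$,$l_n = |x_n - y_n|$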

默认为mean,可以指定sum

Shape:

  • Input: (N,∗) where ∗ means any number of additional dimensions
  • Target: (N,∗) , same shape as the input
  • Output: scalar. If reduction is 'none', then (N,∗) , same shape as the input
import torch
from torch.nn import L1Loss

# Two 3-element vectors that differ only in the last entry (|3 - 5| = 2),
# reshaped to 4-D tensors of shape (1, 1, 1, 3).
inputs = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)
targets = torch.tensor([1, 2, 5], dtype=torch.float32).reshape(1, 1, 1, 3)

# reduction="sum" adds up the absolute differences instead of averaging them.
loss = L1Loss(reduction="sum")
result = loss(inputs, targets)
print(result)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13

2.MSELOSS:均方误差

CLASS torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')

Shape:

  • Input: (N,∗) where ∗ means any number of additional dimensions
  • Target: (N,∗) , same shape as the input

3.CROSSENTROPYLOSS:交叉熵

十七、优化器

import torch
import torchvision.datasets
from torch import nn
from torch.nn import L1Loss, Sequential, Conv2d, MaxPool2d, Flatten, Linear

from torch.utils.data import DataLoader

# CIFAR10 test split as tensors, fetched into ./dataset when not already present.
dataset = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Serve the images in mini-batches of 64.
dataloader = DataLoader(dataset=dataset, batch_size=64)


class Tudui(nn.Module):
    """Small CNN for 3-channel 32x32 images with 10 output scores."""

    def __init__(self):
        super().__init__()
        # Three conv + pool stages, then flatten and two linear layers.
        self.module = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        """Return the 10 class scores for every image in ``x``."""
        return self.module(x)


tudui = Tudui()

optim = torch.optim.SGD(tudui.parameters(), lr=0.01)  # stochastic gradient descent

loss = nn.CrossEntropyLoss()


# Optimise for 20 passes over the data.
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()  # clear the gradients left over from the previous step
        result_loss.backward()  # compute the gradients for this batch
        optim.step()  # update the parameters using those gradients
        # .item() extracts a plain Python float.  Adding the tensor itself would
        # chain autograd history onto running_loss for the whole epoch.
        running_loss += result_loss.item()

    # Summed loss over all batches of this pass.
    print(running_loss)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55

十八、现有网络模型的使用与修改

vgg16

image-20231009141147063

import torchvision
import torch
from torch import nn

# Build VGG16 with default arguments (no weights are passed explicitly here).
vgg16_true = torchvision.models.vgg16()

dataset = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Other ways to adapt the output to 10 classes (not used here):
#   vgg16_true.classifier.add_module("name_linear", nn.Linear(1000, 10))
#       appends a Linear layer named "name_linear" inside `classifier`
#   vgg16_true.add_module("name_linear", nn.Linear(1000, 10))
#       appends a Linear layer named "name_linear" at the very end of the model

# Used here: replace the layer at index 6 of `classifier` in place.
vgg16_true.classifier[6] = nn.Linear(in_features=4096, out_features=10)

print(vgg16_true)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17

十九、网络模型的保存与读取

import torch
import torchvision.models
from torch import nn

vgg16 = torchvision.models.vgg16()

# Method 1: pickle the whole model object (structure + parameters).
torch.save(obj=vgg16, f="vgg16_method1.path")

# Method 2 (officially recommended): save only the parameters.
# state_dict() returns them as a dictionary, which gives a smaller file.
torch.save(obj=vgg16.state_dict(), f="vgg16_method2.path")


# 如果是自己写的模型
class Tudui(nn.Module):
    """Parameter-free toy module whose forward pass adds 1 to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        """Return ``input + 1``."""
        return input + 1


# Method 1 applied to a user-defined model: pickle the whole object.
tudui = Tudui()
torch.save(obj=tudui, f="tudui_method1.path")

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
import torch

# 方式1,加载模型
import torchvision.models

from module_save import *

# Method 1: load the whole pickled model (structure + parameters).
# NOTE: torch.load unpickles the file, so only load files you trust.  On
# PyTorch >= 2.6 the default is weights_only=True, and loading a whole pickled
# model then needs weights_only=False.
module = torch.load("vgg16_method1.path")
# print(module)  # this is the original vgg16 model


# Method 2: the file holds only the state dict, not a model.
module2 = torch.load("vgg16_method2.path")
print(module2)  # print the object that was just loaded
# Rebuild the architecture first, then copy the saved parameters into it.
vgg16 = torchvision.models.vgg16()
vgg16.load_state_dict(module2)
print(vgg16)

# Loading a pickled user-defined model: its class (Tudui) must be available at
# load time, so either define it again in this file or import it from the
# module that declares it (done above with `from module_save import *`).
module3 = torch.load(f="tudui_method1.path")
print(module3)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35

二十、完整的模型训练套路

以CIFAR10数据集为例子进行训练

import torch
import torchvision.datasets
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 training and test splits, both converted to tensors.
train_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Dataset sizes (test_data_size is used later to turn correct counts into accuracy).
train_data_size, test_data_size = len(train_data), len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap both splits in loaders that yield mini-batches of 64.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)


# 搭建神经网络,10分类的网络
class Tudui(nn.Module):
    """10-class network for (N, 3, 32, 32) image batches."""

    def __init__(self):
        super().__init__()
        feature_stages = [
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
        ]
        # After three 2x poolings a 32x32 image is 4x4 with 64 channels,
        # i.e. 1024 values per image once flattened.
        classifier_stages = [Flatten(), Linear(1024, 64), Linear(64, 10)]
        self.module = Sequential(*feature_stages, *classifier_stages)

    def forward(self, x):
        """Return the (N, 10) class scores for the batch ``x``."""
        scores = self.module(x)
        return scores


#  主函数,来验证模型的准确性
# if __name__ == '__main__':
#     tudui = Tudui()
#     input = torch.ones((64, 3, 32, 32))
#     output = tudui(input)
#     print(output.shape)

# Create the model
tudui = Tudui()

# Loss function
loss_function = nn.CrossEntropyLoss()  # cross entropy

# Optimizer
learning_rate = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)  # stochastic gradient descent

# Bookkeeping for the run
total_train_step = 0  # optimizer steps taken so far, counted across all epochs
total_test_step = 0  # completed evaluation passes
epoch = 10  # number of training epochs

# TensorBoard logging
writer = SummaryWriter("./train_logs")

for i in range(epoch):
    print("-----第{}轮训练开始-----".format(i + 1))

    # ---- training phase ----
    # train()/eval() change nothing for this network (no Dropout/BatchNorm),
    # but the loop stays correct if such layers are added later.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_function(outputs, targets)

        optimizer.zero_grad()  # clear the gradients from the previous step
        loss.backward()  # back-propagate to compute the gradients
        optimizer.step()  # update the parameters

        total_train_step += 1

        if total_train_step % 50 == 0:  # report only every 50th step to limit output
            # .item() turns the 0-dim loss tensor into a plain Python float.
            print("训练的次数:{},loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation phase: measure the model on the test set ----
    tudui.eval()
    total_test_loss = 0  # summed loss over all test batches
    total_accuracy = 0  # number of correctly classified test images
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_function(outputs, targets)
            total_test_loss += loss.item()
            # argmax(1) picks the highest-scoring class for each image.
            total_accuracy += (outputs.argmax(1) == targets).sum().item()
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))

    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # Save the model after every epoch
    torch.save(tudui, "tudui_{}.path".format(i))
    print("模型已保存")

# Flush pending events and release the log file.
writer.close()

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116

二十一、利用GPU进行训练

第一种方法:

对网络模型、数据(输入imgs和标签targets)、损失函数分别调用.cuda()

# Create the model
tudui = Tudui()
# Move it to the GPU when one is available.  is_available must be *called*:
# the bare function object is always truthy, so .cuda() would run, and fail,
# on CPU-only machines.
if torch.cuda.is_available():
    tudui = tudui.cuda()

# Create the loss function
loss_function = nn.CrossEntropyLoss()  # cross entropy
# Move it to the GPU as well
if torch.cuda.is_available():
    loss_function = loss_function.cuda()


# Inside the training / test loop: every batch has to be moved too
imgs, targets = data
if torch.cuda.is_available():
    imgs = imgs.cuda()
    targets = targets.cuda()
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17

第二种方法:

定义训练的设备

device = torch.device("cuda")

device = torch.device("cpu")

cuda就是用GPU进行训练,cpu就是用cpu进行训练

也可以用条件表达式自动选择设备(有GPU就用cuda,否则用cpu):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  • 1
# Device used for training
device = torch.device("cuda")

# Create the model and move it to the device in one step.
# For modules, a bare `tudui.to(device)` without re-assignment works too.
tudui = Tudui().to(device)

# Same for the loss function (cross entropy);
# a bare `loss_function.to(device)` is also enough.
loss_function = nn.CrossEntropyLoss().to(device)


# For imgs and targets the result of .to(device) must be assigned back.
imgs, targets = imgs.to(device), targets.to(device)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
import torch
import torchvision.datasets
import time
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Train on the GPU when CUDA is available, otherwise on the CPU.
# (torch.device("cuda") could be hard-coded instead.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# CIFAR10 training and test splits, both converted to tensors.
train_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    root="./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Dataset sizes (test_data_size is used later to turn correct counts into accuracy).
train_data_size, test_data_size = len(train_data), len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap both splits in loaders that yield mini-batches of 64.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)


# 搭建神经网络,10分类的网络
class Tudui(nn.Module):
    """Convolutional network producing 10 class scores per 3x32x32 image."""

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) of the three 5x5 convolutions; each one
        # is followed by a 2x2 max-pool.
        conv_channels = ((3, 32), (32, 32), (32, 64))
        stages = []
        for c_in, c_out in conv_channels:
            stages.append(Conv2d(c_in, c_out, 5, padding=2))
            stages.append(MaxPool2d(2))
        stages += [Flatten(), Linear(1024, 64), Linear(64, 10)]
        self.module = Sequential(*stages)

    def forward(self, x):
        """Feed ``x`` through the stacked layers and return the scores."""
        return self.module(x)


#  主函数,来验证模型的准确性
# if __name__ == '__main__':
#     tudui = Tudui()
#     input = torch.ones((64, 3, 32, 32))
#     output = tudui(input)
#     print(output.shape)

# Create the model and move it to the training device
tudui = Tudui()
tudui = tudui.to(device)

# Loss function, also on the training device
loss_function = nn.CrossEntropyLoss()  # cross entropy
loss_function = loss_function.to(device)

# Optimizer
learning_rate = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)  # stochastic gradient descent

# Bookkeeping for the run
total_train_step = 0  # optimizer steps taken so far, counted across all epochs
total_test_step = 0  # completed evaluation passes
epoch = 10  # number of training epochs

# TensorBoard logging
writer = SummaryWriter("./train_logs")

for i in range(epoch):
    start_time = time.time()  # reset at the start of every epoch
    print("-----第{}轮训练开始-----".format(i + 1))

    # ---- training phase ----
    # train()/eval() change nothing for this network (no Dropout/BatchNorm),
    # but the loop stays correct if such layers are added later.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        # Batches must be on the same device as the model.
        imgs = imgs.to(device)
        targets = targets.to(device)

        outputs = tudui(imgs)
        loss = loss_function(outputs, targets)

        optimizer.zero_grad()  # clear the gradients from the previous step
        loss.backward()  # back-propagate to compute the gradients
        optimizer.step()  # update the parameters

        total_train_step += 1

        if total_train_step % 100 == 0:  # report only every 100th step to limit output
            end_time = time.time()
            # Seconds elapsed since the start of the current epoch.
            print("训练的时间:{}".format(end_time - start_time))
            # .item() turns the 0-dim loss tensor into a plain Python float.
            print("训练的次数:{},loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation phase: measure the model on the test set ----
    tudui.eval()
    total_test_loss = 0  # summed loss over all test batches
    total_accuracy = 0  # number of correctly classified test images
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = tudui(imgs)
            loss = loss_function(outputs, targets)
            total_test_loss += loss.item()
            # argmax(1) picks the highest-scoring class for each image.
            total_accuracy += (outputs.argmax(1) == targets).sum().item()
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))

    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # Save the model after every epoch
    torch.save(tudui, "tudui_{}.path".format(i))
    print("模型已保存")

# Flush pending events and release the log file.
writer.close()
    
    

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138

二十二、完整的模型验证套路

png图片可能是四通道的:RGB三个颜色通道+透明度(Alpha)通道

jpg图片是三通道的:RGB

image = image.convert("RGB") 只保留RGB三个颜色通道,丢弃透明度通道

如果图片本来就是RGB,那么调用后通道数不变



如果读取的模型是用GPU训练并保存的,而现在只能用CPU进行验证,那么在加载的时候要用map_location指定映射位置

module =torch.load("tudui.path",map_location=torch.device("cpu"))
  • 1
from PIL import Image
import torch
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear

image_path = "dog_image/dog.png"

# A PNG may carry a fourth (alpha) channel, which would break the later reshape
# to (1, 3, 32, 32).  convert("RGB") guarantees exactly three colour channels
# and leaves images that are already RGB unchanged.
image = Image.open(image_path).convert("RGB")

# Resize to the 32x32 input size and convert the PIL image to a tensor.
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor(), ])
image = transform(image)

# Device to run on: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# 搭建神经网络,10分类的网络
class Tudui(nn.Module):
    """Network definition needed to unpickle the saved model (10-class CNN)."""

    def __init__(self):
        super().__init__()
        conv_a = Conv2d(3, 32, kernel_size=5, padding=2)
        conv_b = Conv2d(32, 32, kernel_size=5, padding=2)
        conv_c = Conv2d(32, 64, kernel_size=5, padding=2)
        # Each convolution is followed by its own 2x2 max-pool; the flattened
        # result feeds two linear layers.
        self.module = Sequential(
            conv_a, MaxPool2d(2),
            conv_b, MaxPool2d(2),
            conv_c, MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        """Return the class scores computed by the layer stack for ``x``."""
        return self.module(x)


# Load the whole pickled model.  map_location places its parameters on `device`,
# so a checkpoint saved on a GPU also loads on a CPU-only machine and the model
# always ends up on the same device as the image below.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.  On
# PyTorch >= 2.6, loading a whole model also needs weights_only=False.
module = torch.load("tudui_0.path", map_location=device)
# print(module)

image = torch.reshape(image, (1, 3, 32, 32))  # add the batch dimension the model expects
image = image.to(device)  # same device as the model

module.eval()  # switch the model to evaluation mode
with torch.no_grad():  # no gradients are needed for inference
    output = module(image)
print(output)
print(output.argmax(1))  # index of the highest-scoring class


  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/318310
推荐阅读
相关标签
  

闽ICP备14008679号