Dataset: provides a way to access the data and its labels
① how to get each individual sample and its label
② how much data there is
Dataloader: packages the data into different forms for the network that follows
from torch.utils.data import Dataset
from PIL import Image  # image-reading library; can also be used to visualize images
import os  # for file-path operations

# help(Dataset)
class MyData(Dataset):  # class that reads the data
    def __init__(self, root_dir, label_dir):  # provide "global" variables for the class
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)  # join root_dir and label_dir into the full path
        self.image_path = os.listdir(self.path)  # list of the files contained in path

    def __getitem__(self, index):
        image_name = self.image_path[index]  # file name at the given index
        image_item_path = os.path.join(self.root_dir, self.label_dir, image_name)  # full path of the target file
        image = Image.open(image_item_path)  # open the file and read its contents
        label = self.label_dir  # the parent folder name is the label
        return image, label

    def __len__(self):
        return len(self.image_path)  # number of samples

root_dir = "hymenoptera_data/train"
ants_dir = "ants"
bees_dir = "bees"
ants_dataset = MyData(root_dir, ants_dir)  # the ant dataset
bees_dataset = MyData(root_dir, bees_dir)  # the bee dataset
train_dataset = ants_dataset + bees_dataset  # concatenate the ants and bees datasets
SummaryWriter
Purpose: lets a training program call methods that write data to a file directly from the training loop, without slowing training down.
# __init__, the SummaryWriter constructor
def __init__(self, log_dir=None, comment='', purge_step=None, max_queue=10, flush_secs=120, filename_suffix='')
# every argument has a default
log_dir: where the summaries are saved. If the first argument of every writer instance is logs, a folder named logs appears next to the current code file and holds the summaries.
If no argument is given, a default folder is created: runs/<date-time>
comment: suffix appended to the storage folder; used as the file suffix when log_dir is not specified.
filename_suffix: suffix for the file name
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("logs")  # writes the files into logs
# Fixing entangled curves: plots from different runs tangled in one chart
# 1. create a new tag
# 2. or delete the old files, exit the terminal, and run again
When we visualize under the same tag, all the curves appear in the same chart.
To show them separately, switch to a different tag; to delete old data, simply delete the files generated by writer.add_scalar().
# open a terminal and start TensorBoard
tensorboard --logdir="logs"  # view in the browser, on the default port
tensorboard --logdir="logs" --port=number  # choose the port yourself; serves on port number
# function prototype
def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False):
    Args:
        tag (string): Data identifier  # the title
        scalar_value (float or string/blobname): Value to save  # the Y axis
        global_step (int): Global step value to record  # the X axis
        walltime (float): Optional override default walltime (time.time()) with seconds after epoch of event  # optional; timestamp of the event, defaults to time.time()
        new_style (boolean): Whether to use new style (tensor field) or old style (simple_value field). New style could lead to faster data loading.
writer.add_scalar("y=x", y, x)  # y is the Y-axis value, x the X-axis value, "y=x" the title
When going from PIL to numpy, you must tell add_image() what each dimension of the shape means by passing dataformats.
# function prototype
def add_image(self, tag, img_tensor, global_step=None, walltime=None, dataformats='CHW'):
    Add image data to summary. Note that this requires the ``pillow`` package.
    Args:
        tag (string): Data identifier  # the title, like a first-level caption; changing the tag starts a new image album
        img_tensor (torch.Tensor, numpy.array, or string/blobname): Image data  # the image data type
        global_step (int): Global step value to record  # the step; under one title/tag, each step holds a different image, like a second-level caption
        walltime (float): Optional override default walltime (time.time()) seconds after epoch of event
# Note: when calling add_image(np.array), pillow must be older than 10.0.0; 9.5.0 works
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image

writer = SummaryWriter("logs")
image_path = "data/train/ants_image/0013035.jpg"  # relative path of the image
image_PIL = Image.open(image_path)  # open the image first
image_array = np.array(image_PIL)  # then convert it to numpy.array (converting the path string itself would be a bug)
writer.add_image("test", image_array, 1, dataformats='HWC')
# "test" is the title, image_array the array, 1 the step
# dataformats='HWC': H is height, W is width, C is channels

for i in range(100):
    writer.add_scalar("y=x", i, i)

writer.close()
5. add_graph(): visualize the model structure
writer.add_graph(net, input_to_model=torch.rand(batch_size, channels, H, W))
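As a hedged sketch of a complete call (the toy model SmallNet and the input shape are invented for illustration; the full training script further below shows add_graph on the real model):

import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter

class SmallNet(nn.Module):  # hypothetical toy model, just to give add_graph something to draw
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3, padding=1)

    def forward(self, x):
        return self.conv(x)

writer = SummaryWriter("logs")
net = SmallNet()
writer.add_graph(net, input_to_model=torch.rand(1, 3, 32, 32))  # batch=1, 3 channels, 32x32
writer.close()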
Image transforms
Converting a PIL Image / numpy.ndarray to a tensor
# function prototype:
class ToTensor:  # this is a class; an object must be created
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript.

    Converts a PIL Image or numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
    if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
    or if the numpy.ndarray has dtype = np.uint8

    In the other cases, tensors are returned without scaling.

    .. note::
        Because the input image is scaled to [0.0, 1.0], this transformation should not be used when
        transforming target image masks. See the `references`_ for implementing the transforms for image masks.

    .. _references: https://github.com/pytorch/vision/tree/main/references/segmentation
    """

    def __init__(self) -> None:
        _log_api_usage_once(self)

    def __call__(self, pic):
        """
        Args:
            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.
        """
        return F.to_tensor(pic)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
writer = SummaryWriter("logs")
image_PIL = Image.open("data/train/bees_image/17209602_fe5a5a746f.jpg")
trans_totensor = transforms.ToTensor()  # create the object
image_tensor = trans_totensor(image_PIL)
writer.add_image("Totensor", image_tensor)
writer.close()
# function prototype:
class Normalize(torch.nn.Module):
    """Normalize a tensor image with mean and standard deviation.
    This transform does not support PIL Image.
    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
    channels, this transform will normalize each channel of the input
    ``torch.*Tensor`` i.e.,
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``

    .. note::
        This transform acts out of place, i.e., it does not mutate the input tensor.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace(bool,optional): Bool to make this operation in-place.
    """

    def __init__(self, mean, std, inplace=False):
        super().__init__()
        _log_api_usage_once(self)
        self.mean = mean
        self.std = std
        self.inplace = inplace

    def forward(self, tensor: Tensor) -> Tensor:
        """
        Args:
            tensor (Tensor): Tensor image to be normalized.

        Returns:
            Tensor: Normalized Tensor image.
        """
        return F.normalize(tensor, self.mean, self.std, self.inplace)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})"

# Normalize a tensor image with mean and standard deviation: you supply the mean and standard deviation of the tensor image
# Formula: output[channel] = (input[channel] - mean[channel]) / std[channel]
# channel is the channel/dimension: a jpg image is usually RGB, so it has 3 channels; a png image has 4 channels (an extra transparency channel), so 4 channels must be set then
With mean 0.5 and std 0.5: (input - 0.5) / 0.5 == 2 * input - 1, so if input is in [0, 1], the result is in [-1, 1]
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
image_normal = trans_norm(image_tensor)
writer.add_image("Normalize", image_normal)
# function prototype:
class Resize(torch.nn.Module):
    """Resize the input image to the given size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions

    .. warning::
        The output image might be different depending on its type: when downsampling, the interpolation of PIL images
        and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
        in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
        types. See also below the ``antialias`` parameter, which can help making the output of PIL images and tensors
        closer.

    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), output size will be matched to this. If size is an int,
            smaller edge of the image will be matched to this number.
            i.e, if height > width, then image will be rescaled to
            (size * height / width, size).

            .. note::
                In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
            ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
            but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
        max_size (int, optional): The maximum allowed for the longer edge of
            the resized image: if the longer edge of the image is greater
            than ``max_size`` after being resized according to ``size``, then
            the image is resized again so that the longer edge is equal to
            ``max_size``. As a result, ``size`` might be overruled, i.e the
            smaller edge may be shorter than ``size``. This is only supported
            if ``size`` is an int (or a sequence of length 1 in torchscript mode).
        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
            ``InterpolationMode.BILINEAR`` only mode. This can help making the output for PIL images and tensors
            closer.

            .. warning::
                There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor.
    """

    def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
        super().__init__()
        _log_api_usage_once(self)
        if not isinstance(size, (int, Sequence)):
            raise TypeError(f"Size should be int or sequence. Got {type(size)}")
        if isinstance(size, Sequence) and len(size) not in (1, 2):
            raise ValueError("If size is a sequence, it should have 1 or 2 values")
        self.size = size
        self.max_size = max_size

        # Backward compatibility with integer value
        if isinstance(interpolation, int):
            warnings.warn(
                "Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. "
                "Please use InterpolationMode enum."
            )
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias

    def forward(self, img):
        """
        Args:
            img (PIL Image or Tensor): Image to be scaled.

        Returns:
            PIL Image or Tensor: Rescaled image.
        """
        return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias)

    def __repr__(self) -> str:
        detail = f"(size={self.size}, interpolation={self.interpolation.value}, max_size={self.max_size}, antialias={self.antialias})"
        return f"{self.__class__.__name__}{detail}"

# defining a resize transform
resize = transforms.Resize((224, 224))  # resize the image to 224x224
resize = transforms.Resize(224)  # with a single argument, the aspect ratio of the original image is kept
image_resize = resize(image)  # the input can be a PIL Image or a tensor
# Resize
trans_resize = transforms.Resize((512, 512))
image_resize = trans_resize(image_tensor)
writer.add_image("Resize", image_resize)
# function prototype:
class Compose:
    """Composes several transforms together. This transform does not support torchscript.
    Please, see the note below.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.PILToTensor(),
        >>>     transforms.ConvertImageDtype(torch.float),
        >>> ])

    .. note::
        In order to script the transformations, please use ``torch.nn.Sequential`` as below.

        >>> transforms = torch.nn.Sequential(
        >>>     transforms.CenterCrop(10),
        >>>     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        >>> )
        >>> scripted_transforms = torch.jit.script(transforms)

        Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
        `lambda` functions or ``PIL.Image``.
    """

    def __init__(self, transforms):
        if not torch.jit.is_scripting() and not torch.jit.is_tracing():
            _log_api_usage_once(self)
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img

    def __repr__(self) -> str:
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += f"    {t}"
        format_string += "\n)"
        return format_string

The argument of Compose() must be a list; in Python a list is written [item1, item2, ...].
In Compose the items must be transforms, giving Compose([transform1, transform2, ...]).
The list combines several transforms/operations, and the Compose() class iterates over the transform operations in that list.
trans_resize_2 = transforms.Resize(512)  # create the resize transform; does not change the aspect ratio
trans_compose = transforms.Compose([trans_totensor, trans_resize_2])  # build the Compose transform combining ToTensor and Resize; in some PyTorch versions Resize must come first and ToTensor second
image_resize_2 = trans_compose(image_PIL)  # the argument is a PIL image
writer.add_image("Resize_2", image_resize_2, 1)
# function prototype:
class RandomCrop(torch.nn.Module):
    """Crop the given image at a random location.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions,
    but if non-constant padding is used, the input is expected to have at most 2 leading dimensions

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
        padding (int or sequence, optional): Optional padding on each border
            of the image. Default is None. If a single int is provided this
            is used to pad all borders. If sequence of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a sequence of length 4 is provided
            this is the padding for the left, top, right and bottom borders respectively.

            .. note::
                In torchscript mode padding as single int is not supported, use a sequence of
                length 1: ``[padding, ]``.
        pad_if_needed (boolean): It will pad the image if smaller than the
            desired size to avoid raising an exception. Since cropping is done
            after padding, the padding seems to be done at a random offset.
        fill (number or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant.
            Only number is supported for torch Tensor.
            Only int or tuple value is supported for PIL Image.
        padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
            Default is constant.

            - constant: pads with a constant value, this value is specified with fill

            - edge: pads with the last value at the edge of the image.
              If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2

            - reflect: pads with reflection of image without repeating the last value on the edge.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
              will result in [3, 2, 1, 2, 3, 4, 3, 2]

            - symmetric: pads with reflection of image repeating the last value on the edge.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
              will result in [2, 1, 1, 2, 3, 4, 4, 3]
    """

    @staticmethod
    def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]:
        """Get parameters for ``crop`` for a random crop.

        Args:
            img (PIL Image or Tensor): Image to be cropped.
            output_size (tuple): Expected output size of the crop.

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
        """
        _, h, w = F.get_dimensions(img)
        th, tw = output_size

        if h + 1 < th or w + 1 < tw:
            raise ValueError(f"Required crop size {(th, tw)} is larger then input image size {(h, w)}")

        if w == tw and h == th:
            return 0, 0, h, w

        i = torch.randint(0, h - th + 1, size=(1,)).item()
        j = torch.randint(0, w - tw + 1, size=(1,)).item()
        return i, j, th, tw

    def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"):
        super().__init__()
        _log_api_usage_once(self)

        self.size = tuple(_setup_size(size, error_msg="Please provide only two dimensions (h, w) for size."))
        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    def forward(self, img):
        """
        Args:
            img (PIL Image or Tensor): Image to be cropped.

        Returns:
            PIL Image or Tensor: Cropped image.
        """
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        _, height, width = F.get_dimensions(img)
        # pad the width if needed
        if self.pad_if_needed and width < self.size[1]:
            padding = [self.size[1] - width, 0]
            img = F.pad(img, padding, self.fill, self.padding_mode)
        # pad the height if needed
        if self.pad_if_needed and height < self.size[0]:
            padding = [0, self.size[0] - height]
            img = F.pad(img, padding, self.fill, self.padding_mode)

        i, j, h, w = self.get_params(img, self.size)

        return F.crop(img, i, j, h, w)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(size={self.size}, padding={self.padding})"
(1) Passing a single int: a square (size, size) crop is made
Note: mind the image's original size; the crop size must not exceed it.
# RandomCrop()
trans_random = transforms.RandomCrop(200)  # crop the image to 200x200
trans_compose_3 = transforms.Compose([trans_random, trans_totensor])  # build the composed transform
for i in range(10):  # generate ten crops, [0, 9]
    image_crop = trans_compose_3(image_PIL)
    writer.add_image("RandomCrop", image_crop, i)  # keep adding them
(2) Passing both height and width
trans_random = transforms.RandomCrop((200, 100))  # crop the image to 200x100
Pay attention to the input and output types
Read the official documentation often
Check which parameters a method needs: keep a parameter's default if it has one; parameters without defaults must be supplied (check what type they expect)
If you do not know a variable's output type, you can:
print the variable directly
print(type(...)) and read off the type shown in the result
use breakpoint debugging (debug)
Finally, apply ToTensor and check the result in TensorBoard (TensorBoard needs the tensor data type for display)
The data splits into train_set (the training set) and test_set (the test set)
import torchvision
from torchvision import transforms

dataset_transform = transforms.Compose([
    transforms.ToTensor()])
train_set = torchvision.datasets.CIFAR10(
    root="./dataset", train=True, transform=dataset_transform, download=True)  # training set
test_set = torchvision.datasets.CIFAR10(
    root="./dataset", train=False, transform=dataset_transform, download=True)  # test set
'''
root is the location of the dataset
train=True fetches the train split
train=False fetches the test split
transform is the transform to apply
download=True downloads from the official site; if the download is slow, copy the URL
into a download manager such as Thunder — Ctrl+click on CIFAR10 to find the download URL in the source code
'''
DataLoader(test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)
test_data: the test dataset passed in
batch_size=4: grab 4 images at a time from the dataset and pack them into a batch; the sampling is random
drop_last=False: when the last batch has fewer than batch_size images it is kept; True would drop it
shuffle=True: the images differ between two passes over the data; =False keeps them the same
num_workers: single- or multi-process loading; the default 0 loads in the main process
# function prototype:
def __init__(self, dataset: Dataset[T_co], batch_size: Optional[int] = 1,
             shuffle: Optional[bool] = None, sampler: Union[Sampler, Iterable, None] = None,
             batch_sampler: Union[Sampler[Sequence], Iterable[Sequence], None] = None,
             num_workers: int = 0, collate_fn: Optional[_collate_fn_t] = None,
             pin_memory: bool = False, drop_last: bool = False,
             timeout: float = 0, worker_init_fn: Optional[_worker_init_fn_t] = None,
             multiprocessing_context=None, generator=None,
             *, prefetch_factor: int = 2,
             persistent_workers: bool = False,
             pin_memory_device: str = ""):
    torch._C._log_api_usage_once("python.data_loader")

    if num_workers < 0:
        raise ValueError('num_workers option should be non-negative; '
                         'use num_workers=0 to disable multiprocessing.')

    if timeout < 0:
        raise ValueError('timeout option should be non-negative')

    if num_workers == 0 and prefetch_factor != 2:
        raise ValueError('prefetch_factor option could only be specified in multiprocessing.'
                         'let num_workers > 0 to enable multiprocessing.')
    assert prefetch_factor > 0

    if persistent_workers and num_workers == 0:
        raise ValueError('persistent_workers option needs num_workers > 0')
import torchvision
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
# prepare the test set
data_transforms = transforms.ToTensor()
test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=data_transforms, download=True)
test_load = DataLoader(test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)
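A short hedged sketch of consuming this loader (the "dataloader" tag and log folder name are invented for the example): each data item is one batch, i.e. 4 images stacked into a [4, 3, 32, 32] tensor plus their 4 targets.

writer = SummaryWriter("dataloader_logs")  # the log folder name here is arbitrary
step = 0
for data in test_load:
    imgs, targets = data  # imgs: [4, 3, 32, 32], targets: [4]
    writer.add_images("dataloader", imgs, step)  # note add_images (plural) takes a whole batch
    step = step + 1
writer.close()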
from torch import nn
import torch

class Tudui(nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):  # forward pass
        output = input + 1
        return output

tudui = Tudui()  # create the object
x = torch.tensor(1.0)  # x is a tensor
output = tudui(x)
print(output)
A grayscale image is represented by a 2-D matrix, so its channel count is 1. A color image is represented by a 3-D matrix, with 3 channels.
torch.nn.functional.conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) → Tensor
input: the input, an input tensor of shape (minibatch, in_channels, iH, iW)
weight: the convolution kernel, i.e. the weights
stride: the step size, i.e. how far the kernel moves each time; default 1
padding: pads the border of the input with zeros on all four sides; padding is usually half the kernel size, rounded down
The four numbers in conv2d's (N, C, H, W): N is the batch size, i.e. how many input images there are; C is the channel count (the input here is a plain 2-D tensor, so 1 channel); H is the height and W the width — hence (1, 1, 5, 5)
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],  # input image
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],  # convolution kernel
                       [0, 1, 0],
                       [2, 1, 0]])

input = torch.reshape(input, (1, 1, 5, 5))  # reshape input to a 4-D tensor
kernel = torch.reshape(kernel, (1, 1, 3, 3))  # reshape kernel to a 4-D tensor

output = F.conv2d(input, kernel, stride=1)
print(output)
In reshape(), -1 stands for n, computed as n = (total number of elements) / (product of the other given dimensions)
t = torch.tensor([[1, 1, 1, 1],
                  [2, 2, 2, 2],
                  [3, 3, 3, 3]])
# t is a 3x4 tensor, 12 elements in total
reshape(-1, 1) turns it into:
a 12 x 1 matrix
# -1 stands for n
reshape(1, -1) turns it into:
a 1 x 12 matrix
reshape(2, -1) turns it into:
a 2 x 6 matrix
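The same reshapes, written out as runnable code for a quick check:

import torch

t = torch.tensor([[1, 1, 1, 1],
                  [2, 2, 2, 2],
                  [3, 3, 3, 3]])  # 3x4, 12 elements in total

print(t.reshape(-1, 1).shape)  # torch.Size([12, 1]): -1 is inferred as 12 / 1
print(t.reshape(1, -1).shape)  # torch.Size([1, 12])
print(t.reshape(2, -1).shape)  # torch.Size([2, 6]): -1 is inferred as 12 / 2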
The difference between Flatten and flatten
(First: Flatten lives in the nn module, just like the convolution and pooling layers; flatten lives directly under the torch module, at the same level as nn.)
(Second: Flatten flattens each image separately, without concatenating the flattened vectors, whereas flatten flattens all the images and concatenates them as well.)
flatten_ = torch.nn.Flatten()  # keeps the batch dimension (start_dim=1 by default)

for data in test_loader:
    imgs, targets = data
    print(imgs.shape)  # e.g. torch.Size([64, 3, 32, 32])
    # imgs = torch.flatten(imgs)  # would flatten everything, batch dimension included
    # print(imgs.shape)
    output = flatten_(imgs)
    print(output.shape)  # torch.Size([64, 3072]): one vector per image
    output1 = torch.flatten(imgs)
    print(output1.shape)  # torch.Size([196608]): all images merged into a single vector
argmax(0): compares down the columns (row against row), giving the row index of the maximum in each column
argmax(1): compares across the rows (column against column), giving the column index of the maximum in each row
import torch
outputs = torch.tensor([[0.1, 0.5],
[0.4, 0.3]])
print(outputs.argmax(0))
print(outputs.argmax(1))
preds = outputs.argmax(1)  # store the predictions
targets = torch.tensor([1, 1])  # the expected results
print(preds == targets)  # compare the actual results with the expected ones
print((preds == targets).sum())  # number of matches between actual and expected results
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
Parameters

in_channels (int) – Number of channels in the input image
  channel count of the input image; color images usually have 3 channels
out_channels (int) – Number of channels produced by the convolution
  the number of output channels
kernel_size (int or tuple) – Size of the convolving kernel
  kernel size, int x int; a tuple is used for non-square kernels
stride (int or tuple, optional) – Stride of the convolution. Default: 1
  how far the kernel moves each step; default 1
padding (int or tuple, optional) – Zero-padding added to both sides of the input. Default: 0
  pads the input on all four sides
padding_mode (string, optional) – 'zeros', 'reflect', 'replicate' or 'circular'. Default: 'zeros'
  the padding style; the default is zeros
dilation (int or tuple, optional) – Spacing between kernel elements. Default: 1
groups (int, optional) – Number of blocked connections from input channels to output channels. Default: 1
bias (bool, optional) – If True, adds a learnable bias to the output. Default: True
Shape

Input: (N, C_in, H_in, W_in)
Output: (N, C_out, H_out, W_out), where
H_out = floor((H_in + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1)
W_out = floor((W_in + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1)
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)  # test set
dataloader = DataLoader(dataset, batch_size=64)

class Tudui(nn.Module):
    def __init__(self):
        super().__init__()  # initialize the parent class
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)  # set up the convolution conv1

    def forward(self, x):
        x = self.conv1(x)
        return x

tudui = Tudui()
writer = SummaryWriter("./logs")
step = 0
for data in dataloader:
    imgs, target = data
    output = tudui(imgs)
    writer.add_images("input", imgs, step)
    output = torch.reshape(output, (-1, 3, 30, 30))  # reshape the 6-channel output into 3 channels so it can be displayed
    writer.add_images("output", output, step)
    step = step + 1
writer.close()
Max pooling: MaxPool2d
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
Parameters

kernel_size – the size of the window to take a max over
  the pooling window size, int x int
stride – the stride of the window. Default value is kernel_size
  the stride; defaults to the pooling window size
padding – implicit zero padding to be added on both sides
dilation – a parameter that controls the stride of elements in the window
return_indices – if True, will return the max indices along with the outputs. Useful for torch.nn.MaxUnpool2d later
ceil_mode – when True, will use ceil instead of floor to compute the output shape
  when True (ceil mode), partially covered windows at the border are kept; when False (floor mode), they are discarded; the default is False
Shape:
Input: (N, C, H_in, W_in)
Output: (N, C, H_out, W_out)
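The notes include no pooling code, so here is a small sketch reusing the 5x5 input from the conv2d example above (MaxPool2d needs a float tensor), to show what ceil_mode changes:

import torch
from torch import nn

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (1, 1, 5, 5))  # (N, C, H, W)

pool_ceil = nn.MaxPool2d(kernel_size=3, ceil_mode=True)    # keeps the partial windows -> 2x2 output
pool_floor = nn.MaxPool2d(kernel_size=3, ceil_mode=False)  # drops the partial windows -> 1x1 output
print(pool_ceil(input))
print(pool_floor(input))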
ReLU
CLASS torch.nn.ReLU(inplace=False)
Applies the rectified linear unit function element-wise:
ReLU(x) = (x)⁺ = max(0, x)
Parameters

inplace – can optionally do the operation in-place. Default: False
With inplace=True, the output is written straight over the input
With inplace=False, the original input is kept and a new output tensor is produced; the default is False
Shape:
Input: (N,∗) where * means, any number of additional dimensions
Output: (N,∗) , same shape as the input
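A minimal sketch (the values are made up) of what inplace changes:

import torch
from torch import nn

x = torch.tensor([-1.0, 0.5, -2.0, 3.0])

relu = nn.ReLU()  # inplace=False: x is left untouched
y = relu(x)
print(x)  # tensor([-1.0000,  0.5000, -2.0000,  3.0000])
print(y)  # tensor([0.0000, 0.5000, 0.0000, 3.0000])

relu_ = nn.ReLU(inplace=True)  # inplace=True: the result is written back into x
relu_(x)
print(x)  # tensor([0.0000, 0.5000, 0.0000, 3.0000])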
BatchNorm2d
CLASS torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Parameters

num_features – C from an expected input of size (N, C, H, W)
eps – a value added to the denominator for numerical stability. Default: 1e-5
momentum – the value used for the running_mean and running_var computation. Can be set to None for cumulative moving average (i.e. simple average). Default: 0.1
affine – a boolean value that when set to True, this module has learnable affine parameters. Default: True
track_running_stats – a boolean value that when set to True, this module tracks the running mean and variance, and when set to False, this module does not track such statistics, and initializes statistics buffers running_mean and running_var as None. When these buffers are None, this module always uses batch statistics, in both training and eval modes. Default: True
Shape:
Input: (N, C, H, W)
Output: (N, C, H, W) (same shape as the input)
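A minimal usage sketch (the shapes are invented): num_features must match the channel dimension C of the input.

import torch
from torch import nn

bn = nn.BatchNorm2d(num_features=3)  # 3 must equal the channel dimension
x = torch.rand(4, 3, 32, 32)         # (N, C, H, W)
y = bn(x)
print(y.shape)                       # torch.Size([4, 3, 32, 32]): the shape is unchanged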
LINEAR
CLASS torch.nn.Linear(in_features, out_features, bias=True)
Applies a linear transformation to the incoming data: y = xAᵀ + b
This module supports TensorFloat32.
Parameters

in_features – size of each input sample
out_features – size of each output sample
bias – If set to False, the layer will not learn an additive bias. Default: True
  decides whether the bias b exists; it exists by default

Shape:
Input: (N, *, H_in) where H_in = in_features
Output: (N, *, H_out) where H_out = out_features
import torch
import torchvision.datasets
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

class Tudui(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output

tudui = Tudui()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    # input = torch.reshape(imgs, (1, 1, 1, -1))
    # print(input.shape)
    input = torch.flatten(imgs)
    print(input.shape)
    output = tudui(input)
    print(output.shape)
torch.nn.Sequential(*args)
A sequential container. Modules will be added to it in the order they are passed in the constructor. Alternatively, an ordered dict of modules can also be passed in.
# Example of using Sequential
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
# Example of using Sequential with OrderedDict
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))
For the height and width to stay unchanged after a convolution (with stride 1), padding should be kernel_size / 2, rounded down
import torch
import torchvision.datasets
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Tudui(nn.Module):
    def __init__(self):
        super().__init__()
        # self.conv1 = Conv2d(3, 32, 5, padding=2)
        # self.maxpool1 = MaxPool2d(2)
        # self.conv2 = Conv2d(32, 32, 5, padding=2)
        # self.maxpool2 = MaxPool2d(2)
        # self.conv3 = Conv2d(32, 64, 5, padding=2)
        # self.maxpool3 = MaxPool2d(2)
        # self.flatten = Flatten()
        # self.linear1 = Linear(1024, 64)
        # self.linear2 = Linear(64, 10)
        self.module = Sequential(  # build one model that bundles the different operations
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        # x = self.conv1(x)
        # x = self.maxpool1(x)
        # x = self.conv2(x)
        # x = self.maxpool2(x)
        # x = self.conv3(x)
        # x = self.maxpool3(x)
        # x = self.flatten(x)
        # x = self.linear1(x)
        # x = self.linear2(x)
        x = self.module(x)  # one model replaces the chain of separate operations
        return x

tudui = Tudui()  # create the object
# print(tudui)

# verification
input = torch.ones((64, 3, 32, 32))  # build an input: 64 is the batch_size, 3 the channels, 32x32 the size
output = tudui(input)
print(output.shape)

writer = SummaryWriter("logs")
writer.add_graph(tudui, input)
writer.close()
CLASS torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')
ℓ(x, y) = mean(L) if reduction = 'mean'; sum(L) if reduction = 'sum'
The default is mean; sum can be specified instead
Shape:
If reduction is 'none', the output is (N, *), the same shape as the input

import torch
from torch.nn import L1Loss
inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))
loss = L1Loss(reduction="sum")
result = loss(inputs, targets)
print(result)
torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
Shape:
Input: (N, *); Target: (N, *), the same shape as the input
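The notes include no MSELoss example; a sketch mirroring the L1Loss one above, with the same inputs:

import torch
from torch.nn import MSELoss

inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)

loss_mse = MSELoss()  # reduction='mean' by default
result = loss_mse(inputs, targets)
print(result)  # (0 + 0 + 4) / 3 ≈ 1.3333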
3. CROSSENTROPYLOSS: cross-entropy loss
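Before the full training script below, a minimal standalone sketch (the scores are invented): the input holds raw per-class scores of shape (N, C), and the target holds class indices.

import torch
from torch import nn

x = torch.tensor([[0.1, 0.2, 0.3]])  # scores for 3 classes, batch of 1
y = torch.tensor([1])                # the true class index
loss_cross = nn.CrossEntropyLoss()
print(loss_cross(x, y))              # -x[1] + log(exp(0.1) + exp(0.2) + exp(0.3)) ≈ 1.1019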
import torch
import torchvision.datasets
from torch import nn
from torch.nn import L1Loss, Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

class Tudui(nn.Module):
    def __init__(self):
        super().__init__()
        self.module = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.module(x)
        return x

tudui = Tudui()
optim = torch.optim.SGD(tudui.parameters(), lr=0.01)  # stochastic gradient descent
loss = nn.CrossEntropyLoss()

# start optimizing, 20 epochs in total
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()  # clear the gradients from the previous step
        result_loss.backward()  # compute this step's gradients
        optim.step()  # apply this step's update
        running_loss = running_loss + result_loss  # accumulate the total loss
    print(running_loss)
vgg16
import torchvision
import torch
from torch import nn

vgg16_true = torchvision.models.vgg16()
dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

# vgg16_true.classifier.add_module("name_linear", nn.Linear(1000, 10))
# adds a Linear layer named name_linear inside classifier
# vgg16_true.add_module("name_linear", nn.Linear(1000, 10))
# adds a Linear layer named name_linear at the very end of vgg16
vgg16_true.classifier[6] = nn.Linear(4096, 10)  # directly replaces Linear layer no. 6 of vgg16's classifier
print(vgg16_true)
import torch
import torchvision.models
from torch import nn

vgg16 = torchvision.models.vgg16()

# saving method 1: model structure + model parameters
torch.save(vgg16, "vgg16_method1.path")

# saving method 2: parameters only (officially recommended), saves space
torch.save(vgg16.state_dict(), "vgg16_method2.path")  # save the model's parameters as a dict

# for a model you wrote yourself
class Tudui(nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        output = input + 1
        return output

tudui = Tudui()
torch.save(tudui, "tudui_method1.path")
import torch
# method 1: load the model
import torchvision.models
from module_save import *

module = torch.load("vgg16_method1.path")
# print(module)  # this module is the original vgg16

# method 2: load the model
module2 = torch.load("vgg16_method2.path")
print(module2)
# restore the original model
vgg16 = torchvision.models.vgg16()
vgg16.load_state_dict(module2)  # load the model through its parameters
print(vgg16)

# for a model you wrote yourself
# class Tudui(nn.Module):
#     def __init__(self) -> None:
#         super().__init__()
#
#     def forward(self, input):
#         output = input + 1
#         return output
#
#
# tudui = Tudui()  # the class must be redefined or imported from another file
module3 = torch.load("tudui_method1.path")
print(module3)
Training on the CIFAR10 dataset as an example
import torch
import torchvision.datasets
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

train_data = torchvision.datasets.CIFAR10("./dataset", train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)  # training set
test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)  # test set

# length
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Length of the training set: {}".format(train_data_size))
print("Length of the test set: {}".format(test_data_size))

# load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# build the neural network, a 10-class classifier
class Tudui(nn.Module):
    def __init__(self):
        super().__init__()
        self.module = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.module(x)
        return x

# main guard, to sanity-check the model
# if __name__ == '__main__':
#     tudui = Tudui()
#     input = torch.ones((64, 3, 32, 32))
#     output = tudui(input)
#     print(output.shape)

# create the model
tudui = Tudui()

# create the loss function
loss_function = nn.CrossEntropyLoss()  # cross-entropy

# optimizer
learning_rate = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)  # stochastic gradient descent

# training bookkeeping
total_train_step = 0  # number of training steps so far
total_test_step = 0  # number of test rounds so far
epoch = 10  # number of epochs

# add tensorboard
writer = SummaryWriter("./train_logs")

for i in range(epoch):
    print("----- epoch {} starts -----".format(i + 1))
    for data in train_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_function(outputs, targets)
        # zero the optimizer's gradients
        optimizer.zero_grad()
        loss.backward()  # backpropagate to get the gradients
        optimizer.step()  # apply the update
        total_train_step += 1
        if total_train_step % 50 == 0:  # only report every 50 steps, to avoid flooding the output
            print("step: {}, loss: {}".format(total_train_step, loss))  # loss could also be loss.item()
            writer.add_scalar("train_loss", loss, total_train_step)

    # one epoch of training is done
    # the test phase starts, to see how well the training generalizes to the test set
    total_test_loss = 0  # total loss over the whole test set
    total_accuracy = 0  # total number of correct predictions
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_function(outputs, targets)  # loss for this batch
            total_test_loss += loss.item()  # accumulate the overall loss
            # total_test_loss is a plain number while loss is a tensor,
            # so item() is needed to extract the number
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # save the model after every epoch
    torch.save(tudui, "tudui_{}.path".format(i))
    print("model saved")
Method 1:
call .cuda() on the network model, the data (inputs and targets), and the loss function
# create the model
tudui = Tudui()
# train on the GPU
if torch.cuda.is_available():
    tudui = tudui.cuda()

# create the loss function
loss_function = nn.CrossEntropyLoss()  # cross-entropy
# train on the GPU
if torch.cuda.is_available():
    loss_function = loss_function.cuda()

imgs, targets = data
if torch.cuda.is_available():
    imgs = imgs.cuda()
    targets = targets.cuda()
Method 2:
Define the training device:
device = torch.device("cuda")
device = torch.device("cpu")
cuda trains on the GPU; cpu trains on the CPU
It can also be written in one line, a bit of syntactic sugar:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# define the training device
device = torch.device("cuda")

# create the model
tudui = Tudui()
# train on the GPU
tudui = tudui.to(device)  # a plain tudui.to(device) also works for a model

# create the loss function
loss_function = nn.CrossEntropyLoss()  # cross-entropy
# train on the GPU
loss_function = loss_function.to(device)  # a plain loss_function.to(device) also works

imgs = imgs.to(device)
targets = targets.to(device)  # imgs and targets must be reassigned
import torch
import torchvision.datasets
import time
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# define the training device
# device = torch.device("cuda")
# can also be written as
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # syntactic sugar

train_data = torchvision.datasets.CIFAR10("./dataset", train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)  # training set
test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)  # test set

# length
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Length of the training set: {}".format(train_data_size))
print("Length of the test set: {}".format(test_data_size))

# load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# build the neural network, a 10-class classifier
class Tudui(nn.Module):
    def __init__(self):
        super().__init__()
        self.module = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.module(x)
        return x

# create the model
tudui = Tudui()
tudui = tudui.to(device)  # train on the GPU

# create the loss function
loss_function = nn.CrossEntropyLoss()  # cross-entropy
loss_function = loss_function.to(device)  # train on the GPU

# optimizer
learning_rate = 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)  # stochastic gradient descent

# training bookkeeping
total_train_step = 0  # number of training steps so far
total_test_step = 0  # number of test rounds so far
epoch = 10  # number of epochs

# add tensorboard
writer = SummaryWriter("./train_logs")

# set the start time
# start_time = time.time()
for i in range(epoch):
    start_time = time.time()  # the clock restarts every epoch
    print("----- epoch {} starts -----".format(i + 1))
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = tudui(imgs)
        loss = loss_function(outputs, targets)
        # zero the optimizer's gradients
        optimizer.zero_grad()
        loss.backward()  # backpropagate to get the gradients
        optimizer.step()  # apply the update
        total_train_step += 1
        if total_train_step % 100 == 0:  # only report every 100 steps, to avoid flooding the output
            end_time = time.time()
            print("training time: {}".format(end_time - start_time))
            print("step: {}, loss: {}".format(total_train_step, loss))  # loss could also be loss.item()
            writer.add_scalar("train_loss", loss, total_train_step)

    # one epoch of training is done
    # the test phase starts, to see how well the training generalizes to the test set
    total_test_loss = 0  # total loss over the whole test set
    total_accuracy = 0  # total number of correct predictions
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = tudui(imgs)
            loss = loss_function(outputs, targets)  # loss for this batch
            total_test_loss += loss.item()  # accumulate the overall loss
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("Loss on the whole test set: {}".format(total_test_loss))
    print("Accuracy on the whole test set: {}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # save the model after every epoch
    torch.save(tudui, "tudui_{}.path".format(i))
    print("model saved")
A png is a four-channel image: RGB plus transparency
A jpg is a three-channel image: RGB
image = image.convert("RGB") keeps just the color channels
If the image is already RGB, the channel count is unchanged
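A tiny sketch of the idea, reusing the dog.png path from the inference example below (whether the file actually opens as RGBA depends on the image itself):

from PIL import Image

image = Image.open("dog_image/dog.png")  # a png typically opens with 4 channels (RGBA)
print(image.mode)                        # e.g. 'RGBA'
image = image.convert("RGB")             # drop the alpha channel, keep the color channels
print(image.mode)                        # 'RGB'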
If the saved model was trained on the GPU but is now being loaded on a CPU, the map_location must be specified when loading:
module = torch.load("tudui.path", map_location=torch.device("cpu"))
from PIL import Image
import torch
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear

image_path = "dog_image/dog.png"
image = Image.open(image_path)
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)

# define the training device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# build the neural network, a 10-class classifier
class Tudui(nn.Module):
    def __init__(self):
        super().__init__()
        self.module = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.module(x)
        return x

module = torch.load("tudui_0.path")
# print(module)
image = torch.reshape(image, (1, 3, 32, 32))  # reshape the image to the expected size
image = image.to(device)  # move the image to the training device
module.eval()  # switch the module to evaluation mode
with torch.no_grad():  # saves memory
    output = module(image)
print(output)
print(output.argmax(1))  # print the predicted class