
Training Your Own Dataset with Transfer Learning


This post loads an image dataset with the ImageFolder class, applying the corresponding transforms, and then creates data loaders with the DataLoader class so the training loop can fetch data in batches. The pre-trained model used here is efficientnet_b4 (you can swap in a different model to suit your needs).

Dataset: any classification dataset will do!

You can run this on Kaggle or locally; Kaggle is recommended (it is convenient and has free datasets).

Step 1:

Import the required libraries.

    import warnings
    warnings.filterwarnings('ignore')

    !pip install efficientnet_pytorch
    !pip install vision-transformer-pytorch  # only needed for the (still unfinished) ViT experiment

    import os
    import glob
    import json
    import shutil
    import random

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from tabulate import tabulate
    from PIL import Image, ImageDraw

    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.optim.lr_scheduler import ReduceLROnPlateau
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter
    from torchvision import transforms, models
    from torchvision.datasets import ImageFolder

    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.utils import shuffle

    from efficientnet_pytorch import EfficientNet

Step 2:

Define a configuration class holding the hyperparameters used when training the model.

    # Configuration class
    class Config:
        def __init__(self):
            # Input image size
            self.image_width = 64
            self.image_height = 64
            self.epoch = 3  # number of training epochs; adjust to your situation
            self.seed = 42
            # The batch size determines the length of each output: with batch_size=32
            # the model emits a 32x1 output, i.e. 32 samples form one group, so the
            # final output is a one-dimensional vector of length batch_size.
            self.batch_size = 32
            self.dataset_path = 'path/to/mydatasets/'  # replace with the path to your dataset
            # self.checkpoint_filepath = 'model_checkpoint.h5'
            # self.logs_path = '/kaggle/working/logs'

    # Instantiate the configuration
    config = Config()
    print("Checking Epoch Configuration:", config.epoch)

Step 3: Dataset preparation and preprocessing

    # Walk the dataset_path directory, collect each image's path, status and
    # location, and store them in a pandas DataFrame for later processing and
    # analysis. The dict has three key-value pairs: the image path, the image
    # status, and where the image was taken.
    dataset = {"image_path": [], "img_status": [], "where": []}  # build the dict
    for where in os.listdir(config.dataset_path):
        for status in os.listdir(config.dataset_path + "/" + where):
            # Use glob.glob to collect the image files matching the pattern (*.jpg)
            for image in glob.glob(os.path.join(config.dataset_path, where, status, "*.jpg")):
                # Append each image's path, status and location to the matching list
                dataset["image_path"].append(image)
                dataset["img_status"].append(status)
                dataset["where"].append(where)
    # Convert the dict to a pandas DataFrame; each key becomes a column, so every
    # row holds one image's path, status and location.
    dataset = pd.DataFrame(dataset)
    # Shuffle the dataset
    dataset = shuffle(dataset)
    # Reset the index (dropping the old one) so rows are numbered sequentially again
    dataset = dataset.reset_index(drop=True)
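
Before moving on, it can be worth a quick sanity check that the DataFrame was filled as expected; a small sketch (my addition, using the column names from the dict above):

    print(dataset.head())
    print(dataset["img_status"].value_counts())  # class balance
    print(dataset["where"].value_counts())       # images per location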
    # Data augmentation for the training set (updated 12/05; this replaces an
    # earlier pipeline that only used a random horizontal flip)
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),      # random horizontal flip
        transforms.RandomRotation(degrees=15),  # random rotation
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # color jitter
        # transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),  # random crop and rescale
        transforms.ToTensor(),                  # convert to tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalize (ImageNet stats)
    ])

    # Data transformation for validation and testing
    val_test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Data loaders: ImageFolder loads the image dataset with the corresponding
    # transforms, and DataLoader then serves the data in batches during training.
    train_dataset = ImageFolder(os.path.join(config.dataset_path, 'train'), transform=train_transform)
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    valid_dataset = ImageFolder(os.path.join(config.dataset_path, 'valid'), transform=val_test_transform)
    valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False)
    test_dataset = ImageFolder(os.path.join(config.dataset_path, 'test'), transform=val_test_transform)
    test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False)
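
Note that neither pipeline above resizes the images, even though config.image_width and config.image_height are defined: DataLoader can only stack same-sized tensors into a batch, so unless all your images already share one size you will probably want a Resize at the front of each pipeline. A minimal sketch, assuming the Config sizes are what you want (EfficientNet-B4 nominally expects 380x380 inputs, but the efficientnet_pytorch implementation accepts other sizes):

    # Hypothetical fix, not part of the original post: prepend a Resize step.
    resize = transforms.Resize((config.image_height, config.image_width))
    train_transform.transforms.insert(0, resize)     # Compose keeps its steps in .transforms
    val_test_transform.transforms.insert(0, resize)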
    # train
    print("Train Dataset: ", train_dataset)
    print("Train Loader: ", train_loader)
    # valid
    print("Valid Dataset: ", valid_dataset)
    print("Valid Loader: ", valid_loader)
    # test
    print("Test Dataset: ", test_dataset)
    print("Test Loader: ", test_loader)

Step 4:

Define the model. The MyModel class defines the neural network by extending nn.Module: it loads a pre-trained architecture, replaces its fully connected layer, provides plotting utilities, and can summarize its architecture and parameter counts.

    class MyModel(nn.Module):
        def __init__(self, model_name, num_classes):
            super(MyModel, self).__init__()
            self.model = models.__dict__[model_name](pretrained=True)
            # VGG models
            if 'vgg' in model_name:
                num_features = self.model.classifier[6].in_features
                self.model.classifier[6] = nn.Sequential(
                    nn.Linear(num_features, 512),
                    nn.ReLU(),
                    nn.BatchNorm1d(512),
                    nn.Linear(512, num_classes)
                )
            # MobileNet models
            elif 'mobilenet' in model_name:
                num_features = self.model.classifier[1].in_features
                self.model.classifier = nn.Sequential(
                    nn.Linear(num_features, 512),
                    nn.ReLU(),
                    nn.BatchNorm1d(512),
                    nn.Linear(512, num_classes)
                )
            # EfficientNet models
            elif 'efficientnet_b4' in model_name:
                # Load the pre-trained EfficientNet and replace its classifier head
                self.model = EfficientNet.from_pretrained('efficientnet-b4')
                feature = self.model._fc.in_features
                self.model._fc = nn.Linear(in_features=feature, out_features=num_classes, bias=True)
                # print(model)
            # Vision Transformer -- not debugged correctly yet; the three branches
            # above all run. Note that ViTImageProcessor and vision_transformer are
            # not imported in Step 1, so this branch will fail as written.
            elif 'vit' in model_name:
                # Use the correct name for the Vision Transformer model
                # Define the pre-trained ViT model string
                model_str = 'google/vit-base-patch16-224-in21k'
                # Create a processor for ViT model input from the pre-trained model
                processor = ViTImageProcessor.from_pretrained(model_str)
                self.vit_model = vision_transformer.vit_base_patch16_224(pretrained=True)
                in_features = self.vit_model.head.in_features
                self.vit_model.head = nn.Linear(in_features, num_classes)
            else:
                num_features = self.model.fc.in_features
                self.model.fc = nn.Sequential(
                    nn.Linear(num_features, 512),
                    nn.ReLU(),
                    nn.BatchNorm1d(512),
                    nn.Linear(512, num_classes)
                )
            # Per-epoch metric history
            self.val_loss = []
            self.val_accuracy = []
            self.test_loss = []
            self.test_accuracy = []
            self.train_loss = []
            self.train_accuracy = []

        def forward(self, x):
            return self.model(x)

        def print_model_summary(self):
            print(self.model)
            print("Model Summary:")
            total_params = sum(p.numel() for p in self.parameters())
            print(f"Total Parameters: {total_params}")
            trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
            print(f"Trainable Parameters: {trainable_params}")

        def plot_metrics_graph(self):
            epochs = range(1, len(self.train_loss) + 1)
            plt.figure(figsize=(12, 8))
            plt.subplot(2, 1, 1)
            plt.plot(epochs, self.train_loss, label='Train Loss', linewidth=2, color='blue')
            plt.plot(epochs, self.val_loss, label='Validation Loss', linewidth=2, color='orange')
            if self.test_loss:  # only plot the test loss if it was recorded; it is
                # empty by default, and plotting an empty list would raise an error
                plt.plot(epochs, self.test_loss, label='Test Loss', linewidth=2, color='green')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.title('Training, Test and Validation Loss')
            plt.legend()
            plt.subplot(2, 1, 2)
            plt.plot(epochs, self.train_accuracy, label='Train Accuracy', linewidth=2, color='green')
            plt.plot(epochs, self.val_accuracy, label='Validation Accuracy', linewidth=2, color='red')
            plt.xlabel('Epochs')
            plt.ylabel('Accuracy')
            plt.title('Training and Validation Accuracy')
            plt.legend()
            plt.tight_layout()
            plt.show()

        def plot_confusion_matrix(self, y_true, y_pred):
            cm = confusion_matrix(y_true, y_pred)
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
            plt.xlabel("Predicted Labels")
            plt.ylabel("True Labels")
            plt.title("Confusion Matrix")
            plt.show()

        def train_model(self, train_loader, valid_loader, num_epochs, device):
            criterion = nn.BCEWithLogitsLoss()  # binary cross-entropy on raw logits
            optimizer = optim.Adam(self.parameters(), lr=0.001)
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, verbose=True, min_lr=1e-6)
            for epoch in range(num_epochs):
                self.train()  # set the model to training mode
                total_loss = 0.0
                correct_train = 0
                total_train = 0
                print(f"Epoch [{epoch+1}/{num_epochs}] - Training...")
                for batch_idx, (inputs, labels) in enumerate(train_loader):
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = self(inputs)
                    loss = criterion(outputs, labels.float().unsqueeze(1))
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item() * inputs.size(0)
                    predicted_labels = (outputs >= 0.0).float()  # logit >= 0 means p >= 0.5
                    correct_train += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
                    total_train += labels.size(0)
                    print(f"Epoch [{epoch+1}/{num_epochs}] - Batch [{batch_idx+1}/{len(train_loader)}] - "
                          f"Loss: {loss.item():.4f} - Train Accuracy: {correct_train / total_train:.4f}")
                average_loss = total_loss / len(train_loader.dataset)
                train_accuracy = correct_train / total_train
                self.train_loss.append(average_loss)
                self.train_accuracy.append(train_accuracy)

                # Validation pass
                self.eval()
                total_val_loss = 0.0
                correct_val = 0
                total_val = 0
                y_true = []
                y_pred = []
                with torch.no_grad():
                    for inputs, labels in valid_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = self(inputs)
                        val_loss = criterion(outputs, labels.float().unsqueeze(1))
                        total_val_loss += val_loss.item() * inputs.size(0)
                        predicted_labels = (outputs >= 0.0).float()
                        correct_val += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
                        total_val += labels.size(0)
                        y_true.extend(labels.float().unsqueeze(1).cpu().numpy())
                        y_pred.extend(predicted_labels.cpu().numpy())
                average_val_loss = total_val_loss / len(valid_loader.dataset)
                val_accuracy = correct_val / total_val
                self.val_loss.append(average_val_loss)
                self.val_accuracy.append(val_accuracy)
                print(f"Epoch [{epoch+1}/{num_epochs}] - "
                      f"Train Loss: {average_loss:.4f} - Train Accuracy: {train_accuracy:.4f} - "
                      f"Val Loss: {average_val_loss:.4f} - Val Accuracy: {val_accuracy:.4f} - "
                      f"LR: {scheduler.optimizer.param_groups[0]['lr']:.6f}")
                scheduler.step(average_val_loss)
            self.plot_metrics_graph()
            self.plot_confusion_matrix(y_true, y_pred)
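
The class above fine-tunes every parameter of the pre-trained network. A common transfer-learning variant is to freeze the backbone and train only the replaced head; here is a minimal sketch for the EfficientNet branch (my addition; freeze_backbone is a hypothetical helper, not something the original post defines):

    def freeze_backbone(my_model):
        # Freeze everything, then re-enable gradients for the replaced head only
        for param in my_model.model.parameters():
            param.requires_grad = False
        for param in my_model.model._fc.parameters():  # efficientnet_pytorch keeps the head in _fc
            param.requires_grad = True

    # If you use this, you may also want the optimizer inside train_model to see
    # only the trainable parameters, e.g.:
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=0.001)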

Instantiate the model and move it to the device (the GPU if one is available, otherwise the CPU).

    # Instantiate the MyModel model
    # num_classes = 1          # change this to your number of classes; the fully
    #                          # connected output should be 1 for this binary setup
    # model_name = "resnet18"  # change this to any model available in torchvision
    # model_name = "vgg16"
    num_classes = 1
    model_name = "efficientnet_b4"
    model = MyModel(model_name=model_name, num_classes=num_classes)
    # Move the model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Print model summary
    # model.print_model_summary()

Train the model and save the trained weights.

    # Train the model using the integrated training loop
    num_epochs = config.epoch  # adjust this last, once everything else runs
    model.train_model(train_loader, valid_loader, num_epochs, device)
    torch.save(model.state_dict(), 'model_efficient_b4.pth')
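
test_loader is built in Step 3 but never used by the training loop. A minimal sketch of how you might evaluate the trained model on it (my addition; classification_report comes from the Step 1 imports, and the 0-logit threshold mirrors train_model):

    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            preds = (outputs >= 0.0).float().cpu().squeeze(1)  # logit >= 0 -> class 1
            y_pred.extend(preds.numpy())
            y_true.extend(labels.numpy())
    print(classification_report(y_true, y_pred))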

Done!

You can copy, paste, and run the code line by line~

Training with ViT hasn't been fixed yet; I'm leaving that as a gap to fill in once I get it working.

2023.12.10
