当前位置:   article > 正文

如何给自己的模型添加自注意力模块_如何在自己的模型中加入注意力机制

如何在自己的模型中加入注意力机制

我使用的是预训练的模型,efficientnet-b4,数据集是人脸图像,注意力模块是添加在分类器之间

ps:调参数真的和炼丹一样,试了一天能才跑通代码,最后我也不知道是因为啥,感觉自己的自注意力模块还是没学太明白,很晕!不得不记录一下这个时刻~

背景:图像二分类问题

1.导入必要的包,以及配置环境

  1. !pip install efficientnet_pytorch
  2. import warnings
  3. warnings.filterwarnings('ignore')
  4. !pip install efficientnet_pytorch
  5. import pandas as pd
  6. import matplotlib.pyplot as plt
  7. import seaborn as sns
  8. import random
  9. import torch
  10. import torch.nn as nn
  11. import torch.optim as optim
  12. !pip install vision-transformer-pytorch
  13. from torchvision import transforms, models
  14. from sklearn.metrics import classification_report
  15. from sklearn.utils import shuffle
  16. from sklearn.metrics import confusion_matrix
  17. import seaborn as sns
  18. from torch.utils.tensorboard import SummaryWriter
  19. from torch.optim.lr_scheduler import ReduceLROnPlateau
  20. import numpy as np
  21. from tabulate import tabulate
  22. import os
  23. import glob
  24. import json
  25. import shutil
  26. from PIL import Image, ImageDraw
  27. import torchvision.transforms as transforms
  28. from torchvision.datasets import ImageFolder
  29. from torch.utils.data import DataLoader
  30. from efficientnet_pytorch import EfficientNet
  31. from torch import nn
  32. from vision_transformer_pytorch import VisionTransformer
  33. import torch.nn.functional as F
  34. import torch
  35. import torch.nn as nn
  36. import torchvision.models as models

2.设置训练模型的超参数

  1. #定义配置文件
  2. class Config:
  3. def __init__(self):
  4. #设置输入图像的大小
  5. self.image_width = 128
  6. self.image_height = 128
  7. self.epoch = 1
  8. self.seed = 42
  9. self.batch_size = 16 #batchsize的大小会影响最后的输出的维度,如batch_size=32,最后输出为32*1是1维向量
  10. self.dataset_path = '/kaggle/input/.../'
  11. # self.checkpoint_filepath = 'model_checkpoint.h5'
  12. # self.logs_path = '/kaggle/working/logs'
  13. #实例化配置函数
  14. config = Config()
  15. print("Checking Epoch Configuration:", config.epoch)

3.数据集的预处理

  1. dataset = {"image_path":[],"img_status":[],"where":[]}
  2. for where in os.listdir(config.dataset_path):
  3. for status in os.listdir(config.dataset_path+"/"+where):
  4. for image in glob.glob(os.path.join(config.dataset_path, where, status, "*.jpg")):
  5. dataset["image_path"].append(image)
  6. dataset["img_status"].append(status)
  7. dataset["where"].append(where)
  8. dataset = pd.DataFrame(dataset)
  9. #将数据集进行打乱,并对其进行升序排序
  10. dataset = shuffle(dataset)
  11. dataset = dataset.reset_index(drop=True)
  12. # 对训练集-数据集进行数据增强
  13. # 12/05 定义一些增强操作
  14. train_transform = transforms.Compose([
  15. transforms.RandomHorizontalFlip(), # 随机水平翻转
  16. transforms.RandomRotation(degrees=15), # 随机旋转
  17. transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # 颜色扭曲
  18. #transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)), # 随机裁剪和缩放
  19. transforms.ToTensor(), # 转换为张量
  20. transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 归一化
  21. ])
  22. # Data Transformation for Validation and Testing
  23. val_test_transform = transforms.Compose([
  24. transforms.ToTensor(),
  25. transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  26. ])
  27. # Data Loaders
  28. train_dataset = ImageFolder(os.path.join(config.dataset_path, 'train'), transform=train_transform)
  29. train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
  30. valid_dataset = ImageFolder(os.path.join(config.dataset_path, 'valid'), transform=val_test_transform)
  31. valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False)
  32. test_dataset = ImageFolder(os.path.join(config.dataset_path, 'test'), transform=val_test_transform)
  33. test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False)

4.定义自注意力模块

  1. class SelfAttention(nn.Module):
  2. def __init__(self, in_channels):
  3. super(SelfAttention, self).__init__()
  4. self.theta = nn.Conv2d(in_channels, 112, kernel_size=1, stride=1)
  5. self.phi = nn.Conv2d(in_channels, 112, kernel_size=1, stride=1)
  6. self.g = nn.Conv2d(in_channels, 64, kernel_size=1, stride=1)
  7. self.concat = nn.Conv2d(64, in_channels, kernel_size=1, stride=1)
  8. self.softmax = nn.Softmax(dim=-1)
  9. def forward(self, x):
  10. theta = self.theta(x)
  11. phi = self.phi(x)
  12. g = self.g(x)
  13. theta = theta.view(x.size(0), -1, x.size(2) * x.size(3))
  14. phi = phi.view(x.size(0), -1, x.size(2) * x.size(3))
  15. g = g.view(x.size(0), -1, x.size(2) * x.size(3))
  16. # print("Theta shape:", theta.shape)
  17. # print("Phi shape:", phi.shape)
  18. # print("G shape:", g.shape)
  19. theta = theta.permute(0, 2, 1)
  20. attn = torch.matmul(theta, phi)
  21. attn = self.softmax(attn)
  22. g = g.permute(0, 2, 1)
  23. attn_g = torch.matmul(attn, g)
  24. attn_g = attn_g.permute(0, 2, 1)
  25. attn_g = attn_g.view(x.size(0), g.size(1), x.size(2), x.size(3))
  26. attn_g = self.concat(attn_g)
  27. return attn_g + x
  28. # Continue with the rest of your attention mechanism...

加之后

5.自定义的预训练模型

  1. class EfficientNetWithAttention(nn.Module):
  2. def __init__(self, num_classes, pretrained=True, attention_channels=1792):
  3. super(EfficientNetWithAttention, self).__init__()
  4. # Load the pre-trained EfficientNet as a feature extractor
  5. efficientnet = models.efficientnet_b4(pretrained=pretrained)
  6. self.features = efficientnet.features
  7. # Add custom head
  8. self.attention = SelfAttention(attention_channels)
  9. self.avg_pool = nn.AdaptiveAvgPool2d(1)
  10. self.fc = nn.Linear(attention_channels, num_classes)
  11. self.val_loss = []
  12. self.val_accuracy = []
  13. self.test_loss = []
  14. self.test_accuracy = []
  15. self.train_loss = []
  16. self.train_accuracy = []
  17. def forward(self, x):
  18. # Forward pass through EfficientNet feature extractor
  19. x = self.features(x)
  20. # Apply self-attention module
  21. x = self.attention(x)
  22. # Global average pooling
  23. x = self.avg_pool(x)
  24. x = x.view(x.size(0), -1)
  25. # Fully connected layer for classification
  26. x = self.fc(x)
  27. return x
  28. def print_model_summary(self):
  29. print(self.model)
  30. print("Model Summary:")
  31. total_params = sum(p.numel() for p in self.parameters())
  32. print(f"Total Parameters: {total_params}")
  33. trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
  34. print(f"Trainable Parameters: {trainable_params}")
  35. def plot_metrics_graph(self):
  36. epochs = range(1, len(self.train_loss) + 1)
  37. plt.figure(figsize=(12, 8))
  38. plt.subplot(2, 1, 1)
  39. plt.plot(epochs, self.train_loss, label='Train Loss', linewidth=2, color='blue')
  40. plt.plot(epochs, self.val_loss, label='Validation Loss', linewidth=2, color='orange')
  41. plt.plot(epochs, self.test_loss, label='Test Loss', linewidth=2, color='green')
  42. plt.xlabel('Epochs')
  43. plt.ylabel('Loss')
  44. plt.title('Training ,Test and Validation Loss')
  45. plt.legend()
  46. plt.subplot(2, 1, 2)
  47. plt.plot(epochs, self.train_accuracy, label='Train Accuracy', linewidth=2, color='green')
  48. plt.plot(epochs, self.val_accuracy, label='Validation Accuracy', linewidth=2, color='red')
  49. plt.xlabel('Epochs')
  50. plt.ylabel('Accuracy')
  51. plt.title('Training and Validation Accuracy')
  52. plt.legend()
  53. plt.tight_layout()
  54. plt.show()
  55. def plot_confusion_matrix(self, y_true, y_pred):
  56. cm = confusion_matrix(y_true, y_pred)
  57. plt.figure(figsize=(8, 6))
  58. sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
  59. plt.xlabel("Predicted Labels")
  60. plt.ylabel("True Labels")
  61. plt.title("Confusion Matrix")
  62. plt.show()
  63. def train_model(self, train_loader, valid_loader, num_epochs, device):
  64. criterion = nn.BCEWithLogitsLoss() # Binary Cross-Entropy loss
  65. optimizer = optim.Adam(self.parameters(), lr=0.001)
  66. scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, verbose=True, min_lr=1e-6)
  67. for epoch in range(num_epochs):
  68. self.train() # Set the model to training mode
  69. total_loss = 0.0
  70. correct_train = 0
  71. total_train = 0
  72. print(f"Epoch [{epoch+1}/{num_epochs}] - Training...")
  73. for batch_idx, (inputs, labels) in enumerate(train_loader):
  74. inputs, labels = inputs.to(device), labels.to(device)
  75. optimizer.zero_grad()
  76. outputs = self(inputs)
  77. loss = criterion(outputs, labels.float().unsqueeze(1))
  78. loss.backward()
  79. optimizer.step()
  80. total_loss += loss.item() * inputs.size(0)
  81. predicted_labels = (outputs >= 0.0).float()
  82. correct_train += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
  83. total_train += labels.size(0)
  84. print(f"Epoch [{epoch+1}/{num_epochs}] - Batch [{batch_idx+1}/{len(train_loader)}] - "
  85. f"Loss: {loss.item():.4f} - Train Accuracy: {correct_train / total_train:.4f}")
  86. average_loss = total_loss / len(train_loader.dataset)
  87. train_accuracy = correct_train / total_train
  88. self.train_loss.append(average_loss)
  89. self.train_accuracy.append(train_accuracy)
  90. self.eval()
  91. total_val_loss = 0.0
  92. correct_val = 0
  93. total_val = 0
  94. y_true = []
  95. y_pred = []
  96. with torch.no_grad():
  97. for inputs, labels in valid_loader:
  98. inputs, labels = inputs.to(device), labels.to(device)
  99. outputs = self(inputs)
  100. val_loss = criterion(outputs, labels.float().unsqueeze(1))
  101. total_val_loss += val_loss.item() * inputs.size(0)
  102. predicted_labels = (outputs >= 0.0).float()
  103. correct_val += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
  104. total_val += labels.size(0)
  105. y_true.extend(labels.float().unsqueeze(1).cpu().numpy())
  106. y_pred.extend(predicted_labels.cpu().numpy())
  107. average_val_loss = total_val_loss / len(valid_loader.dataset)
  108. val_accuracy = correct_val / total_val
  109. self.val_loss.append(average_val_loss)
  110. self.val_accuracy.append(val_accuracy)
  111. print(f"Epoch [{epoch+1}/{num_epochs}] - "
  112. f"Train Loss: {average_loss:.4f} - Train Accuracy: {train_accuracy:.4f} - "
  113. f"Val Loss: {average_val_loss:.4f} - Val Accuracy: {val_accuracy:.4f} - "
  114. f"LR: {scheduler.optimizer.param_groups[0]['lr']:.6f}")
  115. scheduler.step(average_val_loss)
  116. self.plot_metrics_graph()
  117. self.plot_confusion_matrix(y_true, y_pred)

6.开始训练

  1. # 初始化模型
  2. num_classes = 1 # 你的类别数
  3. model = EfficientNetWithAttention(num_classes=num_classes, pretrained=True)
  4. # 输出模型结构
  5. #print(model)
  6. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  7. model.to(device)
  8. # Train the model using the integrated training loop
  9. num_epochs = config.epoch # Change this in last
  10. model.train_model(train_loader, valid_loader, num_epochs, device)
  11. #torch.save(model.state_dict(), 'model_efficient_b4.pth')

7.训练结果

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/很楠不爱3/article/detail/337450
推荐阅读
相关标签
  

闽ICP备14008679号