图1为一张甲骨文原始拓片的图像分割示例,左图为一整张甲骨文原始拓片,右图即为利用图像分割算法[4]实现的拓片图像上甲骨文的单字分割。甲骨文的同一个字会有很多异体字,这无疑增加了甲骨文识别的难度,图2展示了甲骨文中“人”字的不同异体字。
问题1:对于附件1(Pre test 文件夹)给定的三张甲骨文原始拓片图片进行图像预处理,提取图像特征,建立甲骨文图像预处理模型,实现对甲骨文图像干扰元素的初步判别和处理。
import cv2
import numpy as np
import torch
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Paths of the three raw oracle-bone rubbings from attachment 1 (Pre_test folder).
image_path1 = r'cup_data\1_Pre_test\h02060.jpg'
image_path2 = r'cup_data\1_Pre_test\w01637.jpg'
image_path3 = r'cup_data\1_Pre_test\w01870.jpg'

# Load every image and fail early with a readable message: cv2.imread does
# not raise on a missing or undecodable file, it silently returns None, which
# would otherwise surface later as an obscure error inside cv2.resize.
loaded_images = []
for path in (image_path1, image_path2, image_path3):
    loaded = cv2.imread(path)
    if loaded is None:
        raise OSError(f"cv2.imread failed to load image: {path}")
    loaded_images.append(loaded)
image1, image2, image3 = loaded_images

# Target size of the model input (the original note calls 416x416 the size
# recommended for YOLOv5).
target_size = (416, 416)

# Resize every rubbing to the target size.
resized_image1, resized_image2, resized_image3 = (
    cv2.resize(loaded, target_size) for loaded in loaded_images
)

# Scale pixel values from [0, 255] to [0, 1] as float32.
normalized_image = [
    resized.astype(np.float32) / 255.0
    for resized in (resized_image1, resized_image2, resized_image3)
]

# Any rotation step, if needed, would go here.

# Convert the first image from HWC to CHW layout and add a leading batch
# dimension so it can be fed to the YOLOv5 model.
# NOTE(review): the channels are still in OpenCV's BGR order here - confirm
# which channel order the consuming model expects.
tensor_image = torch.tensor(normalized_image[0]).permute(2, 0, 1).unsqueeze(0)

# Show the first original rubbing. OpenCV loads images as BGR while matplotlib
# interprets arrays as RGB, so convert before displaying.
plt.imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

# YOLOv5 training code
def main(opt, callbacks=Callbacks()):
    """Validate and normalise the run options for training or hyperparameter evolution.

    As listed here, the function only prepares ``opt``: it either restores the
    options of a previous run (resume) or checks the supplied ones and derives
    ``opt.save_dir``. The steps that would actually launch training are not
    part of this excerpt.

    Args:
        opt: Namespace of run options (reads ``resume``, ``evolve``, ``data``,
            ``cfg``, ``hyp``, ``weights``, ``project``, ``name``, ``exist_ok``).
        callbacks: Callbacks object; not used inside this excerpt.
    """
    # Only the main process (RANK -1 or 0) prints arguments and runs the
    # repository / requirements checks.
    if RANK in {-1, 0}:
        print_args(vars(opt))
        check_git_status()
        check_requirements(ROOT / "requirements.txt")

    resuming = opt.resume and not check_comet_resume(opt) and not opt.evolve
    if resuming:
        # Resume from the given checkpoint, or from the most recent run.
        if isinstance(opt.resume, str):
            checkpoint = Path(check_file(opt.resume))
        else:
            checkpoint = Path(get_latest_run())
        saved_options_file = checkpoint.parent.parent / "opt.yaml"  # train options yaml
        original_data = opt.data  # original dataset

        if saved_options_file.is_file():
            with open(saved_options_file, errors="ignore") as handle:
                saved_options = yaml.safe_load(handle)
        else:
            # No opt.yaml next to the run: fall back to the options stored
            # inside the checkpoint itself.
            saved_options = torch.load(checkpoint, map_location="cpu")["opt"]

        opt = argparse.Namespace(**saved_options)  # replace
        # Reinstate the fields that identify this as a resumed run.
        opt.cfg = ""
        opt.weights = str(checkpoint)
        opt.resume = True
        if is_url(original_data):
            opt.data = check_file(original_data)  # avoid HUB resume auth timeout
    else:
        # Fresh run: check every path-like option in one step.
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
            check_file(opt.data),
            check_yaml(opt.cfg),
            check_yaml(opt.hyp),
            str(opt.weights),
            str(opt.project),
        )
        assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"

        if opt.evolve:
            # If the default project name is in use, rename to runs/evolve.
            if opt.project == str(ROOT / "runs/train"):
                opt.project = str(ROOT / "runs/evolve")
            # Pass resume on to exist_ok and disable resume.
            opt.exist_ok = opt.resume
            opt.resume = False

        if opt.name == "cfg":
            opt.name = Path(opt.cfg).stem  # use model.yaml as name

        run_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)
        opt.save_dir = str(run_dir)

# Training-result visualisation: the recognition accuracy of our fine-tuned
# model on the training set is shown below (three result images are provided).
import matplotlib.pyplot as plt
import cv2

# Read the images.
image_path1 = "jupyter_need/test_datasets0.jpg"
image_path2 = "jupyter_need/test_datasets1.jpg"
image_path3 = "jupyter_need/test_datasets2.jpg"
image1 = cv2.imread(image_path1)
image2 = cv2.imread(image_path2)
image3 = cv2.imread(image_path3)

# One figure with three side-by-side panels, one per image.
plt.figure(figsize=(15, 5))
for panel_index, picture in enumerate((image1, image2, image3), start=1):
    plt.subplot(1, 3, panel_index)
    # OpenCV stores pixels as BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
    plt.axis('off')

# Display the figure.
plt.show()

# 为了评估我们模型的性能,我们计算了模型在训练集上训练50个epoch后的F1_CURVE、P_CURVE、R_CURVE和PR_CURVE # F1 曲线是用于评估分类器性能的一种图形化指标,它显示了 F1 分数随置信度阈值的变化情况。 # F1 分数是精确度(Precision)和召回率(Recall)的调和平均值,它可以帮助我们在精确度和召回率之间找到一个平衡点。 # "P curve" 指精确度-置信度曲线,显示精确度随置信度阈值的变化;"R curve" 同理,显示召回率随置信度阈值的变化。"PR curve" 即 "Precision-Recall curve"(精确度-召回率曲线),它是用于评估分类模型性能的一种常见工具,特别是在处理不平衡数据集时。 # Precision-Recall 曲线显示了在不同阈值下的精确度和召回率之间的关系。精确度(Precision)是被正确分类的正例占所有被分类为正例的样本的比例, # 召回率(Recall)是被正确分类的正例占所有实际正例的样本的比例。 # 绘制 P-R 曲线的过程类似于绘制 ROC 曲线,只是在 P-R 曲线中,横轴通常是召回率,纵轴是精确度。在绘制过程中, # 可以通过在模型输出的概率或得分上变化阈值,计算不同阈值下的精确度和召回率,并绘制曲线。 # P-R 曲线对于不平衡数据集的分类器评估尤为重要,因为它可以更清晰地显示出分类器在不同类别之间的性能差异。 # 通常情况下,当类别不平衡时,使用 P-R 曲线比 ROC 曲线更能展现出分类器的优势和缺陷。 # 以下是这些指标的具体结果图:
import matplotlib.pyplot as plt
import cv2

# Read the metric plots: F1 curve, precision curve and precision-recall curve.
image_path1 = "jupyter_need/F1_curve.png"
image_path2 = "jupyter_need/P_curve.png"
image_path3 = "jupyter_need/PR_curve.png"
image1 = cv2.imread(image_path1)
image2 = cv2.imread(image_path2)
image3 = cv2.imread(image_path3)

# One figure with three side-by-side panels. The earlier version loaded all
# three images but laid out a 1x2 grid, so the PR curve was never displayed.
plt.figure(figsize=(15, 5))
for panel_index, curve_image in enumerate((image1, image2, image3), start=1):
    plt.subplot(1, 3, panel_index)
    # OpenCV stores pixels as BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(curve_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')

# Display the figure.
plt.show()

# Using the YOLO model fine-tuned for question 2 (above), we automatically
# segment single characters in the 200 raw oracle-bone images of attachment 3;
# the segmentation results are saved in Test_results.xlsx.
# The automatic segmentation code is as follows:
def main(opt):
    """Dispatch a YOLOv5 task according to ``opt.task``.

    For the tasks "train", "val" and "test" a single ``run`` is executed with
    the given options. For any other task the weights are normalised to a list
    and half precision is enabled when CUDA is usable; within this excerpt only
    the "speed" task then performs runs (one per weights entry, without plots).

    Args:
        opt: Namespace of options (reads ``task``, ``conf_thres``,
            ``save_hybrid``, ``weights``, ``device``; every attribute is
            forwarded to ``run`` as a keyword argument).
    """
    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))

    if opt.task not in ("train", "val", "test"):
        # Benchmark-style tasks: always work with a list of weights.
        if isinstance(opt.weights, list):
            weights = opt.weights
        else:
            weights = [opt.weights]
        opt.half = torch.cuda.is_available() and opt.device != "cpu"  # FP16 for fastest results

        if opt.task == "speed":  # speed benchmarks
            # python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
            opt.conf_thres = 0.25
            opt.iou_thres = 0.45
            opt.save_json = False
            for candidate in weights:
                opt.weights = candidate
                run(**vars(opt), plots=False)
        return

    # Normal run.
    if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
        LOGGER.warning(f"WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results")
    if opt.save_hybrid:
        LOGGER.warning("WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone")
    run(**vars(opt))

- # inception model 训练代码:
- import json
- import os
- import torch
- import torch.nn as nn
- import torchvision.models as models
- from torchvision import transforms
- from torch.utils.data import DataLoader, Dataset
- from PIL import Image
- from sklearn.metrics import f1_score
- from torch.optim.lr_scheduler import StepLR
- from tqdm import tqdm
# Check whether CUDA is available and select the compute device accordingly.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Custom dataset
class CustomDataset(Dataset):
    """Dataset that pairs image files with label vectors by list position.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence of per-image labels; ``labels[i]`` is the label of
            ``file_paths[i]``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.

    Raises:
        ValueError: If ``file_paths`` and ``labels`` differ in length.
    """

    def __init__(self, file_paths, labels, transform=None):
        # Files and labels are matched purely by index, so a length mismatch
        # means some samples would be mislabeled or fail later mid-epoch.
        if len(file_paths) != len(labels):
            raise ValueError(
                f"file_paths and labels must have the same length, "
                f"got {len(file_paths)} and {len(labels)}"
            )
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; the label is returned as a float32 tensor.
        """
        # Open inside a context manager so the underlying file handle is
        # released as soon as the pixel data has been converted.
        with Image.open(self.file_paths[idx]) as source_image:
            image = source_image.convert('RGB')
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        if self.transform:
            image = self.transform(image)
        return image, label
# Hyperparameters
batch_size = 32
num_epochs = 50
learning_rate = 0.001
num_classes = 76  # width of the model's output layer, i.e. entries per label vector

# Image preprocessing: resize to 299x299, convert to a tensor and normalise
# each channel with the mean/std constants below.
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_file_dir = "q4_data/train/image"
# NOTE(review): os.listdir returns entries in arbitrary order, while the
# labels below are matched to the files purely by list position - confirm
# that q4_train_inception.json was generated in this same order.
train_file_paths = os.listdir(train_file_dir)
# Prefix every file name with its folder.
train_file_paths = [os.path.join(train_file_dir, file_name) for file_name in train_file_paths]

# Read the labels from the JSON file.
with open("q4_train_inception.json", "r") as json_file:
    train_labels = json.load(json_file)

# Data loading
train_dataset = CustomDataset(train_file_paths, train_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Load the pretrained InceptionV3 model.
model = models.inception_v3(pretrained=True)
model.aux_logits = False  # disable the auxiliary output

# # Freeze the model parameters
# for param in model.parameters():
#     param.requires_grad = False

# Replace the final fully connected layer with a head for multi-label
# classification.
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.ReLU(inplace=True),
    nn.Linear(512, num_classes),
    nn.Sigmoid()  # multi-label classification uses a sigmoid activation
)
model = model.to(device)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Learning-rate scheduler: halve the learning rate every 5 epochs.
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

# Create the checkpoint directory up front; otherwise torch.save would fail
# only after the first full epoch of training and evaluation has been spent.
os.makedirs('ckpt', exist_ok=True)

# Train the model
total_step = len(train_loader)
for epoch in tqdm(range(num_epochs)):
    model.train()
    for i, (images, labels) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        # Loss
        loss = criterion(outputs, labels)

        # Backward pass and optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 1000 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_step}], Loss: {loss.item():.4f}')

    # Adjust the learning rate once per epoch.
    scheduler.step()

    # Evaluation pass; it iterates train_loader, so the metrics are measured
    # on the training data.
    model.eval()
    with torch.no_grad():
        # Accumulators for predictions and ground-truth labels
        all_preds = []
        all_labels = []
        correct_predictions = 0
        total_predictions = 0
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass; threshold the sigmoid outputs at 0.5.
            outputs = model(images)
            predicted = outputs > 0.5

            # A sample counts as correct only when every label matches.
            correct_predictions += (predicted == labels.byte()).all(1).sum().item()
            total_predictions += labels.size(0)

            # Collect predictions and ground-truth labels.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        accuracy = correct_predictions / total_predictions
        f1 = f1_score(all_labels, all_preds, average='micro')
        print(f'Epoch [{epoch + 1}/{num_epochs}], Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}')

    # Save a checkpoint after every epoch.
    torch.save(model.state_dict(), f'ckpt/inceptionv3_ft_{epoch}_f1_{f1:.4f}_acc_{accuracy:.4f}.pth')

利用我们预处理后的数据,在inception模型上训练11个epoch后,在训练集上,模型准确率达到99.4%,F1值达到99.6%。接下来,我们利用训练好的inception模型对附件四甲骨文原始图像进行文字自动识别。
