1. Foreword
Up to the previous post we had only tackled binary classification, both in the earlier machine-learning articles and in the recent image-classification ones. In practice, however, multi-class tasks are far more common: a chest X-ray is used to diagnose not only tuberculosis but also COVID-19 and bacterial (viral) pneumonia, which turns the image-recognition problem into a multi-class one.
This post uses a dataset with four groups (healthy, tuberculosis, COVID-19, and bacterial/viral pneumonia) to build a multi-class MobileNet model; MobileNet is chosen again because it trains quickly.
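Compared with the binary-classification scripts from earlier posts, the main structural changes are the classification head and the loss function. Below is a minimal sketch of the two lines that differ (variable names are illustrative; the full script follows in section 2(a)):
- # output layer: 4 units with softmax instead of a single sigmoid unit
- outputs = Dense(4, activation='softmax')(x)
- # loss: sparse_categorical_crossentropy instead of binary_crossentropy
- model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])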
As before, the code was adapted with GPT-4-assisted programming; the adaptation process is described at the end of the post.
2. Hands-on: building the multi-class model
The chest X-ray dataset covers four groups: 900 images from healthy subjects, 700 from tuberculosis patients, 549 from COVID-19 patients, and 900 from the bacterial (viral) pneumonia group, each stored in its own folder (see the folder-layout sketch below).
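`image_dataset_from_directory` infers each image's class from the sub-folder it sits in, so a layout along these lines is assumed (the folder names are illustrative and simply mirror the class labels used later in the code; only the structure matters):
- MTB-1/
-     COVID-19/        # 549 images
-     Normal/          # 900 images
-     Pneumonia/       # 900 images
-     Tuberculosis/    # 700 images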
(a) The complete code
- ###################################### Import packages ###################################
- from tensorflow import keras
- import tensorflow as tf
- from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout, Activation, Reshape, Softmax, GlobalAveragePooling2D, BatchNormalization
- from tensorflow.python.keras.layers.convolutional import Convolution2D, MaxPooling2D
- from tensorflow.python.keras import Sequential
- from tensorflow.python.keras import Model
- from tensorflow.python.keras.optimizers import adam_v2
- import numpy as np
- import matplotlib.pyplot as plt
- from tensorflow.python.keras.preprocessing.image import ImageDataGenerator, image_dataset_from_directory
- from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomFlip, RandomRotation, RandomContrast, RandomZoom, RandomTranslation
- import os,PIL,pathlib
- import warnings
- # GPU setup
- gpus = tf.config.list_physical_devices("GPU")
-
- if gpus:
- gpu0 = gpus[0] # if several GPUs are available, use only the first one
- tf.config.experimental.set_memory_growth(gpu0, True) # allocate GPU memory on demand
- tf.config.set_visible_devices([gpu0],"GPU")
-
- warnings.filterwarnings("ignore") # suppress warning messages
- plt.rcParams['font.sans-serif'] = ['SimHei'] # font that can render CJK characters in plots
- plt.rcParams['axes.unicode_minus'] = False # render the minus sign correctly
-
-
- ################################ Load the dataset #####################################
- #1. Load the data
- data_dir = "./MTB-1" # dataset root directory (path updated for this post)
- data_dir = pathlib.Path(data_dir)
- image_count = len(list(data_dir.glob('*/*')))
- print("Total number of images:", image_count)
-
- batch_size = 32
- img_height = 100
- img_width = 100
-
- train_ds = image_dataset_from_directory(
- data_dir,
- validation_split=0.2,
- subset="training",
- seed=12,
- image_size=(img_height, img_width),
- batch_size=batch_size)
-
- val_ds = image_dataset_from_directory(
- data_dir,
- validation_split=0.2,
- subset="validation",
- seed=12,
- image_size=(img_height, img_width),
- batch_size=batch_size)
-
- class_names = train_ds.class_names
- print(class_names)
- print(train_ds)
-
-
- #2. Inspect the data
- for image_batch, labels_batch in train_ds:
- print(image_batch.shape)
- print(labels_batch.shape)
- break
-
- #3. Configure the data pipeline
- AUTOTUNE = tf.data.AUTOTUNE
-
- def train_preprocessing(image,label):
- return (image/255.0,label)
-
- train_ds = (
- train_ds.cache()
- .shuffle(800)
- .map(train_preprocessing) # the preprocessing function can be set here
- # .batch(batch_size) # batch_size is already set in image_dataset_from_directory
- .prefetch(buffer_size=AUTOTUNE)
- )
-
- val_ds = (
- val_ds.cache()
- .map(train_preprocessing) # the preprocessing function can be set here
- # .batch(batch_size) # batch_size is already set in image_dataset_from_directory
- .prefetch(buffer_size=AUTOTUNE)
- )
-
- #4. Visualize the data
- plt.figure(figsize=(10, 8)) # figure is 10 wide and 8 high
- plt.suptitle("Sample images")
-
- class_names = ["COVID-19", "Normal", "Pneumonia", "Tuberculosis"] # class labels; must match the alphabetical folder order used by image_dataset_from_directory
-
- for images, labels in train_ds.take(1):
- for i in range(15):
- plt.subplot(4, 5, i + 1)
- plt.xticks([])
- plt.yticks([])
- plt.grid(False)
-
- # show the image
- plt.imshow(images[i])
- # show the label; labels from image_dataset_from_directory are already 0-indexed
- plt.xlabel(class_names[labels[i]])
-
- plt.show()
-
- ###################################### Data augmentation ################################
-
- data_augmentation = Sequential([
- RandomFlip("horizontal_and_vertical"),
- RandomRotation(0.2),
- RandomContrast(1.0),
- RandomZoom(0.5,0.2),
- RandomTranslation(0.3,0.5),
- ])
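-
- # (Optional sketch, not in the original post.) The augmentation layers above are only active
- # when called with training=True; their effect can be previewed on one batch like this:
- # for images, _ in train_ds.take(1):
- #     augmented = data_augmentation(images, training=True)
- #     plt.figure(figsize=(10, 2))
- #     for i in range(5):
- #         plt.subplot(1, 5, i + 1)
- #         plt.imshow(np.clip(augmented[i], 0, 1))
- #         plt.axis("off")
- #     plt.show()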
-
- def prepare(ds):
- ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y), num_parallel_calls=AUTOTUNE)
- return ds
- train_ds = prepare(train_ds)
-
-
- ############################### Load mobilenet_v2 ################################
- # load the pre-trained MobileNetV2 backbone with ImageNet weights
- from tensorflow.python.keras.applications import mobilenet_v2
- from tensorflow.python.keras import Input, regularizers
- IMG_SIZE = (img_height, img_width, 3)
-
- base_model = mobilenet_v2.MobileNetV2(input_shape=IMG_SIZE,
- include_top=False, # do not include the top fully connected classifier
- weights='imagenet')
-
- inputs = Input(shape=IMG_SIZE)
- # build the model: freeze the backbone so the pre-trained weights stay fixed
- base_model.trainable = False
- x = base_model(inputs, training=False) # run the backbone in inference mode
- # global pooling
- x = GlobalAveragePooling2D()(x)
- # BatchNormalization
- x = BatchNormalization()(x)
- # Dropout
- x = Dropout(0.8)(x)
- # Dense
- x = Dense(128, kernel_regularizer=regularizers.l2(0.1))(x) # fully connected layer reduced to 128 units, with L2 regularization
- # BatchNormalization
- x = BatchNormalization()(x)
- # activation
- x = Activation('relu')(x)
- # output layer
- outputs = Dense(4, kernel_regularizer=regularizers.l2(0.1))(x) # number of output neurons changed to 4
- # BatchNormalization
- outputs = BatchNormalization()(outputs)
- # activation
- outputs = Activation('softmax')(outputs) # activation changed to 'softmax'
- # wrap everything into a Model
- model = Model(inputs, outputs)
- # print the model architecture
- print(model.summary())
- ############################# Compile the model #########################################
- # define the optimizer
- from tensorflow.python.keras.optimizers import adam_v2, rmsprop_v2
- #from tensorflow.python.keras.optimizer_v2.gradient_descent import SGD
- optimizer = adam_v2.Adam()
- #optimizer = SGD(learning_rate=0.001)
- #optimizer = rmsprop_v2.RMSprop()
-
- # commonly used optimizers
- #all_classes = {
- # 'adadelta': adadelta_v2.Adadelta,
- # 'adagrad': adagrad_v2.Adagrad,
- # 'adam': adam_v2.Adam,
- # 'adamax': adamax_v2.Adamax,
- # 'experimentaladadelta': adadelta_experimental.Adadelta,
- # 'experimentaladagrad': adagrad_experimental.Adagrad,
- # 'experimentaladam': adam_experimental.Adam,
- # 'experimentalsgd': sgd_experimental.SGD,
- # 'nadam': nadam_v2.Nadam,
- # 'rmsprop': rmsprop_v2.RMSprop,
-
- # compile the model
- model.compile(optimizer=optimizer,
- loss='sparse_categorical_crossentropy', # multi-class problem with integer labels
- metrics=['accuracy'])
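-
- # Note (added remark, not in the original post): image_dataset_from_directory returns integer
- # class labels by default (label_mode='int'), which is what 'sparse_categorical_crossentropy'
- # expects; with one-hot labels (label_mode='categorical') the loss would have to be
- # 'categorical_crossentropy' instead.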
-
- # Train the model
- from tensorflow.python.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
-
- NO_EPOCHS = 50
- PATIENCE = 10
- VERBOSE = 1
-
- # dynamic learning-rate schedule
- annealer = LearningRateScheduler(lambda x: 1e-5 * 0.99 ** (x+NO_EPOCHS))
-
- # early stopping
- earlystopper = EarlyStopping(monitor='loss', patience=PATIENCE, verbose=VERBOSE)
-
- # save the best weights seen so far
- checkpointer = ModelCheckpoint('mtb_4_jet_best_model_mobilenetv3samll.h5',
- monitor='val_accuracy',
- verbose=VERBOSE,
- save_best_only=True,
- save_weights_only=True)
-
- train_model = model.fit(train_ds,
- epochs=NO_EPOCHS,
- verbose=1,
- validation_data=val_ds,
- callbacks=[earlystopper, checkpointer, annealer])
-
- # save the model
- model.save('mtb_4_jet_best_model_mobilenet.h5')
- print("The trained model has been saved.")
-
- from tensorflow.python.keras.models import load_model
- model = load_model('mtb_4_jet_best_model_mobilenet.h5') # reload into `model`; `train_model` keeps the fit history used below
- ########################### Accuracy and loss curves #################################
- import matplotlib.pyplot as plt
-
- loss = train_model.history['loss']
- acc = train_model.history['accuracy']
- val_loss = train_model.history['val_loss']
- val_acc = train_model.history['val_accuracy']
- epoch = range(1, len(loss)+1)
-
- fig, ax = plt.subplots(1, 2, figsize=(10,4))
- ax[0].plot(epoch, loss, label='Train loss')
- ax[0].plot(epoch, val_loss, label='Validation loss')
- ax[0].set_xlabel('Epochs')
- ax[0].set_ylabel('Loss')
- ax[0].legend()
- ax[1].plot(epoch, acc, label='Train acc')
- ax[1].plot(epoch, val_acc, label='Validation acc')
- ax[1].set_xlabel('Epochs')
- ax[1].set_ylabel('Accuracy')
- ax[1].legend()
- #plt.show()
- plt.savefig("loss-acc.pdf", dpi=300,format="pdf")
-
- #################################### Confusion matrix #############################
- import numpy as np
- import matplotlib.pyplot as plt
- from tensorflow.python.keras.models import load_model
- from matplotlib.pyplot import imshow
- from sklearn.metrics import classification_report, confusion_matrix
- import seaborn as sns
- import pandas as pd
- import math
- from sklearn.metrics import precision_recall_fscore_support, accuracy_score
-
- # helper that plots the confusion matrix as a heatmap
- def plot_cm(labels, predictions, class_names):
- # compute the confusion matrix
- conf_numpy = confusion_matrix(labels, predictions)
- # convert it to a DataFrame for labelled axes
- conf_df = pd.DataFrame(conf_numpy, index=class_names, columns=class_names)
-
- plt.figure(figsize=(8,7))
-
- sns.heatmap(conf_df, annot=True, fmt="d", cmap="BuPu")
-
- plt.title('Confusion matrix',fontsize=15)
- plt.ylabel('Actual label',fontsize=14)
- plt.xlabel('Predicted label',fontsize=14)
-
- val_pre = []
- val_label = []
- for images, labels in val_ds:
- for image, label in zip(images, labels):
- img_array = tf.expand_dims(image, 0)
- prediction = model.predict(img_array)
- val_pre.append(np.argmax(prediction, axis=-1))
- val_label.append(label.numpy()) # the labels have to be converted to numpy arrays
-
- class_names = ['COVID-19', 'Normal', 'Pneumonia', 'Tuberculosis'] # replace with your own class names
- plot_cm(val_label, val_pre, class_names)
- plt.savefig("val-cm.pdf", dpi=300,format="pdf")
-
- precision_val, recall_val, f1_val, _ = precision_recall_fscore_support(val_label, val_pre, average='micro')
- acc_val = accuracy_score(val_label, val_pre)
- error_rate_val = 1 - acc_val
-
- print("验证集的灵敏度(召回率)为:",recall_val,
- "验证集的特异度为:",precision_val, # 在多分类问题中,特异度定义不明确,这里我们使用精确度来代替
- "验证集的准确率为:",acc_val,
- "验证集的错误率为:",error_rate_val,
- "验证集的F1为:",f1_val)
-
- train_pre = []
- train_label = []
- for images, labels in train_ds:
- for image, label in zip(images, labels):
- img_array = tf.expand_dims(image, 0)
- prediction = model.predict(img_array)
- train_pre.append(np.argmax(prediction, axis=-1))
- train_label.append(label.numpy())
-
- plot_cm(train_label, train_pre, class_names)
- plt.savefig("train-cm.pdf", dpi=300,format="pdf")
-
- precision_train, recall_train, f1_train, _ = precision_recall_fscore_support(train_label, train_pre, average='micro')
- acc_train = accuracy_score(train_label, train_pre)
- error_rate_train = 1 - acc_train
-
- print("训练集的灵敏度(召回率)为:",recall_train,
- "训练集的特异度为:",precision_train, # 在多分类问题中,特异度定义不明确,这里我们使用精确度来代替
- "训练集的准确率为:",acc_train,
- "训练集的错误率为:",error_rate_train,
- "训练集的F1为:",f1_train)
-
-
- ################################ Performance metrics ################################
- from sklearn import metrics
-
- def test_accuracy_report(model):
- print(metrics.classification_report(val_label, val_pre, target_names=class_names))
- score = model.evaluate(val_ds, verbose=0)
- print('Loss: %s, accuracy: %s' % (score[0], score[1]))
-
- test_accuracy_report(model)
-
- def train_accuracy_report(model):
- print(metrics.classification_report(train_label, train_pre, target_names=class_names))
- score = model.evaluate(train_ds, verbose=0)
- print('Loss: %s, accuracy: %s' % (score[0], score[1]))
-
- train_accuracy_report(model)
-
- ################################ ROC/AUC curves ####################################
- from sklearn import metrics
- from sklearn.preprocessing import LabelBinarizer
- import numpy as np
- import matplotlib.pyplot as plt
- from tensorflow.python.keras.models import load_model
- from matplotlib.pyplot import imshow
- from sklearn.metrics import classification_report, confusion_matrix
- import seaborn as sns
- import pandas as pd
- import math
-
- def plot_roc(name, labels, predictions, **kwargs):
- fp, tp, _ = metrics.roc_curve(labels, predictions)
-
- plt.plot(fp, tp, label=name, linewidth=2, **kwargs)
- plt.xlabel('False positives rate')
- plt.ylabel('True positives rate')
- ax = plt.gca()
- ax.set_aspect('equal')
-
- # the labels need to be one-hot encoded
- lb = LabelBinarizer()
- lb.fit([0, 1, 2, 3]) # fit the label binarizer; four classes are assumed here
- n_classes = 4 # number of classes
-
- val_pre_auc = []
- val_label_auc = []
-
- for images, labels in val_ds:
- for image, label in zip(images, labels):
- img_array = tf.expand_dims(image, 0)
- prediction_auc = model.predict(img_array)
- val_pre_auc.append(prediction_auc[0])
- val_label_auc.append(lb.transform([label])[0]) # encode the label with the binarizer
-
- val_pre_auc = np.array(val_pre_auc)
- val_label_auc = np.array(val_label_auc)
-
- auc_score_val = [metrics.roc_auc_score(val_label_auc[:, i], val_pre_auc[:, i]) for i in range(n_classes)]
-
-
- train_pre_auc = []
- train_label_auc = []
-
- for images, labels in train_ds:
- for image, label in zip(images, labels):
- img_array_train = tf.expand_dims(image, 0)
- prediction_auc = model.predict(img_array_train)
- train_pre_auc.append(prediction_auc[0])
- train_label_auc.append(lb.transform([label])[0])
-
- train_pre_auc = np.array(train_pre_auc)
- train_label_auc = np.array(train_label_auc)
-
- auc_score_train = [metrics.roc_auc_score(train_label_auc[:, i], train_pre_auc[:, i]) for i in range(n_classes)]
-
- for i in range(n_classes):
- plot_roc('validation AUC for class {0}: {1:.4f}'.format(i, auc_score_val[i]), val_label_auc[:, i] , val_pre_auc[:, i], color="red", linestyle='--')
- plot_roc('training AUC for class {0}: {1:.4f}'.format(i, auc_score_train[i]), train_label_auc[:, i], train_pre_auc[:, i], color="blue", linestyle='--')
-
- plt.legend(loc='lower right')
- plt.savefig("roc.pdf", dpi=300,format="pdf")
-
- for i in range(n_classes):
- print("Class {0} 训练集的AUC值为:".format(i), auc_score_train[i], "验证集的AUC值为:", auc_score_val[i])
-
-
- ################################ ROC/AUC curves, one panel per class ####################################
- from sklearn import metrics
- from sklearn.preprocessing import LabelBinarizer
- import numpy as np
- import matplotlib.pyplot as plt
- from tensorflow.python.keras.models import load_model
- from matplotlib.pyplot import imshow
- from sklearn.metrics import classification_report, confusion_matrix
- import seaborn as sns
- import pandas as pd
- import math
-
- def plot_roc(ax, name, labels, predictions, **kwargs):
- fp, tp, _ = metrics.roc_curve(labels, predictions)
- ax.plot(fp, tp, label=name, linewidth=2, **kwargs)
- ax.plot([0, 1], [0, 1], color='orange', linestyle='--')
- ax.set_xlabel('False positives rate')
- ax.set_ylabel('True positives rate')
- ax.set_aspect('equal')
-
- # the labels need to be one-hot encoded
- lb = LabelBinarizer()
- lb.fit([0, 1, 2, 3]) # fit the label binarizer; four classes are assumed here
- n_classes = 4 # number of classes
-
- val_pre_auc = []
- val_label_auc = []
-
- for images, labels in val_ds:
- for image, label in zip(images, labels):
- img_array = tf.expand_dims(image, 0)
- prediction_auc = model.predict(img_array)
- val_pre_auc.append(prediction_auc[0])
- val_label_auc.append(lb.transform([label])[0]) # encode the label with the binarizer
-
- val_pre_auc = np.array(val_pre_auc)
- val_label_auc = np.array(val_label_auc)
-
- auc_score_val = [metrics.roc_auc_score(val_label_auc[:, i], val_pre_auc[:, i]) for i in range(n_classes)]
-
-
- train_pre_auc = []
- train_label_auc = []
-
- for images, labels in train_ds:
- for image, label in zip(images, labels):
- img_array_train = tf.expand_dims(image, 0)
- prediction_auc = model.predict(img_array_train)
- train_pre_auc.append(prediction_auc[0])
- train_label_auc.append(lb.transform([label])[0])
-
- train_pre_auc = np.array(train_pre_auc)
- train_label_auc = np.array(train_label_auc)
-
- auc_score_train = [metrics.roc_auc_score(train_label_auc[:, i], train_pre_auc[:, i]) for i in range(n_classes)]
-
- fig, axs = plt.subplots(n_classes, figsize=(5, 20))
-
- for i in range(n_classes):
- plot_roc(axs[i], 'validation AUC for class {0}: {1:.4f}'.format(i, auc_score_val[i]), val_label_auc[:, i] , val_pre_auc[:, i], color="red", linestyle='--')
- plot_roc(axs[i], 'training AUC for class {0}: {1:.4f}'.format(i, auc_score_train[i]), train_label_auc[:, i], train_pre_auc[:, i], color="blue", linestyle='--')
- axs[i].legend(loc='lower right')
-
- plt.tight_layout()
- plt.savefig("roc.pdf", dpi=300,format="pdf")
-
- for i in range(n_classes):
- print("Class {0} 训练集的AUC值为:".format(i), auc_score_train[i], "验证集的AUC值为:", auc_score_val[i])
(b) How GPT-4 was prompted
(b1) Prompt: "Based on {code 1}, please rewrite and extend <code 2>." Code 1: {the misclassified-case analysis code written earlier with TensorFlow}; code 2: <the MobileNet modeling code from before, to be modified>.
The output then only needs to be adjusted to the specific situation, with GPT's help of course.
3. Results
(1) Learning curves
(2) Confusion matrices
(3) Performance metrics
(4) ROC curves
(4.1) Combined in one plot:
(4.2) One panel per class:
4. Data
Link: https://pan.baidu.com/s/1rqu15KAUxjNBaWYfEmPwgQ?pwd=xfyn
Extraction code: xfyn