
Step 61 Deep Learning Image Recognition: Multi-Class Modeling (TensorFlow)


Demonstrated on a 64-bit Windows 10 system.

I. Foreword

Up to the last post, everything we have done has been binary classification, both in the earlier machine learning posts and in the recent image classification posts. In real work, however, we very often need multi-class tasks. A chest X-ray, for example, is used to diagnose not only tuberculosis but also COVID-19 and bacterial (viral) pneumonia, which turns image recognition into a multi-class problem.

This post uses four groups as the dataset: healthy, tuberculosis, COVID-19, and bacterial (viral) pneumonia, and builds a multi-class MobileNet model. MobileNet is chosen, as before, because it trains quickly.
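In practical terms, switching from binary to multi-class classification mainly changes the output layer and the loss function. Below is a minimal, illustrative sketch of the difference, using a stand-in feature vector rather than the full MobileNet backbone; the complete wiring appears in the code in section II.

from tensorflow.keras import layers, models

# A stand-in 128-dimensional feature vector (in the real model this is the pooled MobileNet output).
features = layers.Input(shape=(128,))

# Binary head, as used in the earlier posts of this series:
binary_head = models.Model(features, layers.Dense(1, activation='sigmoid')(features))
binary_head.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Four-class head for the chest X-ray groups in this post:
multi_head = models.Model(features, layers.Dense(4, activation='softmax')(features))
multi_head.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',   # integer labels 0..3
                   metrics=['accuracy'])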

As before, the code was rewritten with the help of GPT-4; the prompting process is described at the end.

II. Hands-On Multi-Class Modeling

The chest X-ray dataset contains four classes: 900 images from healthy people, 700 from tuberculosis patients, 549 from COVID-19 patients, and 900 from the bacterial (viral) pneumonia group, with each class stored in its own folder.
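Because image_dataset_from_directory infers the class label from the sub-folder an image sits in, the four groups simply need to live in four sub-folders under one root. Here is a minimal sketch of the layout assumed in this post, plus a quick per-class count; the root name ./MTB-1 comes from the code below, and the sub-folder names are assumptions that only need to match the class_names list used later.

import pathlib

# Expected layout: one sub-folder per class under a single root.
# ./MTB-1/
#     COVID-19/        549 images
#     Normal/          900 images
#     Pneumonia/       900 images
#     Tuberculosis/    700 images
data_dir = pathlib.Path("./MTB-1")
for class_dir in sorted(data_dir.iterdir()):
    if class_dir.is_dir():
        print(f"{class_dir.name}: {len(list(class_dir.glob('*')))} images")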

(a) The complete code

######################################Import packages###################################
from tensorflow import keras
import tensorflow as tf
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout, Activation, Reshape, Softmax, GlobalAveragePooling2D, BatchNormalization
from tensorflow.python.keras.layers.convolutional import Convolution2D, MaxPooling2D
from tensorflow.python.keras import Sequential
from tensorflow.python.keras import Model
from tensorflow.python.keras.optimizers import adam_v2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator, image_dataset_from_directory
from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomFlip, RandomRotation, RandomContrast, RandomZoom, RandomTranslation
import os, PIL, pathlib
import warnings

# GPU setup
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    gpu0 = gpus[0]                                       # if there are multiple GPUs, use only GPU 0
    tf.config.experimental.set_memory_growth(gpu0, True) # allocate GPU memory on demand
    tf.config.set_visible_devices([gpu0], "GPU")

warnings.filterwarnings("ignore")                 # suppress warnings
plt.rcParams['font.sans-serif'] = ['SimHei']      # SimHei font so plots can render CJK labels
plt.rcParams['axes.unicode_minus'] = False        # display minus signs correctly
################################Load the dataset#####################################
# 1. Load the data
data_dir = "./MTB-1"                  # dataset root directory
data_dir = pathlib.Path(data_dir)
image_count = len(list(data_dir.glob('*/*')))
print("Total number of images:", image_count)

batch_size = 32
img_height = 100
img_width = 100

train_ds = image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=12,
    image_size=(img_height, img_width),
    batch_size=batch_size)

val_ds = image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=12,
    image_size=(img_height, img_width),
    batch_size=batch_size)

class_names = train_ds.class_names
print(class_names)
print(train_ds)

# 2. Inspect the data
for image_batch, labels_batch in train_ds:
    print(image_batch.shape)
    print(labels_batch.shape)
    break
# 3. Configure the data pipeline
AUTOTUNE = tf.data.AUTOTUNE

def train_preprocessing(image, label):
    return (image / 255.0, label)

train_ds = (
    train_ds.cache()
    .shuffle(800)
    .map(train_preprocessing)    # preprocessing: rescale pixels to [0, 1]
    # .batch(batch_size)         # batch_size was already set in image_dataset_from_directory
    .prefetch(buffer_size=AUTOTUNE)
)

val_ds = (
    val_ds.cache()
    .map(train_preprocessing)    # preprocessing: rescale pixels to [0, 1]
    # .batch(batch_size)         # batch_size was already set in image_dataset_from_directory
    .prefetch(buffer_size=AUTOTUNE)
)

# 4. Visualize a few samples
plt.figure(figsize=(10, 8))      # figure is 10 wide and 8 high
plt.suptitle("Data preview")
class_names = ["COVID-19", "Normal", "Pneumonia", "Tuberculosis"]  # class labels (alphabetical folder order)
for images, labels in train_ds.take(1):
    for i in range(15):
        plt.subplot(4, 5, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        # show the image
        plt.imshow(images[i])
        # show the label (labels from image_dataset_from_directory are 0-indexed)
        plt.xlabel(class_names[int(labels[i])])
plt.show()
######################################Data augmentation################################
data_augmentation = Sequential([
    RandomFlip("horizontal_and_vertical"),
    RandomRotation(0.2),
    RandomContrast(1.0),
    RandomZoom(0.5, 0.2),
    RandomTranslation(0.3, 0.5),
])

def prepare(ds):
    ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y), num_parallel_calls=AUTOTUNE)
    return ds

train_ds = prepare(train_ds)
###############################Import mobilenet_v2################################
# Load the pre-trained MobileNetV2 backbone
from tensorflow.python.keras.applications import mobilenet_v2
from tensorflow.python.keras import Input, regularizers

IMG_SIZE = (img_height, img_width, 3)
base_model = mobilenet_v2.MobileNetV2(input_shape=IMG_SIZE,
                                      include_top=False,   # do not include the top fully connected classifier
                                      weights='imagenet')

inputs = Input(shape=IMG_SIZE)
# Model
x = base_model(inputs, training=False)   # training=False keeps the backbone's BatchNorm layers in inference mode
# Global pooling
x = GlobalAveragePooling2D()(x)
# BatchNormalization
x = BatchNormalization()(x)
# Dropout
x = Dropout(0.8)(x)
# Dense
x = Dense(128, kernel_regularizer=regularizers.l2(0.1))(x)   # fully connected layer reduced to 128 units, with L2 regularization
# BatchNormalization
x = BatchNormalization()(x)
# Activation
x = Activation('relu')(x)
# Output layer
outputs = Dense(4, kernel_regularizer=regularizers.l2(0.1))(x)   # output layer changed to 4 units, one per class
# BatchNormalization
outputs = BatchNormalization()(outputs)
# Activation
outputs = Activation('softmax')(outputs)   # activation changed to 'softmax' for multi-class output
# Assemble the model
model = Model(inputs, outputs)
# Print the model structure
print(model.summary())
#############################Compile the model#########################################
# Optimizer
from tensorflow.python.keras.optimizers import adam_v2, rmsprop_v2
#from tensorflow.python.keras.optimizer_v2.gradient_descent import SGD
optimizer = adam_v2.Adam()
#optimizer = SGD(learning_rate=0.001)
#optimizer = rmsprop_v2.RMSprop()
# Commonly used optimizers:
#all_classes = {
#    'adadelta': adadelta_v2.Adadelta,
#    'adagrad': adagrad_v2.Adagrad,
#    'adam': adam_v2.Adam,
#    'adamax': adamax_v2.Adamax,
#    'experimentaladadelta': adadelta_experimental.Adadelta,
#    'experimentaladagrad': adagrad_experimental.Adagrad,
#    'experimentaladam': adam_experimental.Adam,
#    'experimentalsgd': sgd_experimental.SGD,
#    'nadam': nadam_v2.Nadam,
#    'rmsprop': rmsprop_v2.RMSprop,
#}

# Compile the model
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',   # multi-class problem with integer labels
              metrics=['accuracy'])

# Train the model
from tensorflow.python.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
NO_EPOCHS = 50
PATIENCE = 10
VERBOSE = 1
# Learning-rate schedule
annealer = LearningRateScheduler(lambda x: 1e-5 * 0.99 ** (x + NO_EPOCHS))
# Early stopping
earlystopper = EarlyStopping(monitor='loss', patience=PATIENCE, verbose=VERBOSE)
# Checkpoint the weights with the best validation accuracy
checkpointer = ModelCheckpoint('mtb_4_jet_best_model_mobilenetv3samll.h5',
                               monitor='val_accuracy',
                               verbose=VERBOSE,
                               save_best_only=True,
                               save_weights_only=True)

train_model = model.fit(train_ds,
                        epochs=NO_EPOCHS,
                        verbose=1,
                        validation_data=val_ds,
                        callbacks=[earlystopper, checkpointer, annealer])

# Save the model
model.save('mtb_4_jet_best_model_mobilenet.h5')
print("The trained model has been saved.")

from tensorflow.python.keras.models import load_model
model = load_model('mtb_4_jet_best_model_mobilenet.h5')   # reload the saved model; keep train_model as the History object needed for plotting below
###########################Accuracy and loss curves#################################
import matplotlib.pyplot as plt

loss = train_model.history['loss']
acc = train_model.history['accuracy']
val_loss = train_model.history['val_loss']
val_acc = train_model.history['val_accuracy']
epoch = range(1, len(loss) + 1)

fig, ax = plt.subplots(1, 2, figsize=(10, 4))
ax[0].plot(epoch, loss, label='Train loss')
ax[0].plot(epoch, val_loss, label='Validation loss')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss')
ax[0].legend()
ax[1].plot(epoch, acc, label='Train acc')
ax[1].plot(epoch, val_acc, label='Validation acc')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Accuracy')
ax[1].legend()
#plt.show()
plt.savefig("loss-acc.pdf", dpi=300, format="pdf")
####################################Confusion matrices#############################
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.models import load_model
from matplotlib.pyplot import imshow
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import pandas as pd
import math
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Helper to plot a confusion matrix
def plot_cm(labels, predictions, class_names):
    # build the confusion matrix
    conf_numpy = confusion_matrix(labels, predictions)
    # convert it to a DataFrame
    conf_df = pd.DataFrame(conf_numpy, index=class_names, columns=class_names)
    plt.figure(figsize=(8, 7))
    sns.heatmap(conf_df, annot=True, fmt="d", cmap="BuPu")
    plt.title('Confusion matrix', fontsize=15)
    plt.ylabel('Actual value', fontsize=14)
    plt.xlabel('Predictive value', fontsize=14)

val_pre = []
val_label = []
for images, labels in val_ds:
    for image, label in zip(images, labels):
        img_array = tf.expand_dims(image, 0)
        prediction = model.predict(img_array)
        val_pre.append(np.argmax(prediction, axis=-1))
        val_label.append(label.numpy())   # convert the label tensor to a numpy array

class_names = ['COVID-19', 'Normal', 'Pneumonia', 'Tuberculosis']   # change to your own class names
plot_cm(val_label, val_pre, class_names)
plt.savefig("val-cm.pdf", dpi=300, format="pdf")

precision_val, recall_val, f1_val, _ = precision_recall_fscore_support(val_label, val_pre, average='micro')
acc_val = accuracy_score(val_label, val_pre)
error_rate_val = 1 - acc_val
print("Validation sensitivity (recall):", recall_val,
      "Validation specificity:", precision_val,   # specificity is not well defined for multi-class problems, so precision is reported instead
      "Validation accuracy:", acc_val,
      "Validation error rate:", error_rate_val,
      "Validation F1:", f1_val)

train_pre = []
train_label = []
for images, labels in train_ds:
    for image, label in zip(images, labels):
        img_array = tf.expand_dims(image, 0)
        prediction = model.predict(img_array)
        train_pre.append(np.argmax(prediction, axis=-1))
        train_label.append(label.numpy())

plot_cm(train_label, train_pre, class_names)
plt.savefig("train-cm.pdf", dpi=300, format="pdf")

precision_train, recall_train, f1_train, _ = precision_recall_fscore_support(train_label, train_pre, average='micro')
acc_train = accuracy_score(train_label, train_pre)
error_rate_train = 1 - acc_train
print("Training sensitivity (recall):", recall_train,
      "Training specificity:", precision_train,   # specificity is not well defined for multi-class problems, so precision is reported instead
      "Training accuracy:", acc_train,
      "Training error rate:", error_rate_train,
      "Training F1:", f1_train)
################################Model performance metrics################################
from sklearn import metrics

def test_accuracy_report(model):
    print(metrics.classification_report(val_label, val_pre, target_names=class_names))
    score = model.evaluate(val_ds, verbose=0)
    print('Loss function: %s, accuracy:' % score[0], score[1])

test_accuracy_report(model)

def train_accuracy_report(model):
    print(metrics.classification_report(train_label, train_pre, target_names=class_names))
    score = model.evaluate(train_ds, verbose=0)
    print('Loss function: %s, accuracy:' % score[0], score[1])

train_accuracy_report(model)
################################ROC/AUC curves####################################
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.models import load_model
from matplotlib.pyplot import imshow
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import pandas as pd
import math

def plot_roc(name, labels, predictions, **kwargs):
    fp, tp, _ = metrics.roc_curve(labels, predictions)
    plt.plot(fp, tp, label=name, linewidth=2, **kwargs)
    plt.xlabel('False positives rate')
    plt.ylabel('True positives rate')
    ax = plt.gca()
    ax.set_aspect('equal')

# The labels must be one-hot encoded
lb = LabelBinarizer()
lb.fit([0, 1, 2, 3])   # fit the label binarizer for the four classes
n_classes = 4          # number of classes

val_pre_auc = []
val_label_auc = []
for images, labels in val_ds:
    for image, label in zip(images, labels):
        img_array = tf.expand_dims(image, 0)
        prediction_auc = model.predict(img_array)
        val_pre_auc.append(prediction_auc[0])
        val_label_auc.append(lb.transform([label])[0])   # one-hot encode the label
val_pre_auc = np.array(val_pre_auc)
val_label_auc = np.array(val_label_auc)
auc_score_val = [metrics.roc_auc_score(val_label_auc[:, i], val_pre_auc[:, i]) for i in range(n_classes)]

train_pre_auc = []
train_label_auc = []
for images, labels in train_ds:
    for image, label in zip(images, labels):
        img_array_train = tf.expand_dims(image, 0)
        prediction_auc = model.predict(img_array_train)
        train_pre_auc.append(prediction_auc[0])
        train_label_auc.append(lb.transform([label])[0])
train_pre_auc = np.array(train_pre_auc)
train_label_auc = np.array(train_label_auc)
auc_score_train = [metrics.roc_auc_score(train_label_auc[:, i], train_pre_auc[:, i]) for i in range(n_classes)]

for i in range(n_classes):
    plot_roc('validation AUC for class {0}: {1:.4f}'.format(i, auc_score_val[i]), val_label_auc[:, i], val_pre_auc[:, i], color="red", linestyle='--')
    plot_roc('training AUC for class {0}: {1:.4f}'.format(i, auc_score_train[i]), train_label_auc[:, i], train_pre_auc[:, i], color="blue", linestyle='--')
plt.legend(loc='lower right')
plt.savefig("roc.pdf", dpi=300, format="pdf")

for i in range(n_classes):
    print("Class {0} training AUC:".format(i), auc_score_train[i], "validation AUC:", auc_score_val[i])
################################ROC/AUC curves, one subplot per class####################################
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.models import load_model
from matplotlib.pyplot import imshow
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import pandas as pd
import math

def plot_roc(ax, name, labels, predictions, **kwargs):
    fp, tp, _ = metrics.roc_curve(labels, predictions)
    ax.plot(fp, tp, label=name, linewidth=2, **kwargs)
    ax.plot([0, 1], [0, 1], color='orange', linestyle='--')
    ax.set_xlabel('False positives rate')
    ax.set_ylabel('True positives rate')
    ax.set_aspect('equal')

# The labels must be one-hot encoded
lb = LabelBinarizer()
lb.fit([0, 1, 2, 3])   # fit the label binarizer for the four classes
n_classes = 4          # number of classes

val_pre_auc = []
val_label_auc = []
for images, labels in val_ds:
    for image, label in zip(images, labels):
        img_array = tf.expand_dims(image, 0)
        prediction_auc = model.predict(img_array)
        val_pre_auc.append(prediction_auc[0])
        val_label_auc.append(lb.transform([label])[0])   # one-hot encode the label
val_pre_auc = np.array(val_pre_auc)
val_label_auc = np.array(val_label_auc)
auc_score_val = [metrics.roc_auc_score(val_label_auc[:, i], val_pre_auc[:, i]) for i in range(n_classes)]

train_pre_auc = []
train_label_auc = []
for images, labels in train_ds:
    for image, label in zip(images, labels):
        img_array_train = tf.expand_dims(image, 0)
        prediction_auc = model.predict(img_array_train)
        train_pre_auc.append(prediction_auc[0])
        train_label_auc.append(lb.transform([label])[0])
train_pre_auc = np.array(train_pre_auc)
train_label_auc = np.array(train_label_auc)
auc_score_train = [metrics.roc_auc_score(train_label_auc[:, i], train_pre_auc[:, i]) for i in range(n_classes)]

fig, axs = plt.subplots(n_classes, figsize=(5, 20))
for i in range(n_classes):
    plot_roc(axs[i], 'validation AUC for class {0}: {1:.4f}'.format(i, auc_score_val[i]), val_label_auc[:, i], val_pre_auc[:, i], color="red", linestyle='--')
    plot_roc(axs[i], 'training AUC for class {0}: {1:.4f}'.format(i, auc_score_train[i]), train_label_auc[:, i], train_pre_auc[:, i], color="blue", linestyle='--')
    axs[i].legend(loc='lower right')
plt.tight_layout()
plt.savefig("roc.pdf", dpi=300, format="pdf")   # note: this overwrites the combined roc.pdf saved above

for i in range(n_classes):
    print("Class {0} training AUC:".format(i), auc_score_train[i], "validation AUC:", auc_score_val[i])
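As a usage note, once training has finished the saved .h5 file can be reloaded for single-image predictions. Below is a minimal sketch, assuming the file name mtb_4_jet_best_model_mobilenet.h5 produced by the code above and a hypothetical image path example_xray.png; the image must be resized to 100x100 and rescaled to [0, 1], exactly as train_preprocessing does during training.

import numpy as np
from PIL import Image
from tensorflow.python.keras.models import load_model

class_names = ['COVID-19', 'Normal', 'Pneumonia', 'Tuberculosis']
model = load_model('mtb_4_jet_best_model_mobilenet.h5')

# Placeholder path: replace with a real chest X-ray file.
img = Image.open('example_xray.png').convert('RGB').resize((100, 100))
x = np.expand_dims(np.asarray(img) / 255.0, axis=0)   # shape (1, 100, 100, 3), rescaled like the training data

probs = model.predict(x)[0]                           # softmax probabilities over the four classes
print(class_names[int(np.argmax(probs))], probs)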

(b) How GPT-4 was prompted

(b1) Prompt: Please rewrite and extend 《Code 2》 based on {Code 1}. Code 1: {the misclassified-case analysis code written earlier with TensorFlow}; Code 2: 《the MobileNet modeling code from earlier posts, to be modified》.

Then adjust the result to fit your specific situation, with GPT's help of course.

III. Output Results

(1) Learning curves

(2) Confusion matrices

(3) Performance metrics

(4) ROC curves

(4.1) Combined:

(4.2) Separate:

 

IV. Data

Link: https://pan.baidu.com/s/1rqu15KAUxjNBaWYfEmPwgQ?pwd=xfyn

Extraction code: xfyn
