This example builds a small dataset from part of the Dogs vs. Cats data and demonstrates how to do image classification with Keras on TensorFlow 2.0+, using ResNet50 as the model.
import numpy as np
from tensorflow.keras.optimizers import Adam
import cv2
from tensorflow.keras.preprocessing.image import img_to_array, ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications.resnet import ResNet50
import os
from tensorflow.keras.models import load_model
As the imports show, TensorFlow 2.0+ ships with Keras built in, so there is no need to install Keras separately; to upgrade older code to TensorFlow 2.0+, simply prefix the keras imports with tensorflow, as in the example below.
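For instance, migrating a single import from standalone Keras to the bundled version looks like this (a minimal illustration):

# Old standalone Keras:
# from keras.models import load_model
# TensorFlow 2.x bundled Keras:
from tensorflow.keras.models import load_model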
That covers TensorFlow itself; next, a few important global parameters (gathered into one block after this list):

norm_size = 100: the input image size. Set it to whatever your task needs, but no larger than necessary.
datapath = 'data/train': the folder containing the images. If there are many images, do not put them inside the project directory, otherwise PyCharm will scan every one of them when loading the project, which is extremely slow.
EPOCHS = 100: the number of epochs. How many is enough is always a tough call; 300 is usually plenty, and if the model still seems under-trained you can reload it and keep training.
INIT_LR = 1e-3: the learning rate. The usual practice is to start at 0.001 and decay gradually; there is no need to go below about 1e-6.
classnum = 2: the number of classes. The dataset has two categories, so we classify into two.
batch_size = 16: the batch size. Set it according to your hardware and dataset size: too small and training jitters badly, too large and convergence suffers. As a rule of thumb, use a power of two.
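Collected in one place, the global settings used in this example:

norm_size = 100          # input image size
datapath = 'data/train'  # image folder
EPOCHS = 100             # number of training epochs
INIT_LR = 1e-3           # initial learning rate
classnum = 2             # number of classes
batch_size = 16          # batch size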
Steps for processing the images:

1. List every image file under datapath.
2. Derive each label from the filename prefix (cat or dog) via dicClass.
3. Read each image with cv2.imdecode (going through np.fromfile so that paths containing Chinese characters also work) and resize it to norm_size x norm_size.
4. Convert each image to an array, collect them into a list, and scale pixel values to [0, 1].

The concrete implementation is in the code:
labelList = []
dicClass = {'cat': 0, 'dog': 1}

def loadImageData():
    imageList = []
    listImage = os.listdir(datapath)
    for img in listImage:
        # The filename starts with the class name, e.g. cat.0.jpg
        labelName = dicClass[img.split('.')[0]]
        print(labelName)
        labelList.append(labelName)
        dataImgPath = os.path.join(datapath, img)
        print(dataImgPath)
        # imdecode + np.fromfile also handles paths with non-ASCII characters
        image = cv2.imdecode(np.fromfile(dataImgPath, dtype=np.uint8), -1)
        image = cv2.resize(image, (norm_size, norm_size), interpolation=cv2.INTER_LANCZOS4)
        image = img_to_array(image)
        imageList.append(image)
    # scale pixel values to [0, 1]
    imageList = np.array(imageList, dtype="int") / 255.0
    return imageList

print("Loading data...")
imageArr = loadImageData()
labelList = np.array(labelList)
print("Data loaded.")
With the data prepared, we split it into training and validation sets, usually at a 4:1 ratio, using the train_test_split() method from sklearn.model_selection (already in the imports above). Example:
trainX, valX, trainY, valY = train_test_split(imageArr, labelList, test_size=0.2, random_state=42)
ImageDataGenerator() is the image generator in the keras.preprocessing.image module. It can also augment the data batch by batch, enlarging the effective dataset and improving the model's ability to generalize, for example through rotation, distortion, normalization, and so on.
keras.preprocessing.image.ImageDataGenerator(featurewise_center=False, samplewise_center=False,
    featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False,
    zca_epsilon=1e-06, rotation_range=0.0, width_shift_range=0.0, height_shift_range=0.0,
    brightness_range=None, shear_range=0.0, zoom_range=0.0, channel_shift_range=0.0,
    fill_mode='nearest', cval=0.0, horizontal_flip=False, vertical_flip=False, rescale=None,
    preprocessing_function=None, data_format=None, validation_split=0.0)
Parameters (only those used in this example are described):
featurewise_center: set the mean of the inputs to 0 over the whole dataset, feature-wise.
featurewise_std_normalization: divide the inputs by the standard deviation of the dataset, feature-wise.
rotation_range: degree range for random rotations.
width_shift_range / height_shift_range: range, as a fraction of total width/height, for random horizontal/vertical shifts.
horizontal_flip: randomly flip inputs horizontally.
The augmentation code used in this example:
train_datagen = ImageDataGenerator(featurewise_center=True,
featurewise_std_normalization=True,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True)
val_datagen = ImageDataGenerator()  # no augmentation for the validation set
train_generator = train_datagen.flow(trainX, trainY, batch_size=batch_size, shuffle=True)
val_generator = val_datagen.flow(valX, valY, batch_size=batch_size, shuffle=True)
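One detail worth adding (this line is not in the original code): because featurewise_center and featurewise_std_normalization compute statistics over the whole dataset, Keras expects the generator to be fit on the training data first; otherwise it emits a warning and skips the normalization. A one-line fix:

train_datagen.fit(trainX)  # compute the feature-wise mean and std over the training set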
ModelCheckpoint saves the best-performing model during training. Its signature:
keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')
This callback saves the model to filepath after every epoch.
filepath can be a format string whose placeholders are filled in with the epoch number and the entries of the logs dict passed to on_epoch_end.
For example, if filepath is weights.{epoch:02d}-{val_loss:.2f}.hdf5, multiple files are produced, each named after its epoch and validation loss.
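As a concrete sketch of that pattern (filename placeholders as in the example just given):

checkpoint = ModelCheckpoint('weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                             monitor='val_loss', verbose=1)

Each epoch then writes a file named after the epoch and its validation loss, e.g. weights.05-0.43.hdf5.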
ReduceLROnPlateau reduces the learning rate when the monitored metric has stopped improving. Its signature:
keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
When learning stagnates, cutting the learning rate by a factor of 2 to 10 often works well. The callback watches the metric and, if no improvement is seen within patience epochs, reduces the learning rate.
The callbacks used in this example:
checkpointer = ModelCheckpoint(filepath='weights_best_ResNet50_model.hdf5',
monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
reduce = ReduceLROnPlateau(monitor='val_accuracy', patience=10,
verbose=1,
factor=0.5,
min_lr=1e-6)
With the callbacks in place, build ResNet50 from scratch (weights=None) and compile it:

model = ResNet50(weights=None, classes=classnum, input_shape=(norm_size, norm_size, 3))  # match the input to our image size
optimizer = Adam(learning_rate=INIT_LR)  # learning_rate replaces the deprecated lr argument
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# To resume training from an earlier run, load the saved model instead of the fresh one:
# model = load_model("my_model_resnet.h5")
# In TF2, model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator,
                    steps_per_epoch=trainX.shape[0] // batch_size,
                    validation_data=val_generator,
                    epochs=EPOCHS,
                    validation_steps=valX.shape[0] // batch_size,
                    callbacks=[checkpointer, reduce],
                    verbose=1)
model.save('my_model_resnet.h5')
loss_trend_graph_path = r"WW_loss.jpg"
acc_trend_graph_path = r"WW_acc.jpg"
import matplotlib.pyplot as plt
print("Now,we start drawing the loss and acc trends graph...")
# summarize history for accuracy
fig = plt.figure(1)
plt.plot(history.history["accuracy"])
plt.plot(history.history["val_accuracy"])
plt.title("Model accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.legend(["train", "test"], loc="upper left")
plt.savefig(acc_trend_graph_path)
plt.close(1)
# summarize history for loss
fig = plt.figure(2)
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("Model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "test"], loc="upper left")
plt.savefig(loss_trend_graph_path)
plt.close(2)
The complete code:

import numpy as np
from tensorflow.keras.optimizers import Adam
import cv2
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications.resnet import ResNet50
import os
from tensorflow.keras.models import load_model

norm_size = 100
datapath = 'data/train'
EPOCHS = 100
INIT_LR = 1e-3
labelList = []
dicClass = {'cat': 0, 'dog': 1}
classnum = 2
batch_size = 16

def loadImageData():
    imageList = []
    listImage = os.listdir(datapath)
    for img in listImage:
        labelName = dicClass[img.split('.')[0]]
        print(labelName)
        labelList.append(labelName)
        dataImgPath = os.path.join(datapath, img)
        print(dataImgPath)
        image = cv2.imdecode(np.fromfile(dataImgPath, dtype=np.uint8), -1)
        image = cv2.resize(image, (norm_size, norm_size), interpolation=cv2.INTER_LANCZOS4)
        image = img_to_array(image)
        imageList.append(image)
    imageList = np.array(imageList, dtype="int") / 255.0
    return imageList


print("Loading data...")
imageArr = loadImageData()
labelList = np.array(labelList)
print("Data loaded.")
print(labelList)
trainX, valX, trainY, valY = train_test_split(imageArr, labelList, test_size=0.2, random_state=42)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(featurewise_center=True,
                                   featurewise_std_normalization=True,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   horizontal_flip=True)
val_datagen = ImageDataGenerator()  # no augmentation for the validation set
train_datagen.fit(trainX)  # compute feature-wise mean/std for normalization

train_generator = train_datagen.flow(trainX, trainY, batch_size=batch_size, shuffle=True)
val_generator = val_datagen.flow(valX, valY, batch_size=batch_size, shuffle=True)
checkpointer = ModelCheckpoint(filepath='weights_best_ResNet50_model.hdf5',
                               monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

reduce = ReduceLROnPlateau(monitor='val_accuracy', patience=10,
                           verbose=1,
                           factor=0.5,
                           min_lr=1e-6)

model = ResNet50(weights=None, classes=classnum, input_shape=(norm_size, norm_size, 3))
optimizer = Adam(learning_rate=INIT_LR)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# To resume training from an earlier run, load the saved model instead:
# model = load_model("my_model_resnet.h5")
history = model.fit(train_generator,
                    steps_per_epoch=trainX.shape[0] // batch_size,
                    validation_data=val_generator,
                    epochs=EPOCHS,
                    validation_steps=valX.shape[0] // batch_size,
                    callbacks=[checkpointer, reduce],
                    verbose=1)
model.save('my_model_resnet.h5')
print(history)

loss_trend_graph_path = r"WW_loss.jpg"
acc_trend_graph_path = r"WW_acc.jpg"
import matplotlib.pyplot as plt

print("Now we start drawing the loss and acc trend graphs...")
# summarize history for accuracy
fig = plt.figure(1)
plt.plot(history.history["accuracy"])
plt.plot(history.history["val_accuracy"])
plt.title("Model accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.legend(["train", "val"], loc="upper left")
plt.savefig(acc_trend_graph_path)
plt.close(1)
# summarize history for loss
fig = plt.figure(2)
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("Model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "val"], loc="upper left")
plt.savefig(loss_trend_graph_path)
plt.close(2)
print("We are done, everything seems OK...")
# Windows: shut the machine down after 10 seconds; remove this line if unwanted
os.system("shutdown -s -t 10")
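After training, the saved model can be loaded back for single-image prediction. The original post stops at training, so the following is only a minimal sketch under the same preprocessing assumptions as above; the test image path is hypothetical:

import numpy as np
import cv2
from tensorflow.keras.models import load_model

norm_size = 100
model = load_model("my_model_resnet.h5")
img = cv2.imdecode(np.fromfile('data/test/1.jpg', dtype=np.uint8), -1)  # hypothetical path
img = cv2.resize(img, (norm_size, norm_size), interpolation=cv2.INTER_LANCZOS4)
img = np.array(img, dtype="int") / 255.0  # same scaling as in training
pred = model.predict(np.expand_dims(img, axis=0))  # shape (1, classnum)
print('cat' if np.argmax(pred) == 0 else 'dog')  # 0 = cat, 1 = dog, per dicClass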