赞
踩
tensorflow提供了一个优化工具 tensorflow_model_optimization,专门针对keras进行模型优化
主要可以进行剪枝、量化和权重聚类
这里主要使用前面两个
数据集使用以前的文章:mnn模型从训练-转换-预测
具体训练代码如下
注意:使用之前需要手动安装tensorflow_model_optimization,使用pip install tensorflow_model_optimization就行
"""Train a small flower classifier, then shrink it with pruning +
quantization-aware training (tensorflow_model_optimization) and export
both the baseline and the optimized model as TFLite files."""
import tempfile
import os

import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Hyper-parameters.
batch_size = 2
img_height = 180
img_width = 180
num_classes = 5
epochs = 50
pruning_epochs = 5            # epochs used by the pruning fine-tune below
validation_split = 0.2
data_dir = 'flower_photos'
num_images = 3670             # total images in flower_photos — TODO confirm

# Dataset preparation: one sub-folder per class under data_dir.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=validation_split,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=validation_split,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Baseline model.
# FIX: the original passed (img_height, img_height, 3) as the input shape,
# which only worked because height == width == 180; use img_width for the
# width axis.  The identity Reshape layer the original inserted right after
# InputLayer was a no-op and is dropped.
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)   # raw logits; loss below uses from_logits=True
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.summary()
model.fit(train_ds, validation_data=val_ds, epochs=epochs)

# Export the un-optimized baseline (Keras h5 + TFLite) for comparison.
tf.keras.models.save_model(model, 'baseline_model.h5', include_optimizer=False)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
# FIX: use a context manager so the file handle is closed deterministically.
with open("baseline_model.tflite", "wb") as f:
    f.write(tflite_model)

# ---- pruning ----
print("start pruning")
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# FIX: end_step must match the number of optimizer steps the pruning
# fine-tune actually runs.  The original computed it from epochs=50 and the
# full dataset, but pruning trains for 5 epochs on the 80% training split,
# so the polynomial schedule never lined up with training.
num_train_images = int(num_images * (1 - validation_split))
end_step = np.ceil(num_train_images / batch_size).astype(np.int32) * pruning_epochs
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)
model_for_pruning.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])
model_for_pruning.summary()

logdir = tempfile.mkdtemp()
callbacks = [
    # UpdatePruningStep is mandatory — it advances the pruning schedule.
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

# FIX: do not pass batch_size to fit() when the input is a tf.data.Dataset —
# the dataset is already batched.
model_for_pruning.fit(train_ds,
                      epochs=pruning_epochs,
                      validation_data=val_ds,
                      callbacks=callbacks)
# Remove the pruning wrappers so the exported model is a plain Keras model.
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

# ---- quantization-aware training ----
print("start quantize")
q_aware_model = tfmot.quantization.keras.quantize_model(model_for_export)
q_aware_model.compile(optimizer='adam',
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                      metrics=['accuracy'])

q_aware_model.summary()
q_aware_model.fit(train_ds, epochs=5, validation_data=val_ds)

converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()
quantized_and_pruned_tflite_file = 'pruned_and_quantized.tflite'
with open(quantized_and_pruned_tflite_file, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)
运行结束后,我们看一下模型文件:
可以看到文件确实压缩了不少了,4倍左右
接下来试一下,推理速度
优化过的模型文件推理如下
"""Benchmark the pruned+quantized TFLite model on a single image."""
import tensorflow as tf
import cv2
import numpy as np
import time

# Load and preprocess the test image: resize to the model's 180x180 input
# and add a batch axis.  NOTE(review): no BGR->RGB swap or rescaling is
# applied, mirroring the training pipeline — confirm this is intentional.
image = cv2.imread('397.jpg')
image = cv2.resize(image, (180, 180))
image = image[np.newaxis, :, :, :].astype(np.float32)
print(image.shape)

interpreter = tf.lite.Interpreter(model_path='pruned_and_quantized.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Warm-up run so one-time kernel setup is excluded from the measurement.
interpreter.set_tensor(input_details[0]['index'], image)
interpreter.invoke()

# FIX: the original started the timer before image decoding and interpreter
# construction, so model-load time was averaged into the reported
# "inference" time; it also printed inside the timed loop.  Time only the
# invoke loop, with perf_counter (the recommended benchmarking clock).
runs = 10
start = time.perf_counter()
for _ in range(runs):
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
elapsed = time.perf_counter() - start
print(output_data)
print('avg infer time is %.6f s' % (elapsed / runs))
运行结果:
原始模型推理:
"""Benchmark the un-optimized baseline TFLite model on the same image."""
import tensorflow as tf
import cv2
import numpy as np
import time

# Identical preprocessing to the optimized-model benchmark so the two
# timings are comparable.
image = cv2.imread('397.jpg')
image = cv2.resize(image, (180, 180))
image = image[np.newaxis, :, :, :].astype(np.float32)
print(image.shape)

interpreter = tf.lite.Interpreter(model_path='baseline_model.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Warm-up run so one-time kernel setup is excluded from the measurement.
interpreter.set_tensor(input_details[0]['index'], image)
interpreter.invoke()

# FIX: the original timer also covered image decoding and interpreter
# construction and printed inside the loop, skewing the comparison.  Time
# only the invoke loop with perf_counter.
runs = 10
start = time.perf_counter()
for _ in range(runs):
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
elapsed = time.perf_counter() - start
print(output_data)
print('avg infer time is %.6f s' % (elapsed / runs))
运行结果:
出乎意料的是,模型文件虽然变小了,但是速度居然还慢了,还慢了几十倍
这里是使用的ubuntu,也试过了win10上面推理差距更大,慢更多倍
反正就是一句话,优化后居然慢了。。。。
只是一时找不到tflite官方的benchmark工具,只能使用这样的方式粗略测试,所以测得的时间也许不够准确
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。