当前位置:   article > 正文

autokeras--自动机器学习模型训练_机器学习 自主训练模型

机器学习 自主训练模型

模型

  • 分类模型反映的是样本在不同类别上的概率
  • 回归模型反映的是推测出的可能值
  • 准备数据集
  • 数据集预处理
  • 模型训练
  • 模型导出
  • 模型加载部署
  • 模型预测

封装数据集与模型训练


from tensorflow.keras.datasets import mnist
import abc
import os
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_files
from tensorflow.keras.models import load_model
import autokeras as ak
import requests
import cv2


class ABCDatasets(metaclass=abc.ABCMeta):
    """Abstract dataset interface; every member below must be overridden.

    ``load_data`` is a regular method that populates the dataset, while
    ``train_data``, ``test_data`` and ``label_mapping`` are read-only
    properties exposing what was loaded.
    """

    # NOTE: ``load_data`` is intentionally NOT a property. Concrete datasets
    # implement it as a plain method and callers invoke ``data.load_data()``.
    @abc.abstractmethod
    def load_data(self):
        """Load the dataset into the instance."""

    @property
    @abc.abstractmethod
    def train_data(self):
        """Training data."""

    @property
    @abc.abstractmethod
    def test_data(self):
        """Test data."""

    @property
    @abc.abstractmethod
    def label_mapping(self):
        """Mapping from a predicted label index to its label value."""
        return {}


class ABCModel(metaclass=abc.ABCMeta):
    """Abstract model interface; all four methods must be overridden."""

    @abc.abstractmethod
    def train(self):
        """Fit the model."""

    @abc.abstractmethod
    def export_model(self, filename):
        """Write the model to ``filename``."""

    @abc.abstractmethod
    def load_model(self, filename):
        """Read a model back from ``filename``."""

    @abc.abstractmethod
    def predict(self, image: list):
        """Run inference on the given input."""


class Model(ABCModel):
    """Couple a dataset with a model object and expose train/export/load/predict.

    Subclasses provide ``modeler`` (as a class attribute) and override
    ``post`` to convert raw predictions into labels.
    """

    modeler: ak.AutoModel

    def __init__(self, datasets: ABCDatasets = None):
        """Bind the model to ``datasets``.

        :param datasets: dataset object supplying ``train_data``, ``test_data``
            and ``label_mapping``. May be ``None`` (the declared default), in
            which case the data attributes are ``None`` and the label mapping
            is empty; such an instance can still load a model and predict.
        """
        self.datasets = datasets
        if datasets is None:
            # The signature advertises None as the default, so it must not
            # fail with an AttributeError on construction.
            self.x_train = self.y_train = None
            self.x_test = self.y_test = None
            self.label_mapping = {}
        else:
            self.x_train, self.y_train = datasets.train_data
            self.x_test, self.y_test = datasets.test_data
            self.label_mapping = datasets.label_mapping

    def train(self, epochs: int = 1):
        """Fit ``modeler`` on the training data.

        :param epochs: number of epochs passed to ``fit`` (defaults to 1).
        :raises ValueError: if no training data has been loaded.
        """
        if self.x_train is None or self.y_train is None:
            raise ValueError(
                "no training data available; call load_data() on the dataset "
                "before constructing the model"
            )
        self.modeler.fit(self.x_train, self.y_train, epochs=epochs)

    def export_model(self, filename):
        """Export the model held by ``modeler`` and save it to ``filename``."""
        self.modeler.export_model().save(filename)

    def load_model(self, filename):
        """Load a saved model from ``filename`` and use it as ``modeler``.

        The loaded object comes from ``tensorflow.keras.models.load_model``,
        so it replaces the class-level ``modeler`` on this instance only.
        NOTE(review): a loaded Keras model presumably has no ``export_model``
        method, so ``export_model`` would fail after this call -- confirm.
        """
        self.modeler = load_model(filename, custom_objects=ak.CUSTOM_OBJECTS)

    def predict(self, images: np.ndarray):
        """Return the raw predictions of ``modeler`` for ``images``.

        :param images: array-like batch of inputs; converted to an ndarray.
        """
        return self.modeler.predict(np.asarray(images))

    def post(self, predict_result):
        """Post-process raw predictions; the base implementation returns None."""
        return None


class MnistDataSets(ABCDatasets):
    """MNIST handwritten-digit dataset together with its label mapping."""

    def __init__(self):
        # Nothing is loaded until load_data() is called.
        self.x_train = self.y_train = self.x_test = self.y_test = None

    def load_data(self):
        """Load the official MNIST handwritten-digit dataset."""
        (self.x_train, self.y_train), (self.x_test, self.y_test) = mnist.load_data()
        # Per the author's notes, printing the shapes gives:
        #   x_train.shape    -> (60000, 28, 28)
        #   y_train.shape    -> (60000,)
        #   x_train[0].shape -> (28, 28)
        # i.e. 60000 single-channel 28x28 images.

    @property
    def label_mapping(self):
        """Map each predicted index 0-9 to the digit it represents."""
        return {digit: digit for digit in range(10)}

    @property
    def train_data(self):
        """Training set as ``(x_train, y_train)``."""
        return self.x_train, self.y_train

    @property
    def test_data(self):
        """Test set as ``(x_test, y_test)``."""
        return self.x_test, self.y_test

    def get_online_test_data(self, url=None, label=3, timeout=10):
        """Download one handwritten-digit picture and preprocess it.

        The picture is decoded as grayscale, resized to 28x28 and binarised
        with a fixed threshold of 165.

        :param url: image URL; defaults to the sample picture used originally.
        :param label: expected digit for that picture (3 for the default URL).
        :param timeout: seconds to wait for the HTTP request.
        :return: ``(gray, label)`` where ``gray`` is the processed image.
        :raises requests.HTTPError: if the server answers with an error status.
        :raises ValueError: if the downloaded bytes cannot be decoded.
        """
        if url is None:
            url = "https://img1.baidu.com/it/u=3472197447,93830654&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=281"
        # A timeout keeps the call from hanging forever on a dead connection.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        raw = np.frombuffer(response.content, dtype=np.uint8)
        gray = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # cv2.imdecode signals failure by returning None rather than raising.
            raise ValueError(f"could not decode an image from {url!r}")
        gray = cv2.resize(gray, (28, 28))
        _, gray = cv2.threshold(gray, thresh=165, maxval=255, type=cv2.THRESH_BINARY)
        return gray, label


class IMDBDataSets(ABCDatasets):
    """IMDB movie-review sentiment dataset together with its label mapping."""

    def __init__(self):
        self.x_train = self.y_train = self.x_test = self.y_test = None
        # sklearn's load_files numbers categories by *sorted* folder name, so
        # index 0 is "neg" and index 1 is "pos". Keeping the list in that order
        # (and defining it here) makes label_mapping correct and usable even
        # before load_data() has run.
        self.classes = ["neg", "pos"]

    def load_data(self):
        """Download (if needed) and load the IMDB reviews into the instance."""
        dataset = tf.keras.utils.get_file(
            fname="aclImdb.tar.gz",
            origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
            extract=True,
        )
        imdb_dir = os.path.join(os.path.dirname(dataset), "aclImdb")

        train_data = load_files(
            os.path.join(imdb_dir, "train"), shuffle=True, categories=self.classes
        )
        test_data = load_files(
            os.path.join(imdb_dir, "test"), shuffle=False, categories=self.classes
        )
        # Use the class order load_files actually assigned to the targets.
        self.classes = list(train_data.target_names)

        self.x_train = np.array(train_data.data)
        self.y_train = np.array(train_data.target)
        self.x_test = np.array(test_data.data)
        self.y_test = np.array(test_data.target)
        print(self.x_train[0])
        print(self.y_train[0])
        print(self.x_train.shape)
        print(self.y_train.shape)
        print(self.x_train[0].shape)
        # Per the author's notes the prints show: the text of one review,
        # then 1, (25000,), (25000,) and ().

    @property
    def label_mapping(self):
        """Map target index 0/1 to its class name."""
        return {0: self.classes[0], 1: self.classes[1]}

    @property
    def train_data(self):
        """Training set as ``(x_train, y_train)``."""
        return self.x_train, self.y_train

    @property
    def test_data(self):
        """Test set as ``(x_test, y_test)``."""
        return self.x_test, self.y_test

模型训练与测试

from tools import Model, MnistDataSets  # 这里的包由上面的封装导入
import autokeras as ak


class ImageClassifier(Model):
    """Image classification model backed by ``ak.ImageClassifier``."""

    modeler = ak.ImageClassifier(overwrite=True, max_trials=1)

    def post(self, predict_result):
        """Turn per-image score rows into labels.

        For every row the index of the largest score is looked up in
        ``label_mapping``; indices missing from the mapping yield ``None``.
        """
        return [
            self.label_mapping.get(scores.argmax())
            for scores in predict_result
        ]


class ImageRegressor(Model):
    """Image regression model backed by ``ak.ImageRegressor``."""

    modeler = ak.ImageRegressor(overwrite=True, max_trials=1)

    @staticmethod
    def _to_label_index(img_predict):
        """Convert one regression output to the nearest integer index."""
        # Each prediction may be a one-element array; .item() extracts the
        # scalar explicitly instead of relying on int(array), which NumPy
        # deprecates for arrays with ndim > 0.
        value = img_predict.item() if hasattr(img_predict, "item") else img_predict
        # Round to the nearest integer: plain int() would truncate, turning a
        # prediction of 2.9 into label 2 instead of 3.
        return int(round(float(value)))

    def post(self, predict_result):
        """Turn regression outputs into labels.

        Each output is rounded to the nearest integer and looked up in
        ``label_mapping``; values missing from the mapping yield ``None``.
        """
        return [
            self.label_mapping.get(self._to_label_index(img_predict))
            for img_predict in predict_result
        ]


def train_image_classifier():
    """Train the MNIST image classifier and save it as an .h5 file."""
    mnist_data = MnistDataSets()
    mnist_data.load_data()

    classifier = ImageClassifier(datasets=mnist_data)
    classifier.train()
    classifier.export_model("mnist_image_classifier.h5")


def test_image_classifier():
    """Check the saved classifier against one picture fetched online."""
    mnist_data = MnistDataSets()
    sample, expected = mnist_data.get_online_test_data()

    classifier = ImageClassifier(datasets=mnist_data)
    classifier.load_model("mnist_image_classifier.h5")

    raw_output = classifier.predict(images=[sample])
    # Only one image was submitted, so only the first label is of interest.
    predicted = classifier.post(raw_output)[0]

    print("predict_result", raw_output)
    print(expected, predicted, expected == predicted)


def train_image_regressor():
    """Train the MNIST image regressor and save it as an .h5 file."""
    mnist_data = MnistDataSets()
    mnist_data.load_data()

    regressor = ImageRegressor(datasets=mnist_data)
    regressor.train()
    regressor.export_model("mnist_image_regressor.h5")


def test_image_regressor():
    """Check the saved regressor against one picture fetched online."""
    mnist_data = MnistDataSets()
    sample, expected = mnist_data.get_online_test_data()

    regressor = ImageRegressor(datasets=mnist_data)
    regressor.load_model("mnist_image_regressor.h5")

    raw_output = regressor.predict(images=[sample])
    # Only one image was submitted, so only the first label is of interest.
    predicted = regressor.post(raw_output)[0]

    print("predict_result", raw_output)
    print(expected, predicted, expected == predicted)


if __name__ == '__main__':
    # Imported inside the guard, so `fire` is only needed when this file is
    # run as a script, not when it is imported.
    import fire

    # Hand the command line over to python-fire. The article runs this file as
    # `python3 model.py train_image_classifier`, i.e. the function to execute
    # is chosen by name from the command line.
    fire.Fire()

执行结果

  • 训练
[~]# python3 model.py train_image_classifier
Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
image_block_1/b...|vanilla           |?                 
image_block_1/n...|True              |?                 
image_block_1/a...|False             |?                 
image_block_1/c...|3                 |?                 
image_block_1/c...|1                 |?                 
image_block_1/c...|2                 |?                 
image_block_1/c...|True              |?                 
image_block_1/c...|False             |?                 
image_block_1/c...|0.25              |?                 
image_block_1/c...|32                |?                 
image_block_1/c...|64                |?                 
classification_...|flatten           |?                 
classification_...|0.5               |?                 
optimizer         |adam              |?                 
learning_rate     |0.001             |?                 

1500/1500 [==============================] - 76s 50ms/step - loss: 0.1742 - accuracy: 0.9471 - val_loss: 0.0739 - val_accuracy: 0.9791
...

  • 测试
[~]# python3 model.py test_image_classifier
predict_result [[3.1589475e-04 3.8880799e-02 5.0686980e-03 9.2180651e-01 9.0317568e-03
  2.1918179e-02 9.9024124e-05 3.8853439e-05 2.5504678e-03 2.8968096e-04]]
3 3 True

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号