One-vs.-all provides a way to leverage binary classification. Given a classification problem with N possible solutions, a one-vs.-all solution consists of N separate binary classifiers, one binary classifier for each possible outcome.

During training, the model runs through a sequence of binary classifiers, training each to answer a separate classification question.

For example, given a picture of a dog, five different recognizers might be trained, four seeing the image as a negative example (not a dog) and one seeing the image as a positive example (a dog). That is:

1. Is this image an apple? No.
2. Is this image a bear? No.
3. Is this image candy? No.
4. Is this image a dog? Yes.
5. Is this image an egg? No.
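To make the mechanics concrete, here is a minimal one-vs.-all sketch using scikit-learn's `OneVsRestClassifier` (an illustrative addition, not part of the original lesson; the toy data and the five-class setup are assumptions):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# Toy data: 100 examples, 20 features, labels drawn from 5 classes
# (think apple/bear/candy/dog/egg).
rng = np.random.RandomState(0)
X = rng.randn(100, 20)
y = rng.randint(0, 5, size=100)

# OneVsRestClassifier fits N separate binary logistic regressions,
# one per class, exactly as described above.
ovr = OneVsRestClassifier(LogisticRegression())
ovr.fit(X, y)
print(len(ovr.estimators_))  # 5 binary classifiers, one per class
```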
This approach is fairly reasonable when the total number of classes is small, but it becomes increasingly inefficient as the number of classes rises.

We can create a significantly more efficient one-vs.-all model with a deep neural network in which each output node represents a different class, as sketched below.
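A minimal sketch of that single-network idea with `tf.keras` (the hidden-layer size and the five-class setup are illustrative assumptions): each of the five output nodes is an independent sigmoid, so the shared hidden layer effectively feeds five binary classifiers at once.

```python
import tensorflow as tf

# One-vs.-all inside a single network: five sigmoid output nodes, each an
# independent binary classifier, all sharing one hidden layer.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(5, activation='sigmoid')  # one node per class
])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()
```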
As we already know, logistic regression produces a decimal between 0 and 1.0. For example, a logistic regression output of 0.8 from an email classifier suggests an 80% chance of the email being spam and a 20% chance of it not being spam. Clearly, the probabilities of an email being spam and not being spam sum to 1.0.
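In code, that two-way split looks like this (the logit value 1.386 is made up so that the sigmoid lands near 0.8):

```python
import numpy as np

def sigmoid(z):
    """Map a raw logit to a probability in (0, 1)."""
    return 1.0 / (1.0 + np.exp(-z))

p_spam = sigmoid(1.386)      # illustrative logit, not from the text
print(p_spam, 1.0 - p_spam)  # ~0.80 spam vs. ~0.20 not spam
```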
Softmax extends this idea to the multi-class world. That is, in a multi-class problem, Softmax assigns a decimal probability to each class. Those decimal probabilities must add up to 1.0. Compared with the alternative, this additional constraint helps training converge more quickly.
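A minimal NumPy sketch of softmax itself (the logit values are made up for illustration):

```python
import numpy as np

def softmax(logits):
    """Turn raw scores into probabilities that sum to 1.0."""
    exps = np.exp(logits - np.max(logits))  # subtract the max for numerical stability
    return exps / exps.sum()

probs = softmax(np.array([1.2, 0.3, -0.8, 4.1, 0.0]))
print(probs)        # largest probability goes to the largest logit
print(probs.sum())  # 1.0
```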
For example, returning to the image analysis example we saw in Figure 1, Softmax might produce the following probabilities of the image belonging to each particular class:
| Class | Probability |
|---|---|
| apple | 0.001 |
| bear | 0.04 |
| candy | 0.008 |
| dog | 0.95 |
| egg | 0.001 |
The Softmax layer is the neural network layer just before the output layer. The Softmax layer must have the same number of nodes as the output layer, as in the sketch below.
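For contrast with the sigmoid sketch above (same illustrative 784-pixel inputs and five classes), replacing the independent sigmoids with a softmax turns the five outputs into a single probability distribution:

```python
import tensorflow as tf

# Softmax variant: the five output nodes now form one probability
# distribution summing to 1.0, rather than five independent sigmoids.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(5, activation='softmax')  # one node per class
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
```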
The exercise below applies these ideas, training a multi-class linear classifier on the MNIST digits with TensorFlow's Estimator API:

```python
from __future__ import print_function

import glob
import math
import os

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

mnist_dataframe = pd.read_csv(
    "https://download.mlcc.google.cn/mledu-datasets/mnist_train_small.csv",
    sep=",",
    header=None)

# Use just the first 10,000 records for training/validation.
# (The raw dataset has 60,000 rows.)
mnist_dataframe = mnist_dataframe.head(10000)

mnist_dataframe = mnist_dataframe.reindex(
    np.random.permutation(mnist_dataframe.index))
mnist_dataframe.describe()


def parse_labels_and_features(dataset):
  """Extracts labels and features.

  This is a good place to scale or transform the features if needed.

  Args:
    dataset: A Pandas `Dataframe`, containing the label on the first column
      and monochrome pixel values on the remaining columns, in row major
      order.
  Returns:
    A `tuple` `(labels, features)`:
      labels: A Pandas `Series`.
      features: A Pandas `DataFrame`.
  """
  labels = dataset[0]  # The first column holds the target.

  # DataFrame.loc index ranges are inclusive at both ends, so this takes
  # every column except the first (i.e., it drops the label column).
  features = dataset.loc[:, 1:784]
  # Scale the data to [0, 1] by dividing out the max value, 255.
  features = features / 255

  return labels, features


training_targets, training_examples = parse_labels_and_features(
    mnist_dataframe[:7500])
# training_examples.describe()

validation_targets, validation_examples = parse_labels_and_features(
    mnist_dataframe[7500:10000])
# validation_examples.describe()

# Pick one row index uniformly at random. (Unused below; the grid instead
# shows rows 1-9 of the training set.)
rand_example = np.random.choice(training_examples.index)
plt.figure(figsize=(12, 12), dpi=200)
for i in range(1, 10):
  ax = plt.subplot(3, 3, i)
  # Reshape one row of pixel values into a 28x28 matrix for display.
  ax.matshow(training_examples.iloc[i].values.reshape(28, 28))
  ax.set_title("Label: %i" % training_targets.iloc[i])
  ax.grid(False)


def construct_feature_columns():
  """Construct the TensorFlow Feature Columns.

  Returns:
    A set of feature columns
  """
  # There are 784 pixels in each image, held in a single numeric column.
  return set([tf.feature_column.numeric_column('pixels', shape=784)])


def create_training_input_fn(features, labels, batch_size, num_epochs=None,
                             shuffle=True):
  """A custom input_fn for sending MNIST data to the estimator for training.

  Args:
    features: The training features.
    labels: The training labels.
    batch_size: Batch size to use during training.

  Returns:
    A function that returns batches of training features and labels during
    training.
  """
  def _input_fn(num_epochs=None, shuffle=True):
    # Input pipelines are reset with each call to .train(). To ensure model
    # gets a good sampling of data, even when number of steps is small, we
    # shuffle all the data before creating the Dataset object.
    idx = np.random.permutation(features.index)  # Shuffle the row order.
    raw_features = {"pixels": features.reindex(idx)}  # Features as a dict keyed by column name.
    raw_targets = np.array(labels[idx])

    ds = Dataset.from_tensor_slices((raw_features, raw_targets))  # warning: 2GB limit
    # Split the data into batches of size batch_size, repeated num_epochs times.
    ds = ds.batch(batch_size).repeat(num_epochs)

    if shuffle:
      ds = ds.shuffle(10000)  # Shuffle with a buffer of 10,000 examples.

    # Return the next batch of data: the one-shot iterator yields batched
    # features and labels.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch

  return _input_fn


def create_predict_input_fn(features, labels, batch_size):
  """A custom input_fn for sending mnist data to the estimator for predictions.

  Args:
    features: The features to base predictions on.
    labels: The labels of the prediction examples.

  Returns:
    A function that returns features and labels for predictions.
  """
  def _input_fn():
    raw_features = {"pixels": features.values}
    raw_targets = np.array(labels)

    ds = Dataset.from_tensor_slices((raw_features, raw_targets))  # warning: 2GB limit
    ds = ds.batch(batch_size)

    # Return the next batch of data.
    feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
    return feature_batch, label_batch

  return _input_fn


def train_linear_classification_model(
    learning_rate,
    steps,
    batch_size,
    training_examples,
    training_targets,
    validation_examples,
    validation_targets):
  """Trains a linear classification model for the MNIST digits dataset.

  In addition to training, this function also prints training progress
  information, a plot of the training and validation loss over time, and a
  confusion matrix.

  Args:
    learning_rate: A `float`, the learning rate to use.
    steps: A non-zero `int`, the total number of training steps. A training
      step consists of a forward and backward pass using a single batch.
    batch_size: A non-zero `int`, the batch size.
    training_examples: A `DataFrame` containing the training features.
    training_targets: A `DataFrame` containing the training labels.
    validation_examples: A `DataFrame` containing the validation features.
    validation_targets: A `DataFrame` containing the validation labels.

  Returns:
    The trained `LinearClassifier` object.
  """

  periods = 10
  steps_per_period = steps / periods

  # Create the input functions.
  predict_training_input_fn = create_predict_input_fn(  # Training set, for prediction.
      training_examples, training_targets, batch_size)
  predict_validation_input_fn = create_predict_input_fn(  # Validation set, for prediction.
      validation_examples, validation_targets, batch_size)
  training_input_fn = create_training_input_fn(  # Training set, for training.
      training_examples, training_targets, batch_size)

  # Create a LinearClassifier object.
  my_optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
  my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
  classifier = tf.estimator.LinearClassifier(
      feature_columns=construct_feature_columns(),
      n_classes=10,  # Number of target classes (digits 0-9).
      optimizer=my_optimizer,
      config=tf.estimator.RunConfig(keep_checkpoint_max=1)
  )

  # Train the model, but do so inside a loop so that we can periodically assess
  # loss metrics.
  print("Training model...")
  print("LogLoss error (on validation data):")
  training_errors = []
  validation_errors = []
  for period in range(0, periods):
    # Train the model, starting from the prior state.
    classifier.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )

    # Take a break and compute probabilities.
    training_predictions = list(classifier.predict(input_fn=predict_training_input_fn))
    training_probabilities = np.array([item['probabilities'] for item in training_predictions])
    training_pred_class_id = np.array([item['class_ids'][0] for item in training_predictions])
    training_pred_one_hot = tf.keras.utils.to_categorical(training_pred_class_id, 10)

    validation_predictions = list(classifier.predict(input_fn=predict_validation_input_fn))
    validation_probabilities = np.array([item['probabilities'] for item in validation_predictions])
    validation_pred_class_id = np.array([item['class_ids'][0] for item in validation_predictions])
    validation_pred_one_hot = tf.keras.utils.to_categorical(validation_pred_class_id, 10)

    # Compute training and validation errors.
    training_log_loss = metrics.log_loss(training_targets, training_pred_one_hot)
    validation_log_loss = metrics.log_loss(validation_targets, validation_pred_one_hot)
    # Occasionally print the current loss.
    print("  period %02d : %0.2f" % (period, validation_log_loss))
    # Add the loss metrics from this period to our list.
    training_errors.append(training_log_loss)
    validation_errors.append(validation_log_loss)
  print("Model training finished.")
  # Remove event files to save disk space. (Wrapped in list() so the lazy
  # Python 3 map actually runs.)
  _ = list(map(os.remove, glob.glob(os.path.join(classifier.model_dir, 'events.out.tfevents*'))))

  # Calculate final predictions (not probabilities, as above).
  final_predictions = classifier.predict(input_fn=predict_validation_input_fn)
  final_predictions = np.array([item['class_ids'][0] for item in final_predictions])

  accuracy = metrics.accuracy_score(validation_targets, final_predictions)
  print("Final accuracy (on validation data): %0.2f" % accuracy)

  # Output a graph of loss metrics over periods.
  plt.ylabel("LogLoss")
  plt.xlabel("Periods")
  plt.title("LogLoss vs. Periods")
  plt.plot(training_errors, label="training")
  plt.plot(validation_errors, label="validation")
  plt.legend()
  plt.show()

  # Output a plot of the confusion matrix.
  cm = metrics.confusion_matrix(validation_targets, final_predictions)
  # Normalize the confusion matrix by row (i.e. by the number of samples
  # in each class).
  cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
  ax = sns.heatmap(cm_normalized, cmap="bone_r")
  ax.set_aspect(1)
  plt.title("Confusion matrix")
  plt.ylabel("True label")
  plt.xlabel("Predicted label")
  plt.show()

  return classifier


classifier = train_linear_classification_model(
    learning_rate=0.02,
    steps=100,
    batch_size=10,
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)
```
```
Training model...
LogLoss error (on validation data):
  period 00 : 16.21
  period 01 : 12.09
  period 02 : 8.18
  period 03 : 8.46
  period 04 : 8.04
  period 05 : 6.22
  period 06 : 6.96
  period 07 : 5.73
  period 08 : 5.77
  period 09 : 5.51
Model training finished.
Final accuracy (on validation data): 0.84
```