
Deep Learning and Neural Networks: Classification Problems

1.1 Logistic Regression

Linear regression

Linear regression models the relationship between the independent and dependent variables with a linear function. Assuming such a linear relationship holds, a model fitted to known sample data can be used to estimate future or unknown values.


In many real problems the relationship between the variables is not linear, so we introduce a broader concept: generalized linear regression.
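As a rough sketch of the two model families (my notation, matching the code later in this section), a univariate linear model and its logistic counterpart can be written as

$$\hat{y} = wx + b, \qquad \hat{y} = \sigma(wx + b) = \frac{1}{1 + e^{-(wx+b)}},$$

where the sigmoid $\sigma$ squashes the linear output into $(0, 1)$ so that it can be read as the probability of the positive class.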


Classification problems: spam filtering, image classification, disease diagnosis.

        Classifier: a model that automatically assigns a class to its input data.

        Input: features. Output: discrete values.


Building a classifier (a small sketch follows this list):

1. Prepare training samples.

2. Train the classifier.

3. Classify new samples.
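A minimal sketch of these three steps on made-up one-dimensional data, using a simple threshold rule in place of a trained model (all names and numbers here are illustrative, not from the original notes):

import numpy as np

# 1. prepare training samples: (feature, label) pairs -- made-up numbers
x_train = np.array([1.0, 2.0, 3.0, 8.0, 9.0, 10.0])
y_train = np.array([0, 0, 0, 1, 1, 1])

# 2. "train" the classifier: place a threshold halfway between the two class means
threshold = (x_train[y_train == 0].mean() + x_train[y_train == 1].mean()) / 2

# 3. classify new samples with the learned threshold
x_new = np.array([2.5, 7.5])
y_pred = (x_new > threshold).astype(int)
print(threshold, y_pred)  # 5.5 [0 1]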


Cross-entropy loss function
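For a binary problem, the cross-entropy between the labels $y_i \in \{0,1\}$ and the predicted probabilities $\hat{y}_i$ is (written here in my own notation, matching the code in section 1.2):

$$\mathrm{Loss} = -\sum_{i=1}^{N}\Big[y_i \ln \hat{y}_i + (1-y_i)\ln(1-\hat{y}_i)\Big], \qquad \mathrm{avgLoss} = \frac{\mathrm{Loss}}{N}.$$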


1.2 Implementing Univariate Logistic Regression in TensorFlow

The sigmoid() function
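The code below evaluates the sigmoid (logistic) function, $\sigma(z) = \dfrac{1}{1+e^{-z}}$, element-wise at $z = wx + b$.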

import tensorflow as tf
import numpy as np

# sigmoid applied element-wise to w * x + b
x = np.array([1., 2., 3., 4.], dtype=np.float32)  # float32 so it matches the default dtype of tf.Variable(1.)
w = tf.Variable(1.)
b = tf.Variable(1.)
y = 1 / (1 + tf.exp(-(w * x + b)))

Cross-entropy loss function

import tensorflow as tf
import numpy as np

# cross-entropy loss
y = np.array([0, 0, 1, 1])
pred = np.array([0.1, 0.2, 0.8, 0.49])
Loss = -tf.reduce_sum(y * tf.math.log(pred) + (1 - y) * tf.math.log(1 - pred))
# average cross-entropy loss
avgLoss = -tf.reduce_mean(y * tf.math.log(pred) + (1 - y) * tf.math.log(1 - pred))
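As a rough check of the two reductions above (my own arithmetic, not from the original notes): the summed loss is $-(\ln 0.9 + \ln 0.8 + \ln 0.8 + \ln 0.49) \approx 1.26$, and the averaged loss is that value divided by 4, roughly 0.32. reduce_sum grows with the number of samples, while reduce_mean stays comparable across different batch sizes.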

Accuracy

import tensorflow as tf
import numpy as np

y = np.array([0, 0, 1, 1])
pred = np.array([0.1, 0.2, 0.8, 0.49])
tf.round(pred)               # round each probability to 0 or 1
tf.equal(tf.round(pred), y)  # element-wise comparison with the labels
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(pred), y), tf.float32))
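With these values tf.round(pred) gives [0., 0., 1., 0.], which agrees with y in three of the four positions, so accuracy should come out as 0.75.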

1.3 House Sales Records Demo

1. Prepare the data


2. Load the data

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

x = np.array([137.97, 104.50, 100.00, 126.32, 79.20, 99.00, 124.00, 114.00,
              106.69, 140.05, 53.75, 46.91, 68.00, 63.02, 81.26, 86.21])
y = np.array([1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0])
plt.scatter(x, y)
plt.show()


3. Preprocess the data. Because the sigmoid function is centered at zero, center the data by subtracting the mean.

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# load the data
x = np.array([137.97, 104.50, 100.00, 126.32, 79.20, 99.00, 124.00, 114.00,
              106.69, 140.05, 53.75, 46.91, 68.00, 63.02, 81.26, 86.21])
y = np.array([1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0])
# center the data
x_train = x - np.mean(x)
y_train = y
plt.scatter(x_train, y_train)
plt.show()

The relative positions of the data points are unchanged; the whole set is simply shifted (so that the mean of x_train is zero).


4. Set the hyperparameters and the initial values of the model variables

# hyperparameters
learn_rate = 0.005
iter = 5
display_step = 1
# initial values of the model variables
np.random.seed(612)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())

5. Train the model

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# load the data
x = np.array([137.97, 104.50, 100.00, 126.32, 79.20, 99.00, 124.00, 114.00,
              106.69, 140.05, 53.75, 46.91, 68.00, 63.02, 81.26, 86.21])
y = np.array([1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0])
# center the data
x_train = x - np.mean(x)
y_train = y
# hyperparameters
learn_rate = 0.005
iter = 5
display_step = 1
# initial values of the model variables
np.random.seed(612)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())
# train the model
cross_train = []  # cross-entropy loss on the training set
acc_train = []    # classification accuracy on the training set
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        pred_train = 1 / (1 + tf.exp(-(w * x_train + b)))
        Loss_train = -tf.reduce_mean(y_train * tf.math.log(pred_train) + (1 - y_train) * tf.math.log(1 - pred_train))
    Accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.where(pred_train < 0.5, 0, 1), y_train), tf.float32))
    cross_train.append(Loss_train)
    acc_train.append(Accuracy_train)
    dL_dw, dL_db = tape.gradient(Loss_train, [w, b])
    w.assign_sub(learn_rate * dL_dw)
    b.assign_sub(learn_rate * dL_db)
    if i % display_step == 0:
        print("i: %i, Train Loss: %f, Accuracy: %f" % (i, Loss_train, Accuracy_train))


6. The sigmoid curve under the initial weights

# sigmoid curve under the initial weights
np.random.seed(612)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())
x_ = range(-80, 80)
y_ = 1 / (1 + tf.exp(-(w * x_ + b)))
plt.plot(x_, y_)
plt.show()


7. Plot all of the sigmoid curves during training

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# load the data
x = np.array([137.97, 104.50, 100.00, 126.32, 79.20, 99.00, 124.00, 114.00,
              106.69, 140.05, 53.75, 46.91, 68.00, 63.02, 81.26, 86.21])
y = np.array([1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0])
# center the data
x_train = x - np.mean(x)
y_train = y
# hyperparameters
learn_rate = 0.005
iter = 5
display_step = 1
# initial values of the model variables
np.random.seed(612)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())
x_ = range(-80, 80)
y_ = 1 / (1 + tf.exp(-(w * x_ + b)))
# train the model
plt.scatter(x_train, y_train)
plt.plot(x_, y_, color="red", linewidth=3)  # curve under the initial weights
cross_train = []  # cross-entropy loss on the training set
acc_train = []    # classification accuracy on the training set
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        pred_train = 1 / (1 + tf.exp(-(w * x_train + b)))
        Loss_train = -tf.reduce_mean(y_train * tf.math.log(pred_train) + (1 - y_train) * tf.math.log(1 - pred_train))
    Accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.where(pred_train < 0.5, 0, 1), y_train), tf.float32))
    cross_train.append(Loss_train)
    acc_train.append(Accuracy_train)
    dL_dw, dL_db = tape.gradient(Loss_train, [w, b])
    w.assign_sub(learn_rate * dL_dw)
    b.assign_sub(learn_rate * dL_db)
    if i % display_step == 0:
        print("i: %i, Train Loss: %f, Accuracy: %f" % (i, Loss_train, Accuracy_train))
        y_ = 1 / (1 + tf.exp(-(w * x_ + b)))
        plt.plot(x_, y_)  # curve after this update
plt.show()


8. Use the trained model to classify new houses and visualize the results

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# load the data
x = np.array([137.97, 104.50, 100.00, 126.32, 79.20, 99.00, 124.00, 114.00,
              106.69, 140.05, 53.75, 46.91, 68.00, 63.02, 81.26, 86.21])
y = np.array([1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0])
# center the data
x_train = x - np.mean(x)
y_train = y
# hyperparameters
learn_rate = 0.005
iter = 5
display_step = 1
# initial values of the model variables
np.random.seed(612)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())
x_ = range(-80, 80)
y_ = 1 / (1 + tf.exp(-(w * x_ + b)))
# train the model
cross_train = []  # cross-entropy loss on the training set
acc_train = []    # classification accuracy on the training set
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        pred_train = 1 / (1 + tf.exp(-(w * x_train + b)))
        Loss_train = -tf.reduce_mean(y_train * tf.math.log(pred_train) + (1 - y_train) * tf.math.log(1 - pred_train))
    Accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.where(pred_train < 0.5, 0, 1), y_train), tf.float32))
    cross_train.append(Loss_train)
    acc_train.append(Accuracy_train)
    dL_dw, dL_db = tape.gradient(Loss_train, [w, b])
    w.assign_sub(learn_rate * dL_dw)
    b.assign_sub(learn_rate * dL_db)
    if i % display_step == 0:
        print("i: %i, Train Loss: %f, Accuracy: %f" % (i, Loss_train, Accuracy_train))

# classify new samples with the trained model
x_test = [128.15, 45.00, 141.43, 106.27, 99.00, 53.84, 85.36, 70.00, 162.00, 114.60]
pred_test = 1 / (1 + tf.exp(-(w * (x_test - np.mean(x)) + b)))  # center the test data with the training mean
y_test = tf.where(pred_test < 0.5, 0, 1)
for i in range(len(x_test)):
    print(x_test[i], "\t\t", pred_test[i].numpy(), "\t\t", y_test[i].numpy(), "\t\t")

# visualize the result
plt.scatter(x_test, y_test)
x_ = np.array(range(-80, 80), dtype=float)  # float dtype so it matches w and b
y_ = 1 / (1 + tf.exp(-(w * x_ + b)))
plt.plot(x_ + np.mean(x), y_)
plt.show()

1.4 Linear Classifiers (Decision Boundaries)

One dimension: the two classes are separated by a point.

Two dimensions: the two classes are separated by a line (the boundary equation for this case is sketched below).

Three dimensions: the two classes are separated by a plane.
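In my own notation (consistent with the plotting code in section 1.5): for a model with two features, the decision boundary is the set of points where the predicted probability equals 0.5, i.e. where the linear part is zero,

$$\sigma(w_0 + w_1 x_1 + w_2 x_2) = 0.5 \;\Longleftrightarrow\; w_0 + w_1 x_1 + w_2 x_2 = 0 \;\Longleftrightarrow\; x_2 = -\frac{w_0 + w_1 x_1}{w_2},$$

which is exactly the straight line drawn by y_ = -(W[0] + W[1] * x_) / W[2] in the iris code below.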


Linearly non-separable: the two classes cannot be separated by a single straight line.


Logical operations: AND and OR are linearly separable, while XOR is not, so XOR cannot be realized by a single linear classifier (see the sketch below).
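A small illustrative sketch (my own example, not from the original notes): a single linear threshold unit can realize AND and OR, but no choice of weights and bias reproduces XOR's outputs [0, 1, 1, 0].

def linear_unit(x1, x2, w1, w2, b):
    # single linear threshold unit: output 1 when w1*x1 + w2*x2 + b > 0
    return int(w1 * x1 + w2 * x2 + b > 0)

inputs = [(0, 0), (0, 1), (1, 0), (1, 1)]
print([linear_unit(x1, x2, 1, 1, -1.5) for x1, x2 in inputs])  # AND -> [0, 0, 0, 1]
print([linear_unit(x1, x2, 1, 1, -0.5) for x1, x2 in inputs])  # OR  -> [0, 1, 1, 1]
# XOR would need [0, 1, 1, 0], which no single straight line can produce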

1.5 Multivariate Logistic Regression

The iris dataset (a quick look at the data file follows this list):

  • 150 samples
  • 4 attributes
    • sepal length
    • sepal width
    • petal length
    • petal width
  • 1 label
    • Iris setosa
    • Iris versicolor
    • Iris virginica
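As a quick sanity check of the file layout (a sketch of my own; it assumes the same TRAIN_URL used in the script below, whose CSV holds the 120-sample training split of the full dataset):

import tensorflow.keras as keras
import pandas as pd

TRAIN_URL = "https://download.tensorflow.org/data/iris_training.csv"
train_path = keras.utils.get_file(TRAIN_URL.split("/")[-1], TRAIN_URL)
df_iris = pd.read_csv(train_path, header=0)
print(df_iris.shape)   # expected (120, 5): 4 attribute columns plus 1 label column
print(df_iris.head())  # sepal length/width, petal length/width, species code 0/1/2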

Use logistic regression on sepal length and sepal width to classify Iris setosa versus Iris versicolor.

import tensorflow.keras as keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import tensorflow as tf

TRAIN_URL = "https://download.tensorflow.org/data/iris_training.csv"
train_path = keras.utils.get_file(TRAIN_URL.split("/")[-1], TRAIN_URL)
df_iris = pd.read_csv(train_path, header=0)

# preprocess the data:
# 1. convert the DataFrame to a NumPy array
# 2. split attributes and labels
# 3. keep only Iris setosa and Iris versicolor (label < 2)
iris = np.array(df_iris)
train_x = iris[:, 0:2]
train_y = iris[:, 4]
x_train = train_x[train_y < 2]
y_train = train_y[train_y < 2]
num = len(x_train)

# center the attributes
x_train = x_train - np.mean(x_train, axis=0)

# build the attribute matrix and the label column vector of the multivariate model
x0_train = np.ones(num).reshape(-1, 1)
X = tf.cast(tf.concat((x0_train, x_train), axis=1), tf.float32)
Y = tf.cast(y_train.reshape(-1, 1), tf.float32)

# hyperparameters
learn_rate = 0.2
iter = 120
display_step = 30

# initial model parameters
np.random.seed(612)
W = tf.Variable(np.random.randn(3, 1), dtype=tf.float32)

# train the model
cm_pt = mpl.colors.ListedColormap(["blue", "red"])
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_pt)
x_ = [-1.5, 1.5]
y_ = -(W[1] * x_ + W[0]) / W[2]
plt.plot(x_, y_, color="red", linewidth=3)  # decision boundary under the initial weights
plt.xlim([-1.5, 1.5])
plt.ylim([-1.5, 1.5])
ce = []
acc = []
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        PRED = 1 / (1 + tf.exp(-tf.matmul(X, W)))
        Loss = -tf.reduce_mean(Y * tf.math.log(PRED) + (1 - Y) * tf.math.log(1 - PRED))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED.numpy() < 0.5, 0., 1.), Y), tf.float32))
    ce.append(Loss)
    acc.append(accuracy)
    dL_dW = tape.gradient(Loss, W)
    W.assign_sub(learn_rate * dL_dW)
    if i % display_step == 0:
        print("i: %i, Acc: %f, Loss: %f" % (i, accuracy, Loss))
        y_ = -(W[0] + W[1] * x_) / W[2]
        plt.plot(x_, y_)  # decision boundary after this update
plt.show()

Evaluate the model's performance on the test set

import tensorflow.keras as keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import tensorflow as tf

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = keras.utils.get_file(TRAIN_URL.split("/")[-1], TRAIN_URL)
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
test_path = keras.utils.get_file(TEST_URL.split("/")[-1], TEST_URL)
df_iris_train = pd.read_csv(train_path, header=0)
df_iris_test = pd.read_csv(test_path, header=0)

# preprocess the data:
# 1. convert the DataFrames to NumPy arrays
# 2. split attributes and labels
# 3. keep only Iris setosa and Iris versicolor (label < 2)
iris_train = np.array(df_iris_train)
iris_test = np.array(df_iris_test)
train_x = iris_train[:, 0:2]
train_y = iris_train[:, 4]
test_x = iris_test[:, 0:2]
test_y = iris_test[:, 4]
x_train = train_x[train_y < 2]
y_train = train_y[train_y < 2]
x_test = test_x[test_y < 2]
y_test = test_y[test_y < 2]
num_train = len(x_train)
num_test = len(x_test)

# center the attributes
x_train = x_train - np.mean(x_train, axis=0)
x_test = x_test - np.mean(x_test, axis=0)

# build the attribute matrices and the label column vectors
x0_train = np.ones(num_train).reshape(-1, 1)
X_train = tf.cast(tf.concat((x0_train, x_train), axis=1), tf.float32)
Y_train = tf.cast(y_train.reshape(-1, 1), dtype=tf.float32)
x0_test = np.ones(num_test).reshape(-1, 1)
X_test = tf.cast(tf.concat((x0_test, x_test), axis=1), tf.float32)
Y_test = tf.cast(y_test.reshape(-1, 1), dtype=tf.float32)

# hyperparameters
learn_rate = 0.2
iter = 120
display_step = 30

# initial model parameters
np.random.seed(612)
W = tf.Variable(np.random.randn(3, 1), dtype=tf.float32)

# train the model
ce_train = []
acc_train = []
ce_test = []
acc_test = []
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        PRED_train = 1 / (1 + tf.exp(-tf.matmul(X_train, W)))
        Loss_train = -tf.reduce_mean(Y_train * tf.math.log(PRED_train) + (1 - Y_train) * tf.math.log(1 - PRED_train))
        PRED_test = 1 / (1 + tf.exp(-tf.matmul(X_test, W)))
        Loss_test = -tf.reduce_mean(Y_test * tf.math.log(PRED_test) + (1 - Y_test) * tf.math.log(1 - PRED_test))
    accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED_train.numpy() < 0.5, 0., 1.), Y_train), tf.float32))
    accuracy_test = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED_test.numpy() < 0.5, 0., 1.), Y_test), tf.float32))
    ce_train.append(Loss_train)
    acc_train.append(accuracy_train)
    ce_test.append(Loss_test)
    acc_test.append(accuracy_test)
    dL_dW = tape.gradient(Loss_train, W)  # gradients come from the training loss only
    W.assign_sub(learn_rate * dL_dW)
    if i % display_step == 0:
        print("i: %i, Acc_train: %f, Loss_train: %f, Acc_test: %f, Loss_test: %f" % (
            i, accuracy_train, Loss_train, accuracy_test, Loss_test))

# visualize the loss and accuracy on the training and test sets
plt.figure(figsize=(10, 3))
plt.subplot(121)
plt.plot(ce_train, color="blue", label="train")
plt.plot(ce_test, color="red", label="test")
plt.ylabel("Loss")
plt.legend()
plt.subplot(122)
plt.plot(acc_train, color="blue", label="train")
plt.plot(acc_test, color="red", label="test")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

1.6 Drawing the Classification Map

Generate grid coordinate matrices with np.meshgrid() and fill the grid cells with plt.pcolormesh().

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

n = 10  # larger n gives a finer color transition
x = np.linspace(-10, 10, n)
y = np.linspace(-10, 10, n)
X, Y = np.meshgrid(x, y)
Z = X + Y
plt.pcolormesh(X, Y, Z, cmap="rainbow")
plt.show()

Draw the classification map of the iris model on the training set
import tensorflow.keras as keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import tensorflow as tf

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = keras.utils.get_file(TRAIN_URL.split("/")[-1], TRAIN_URL)
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
test_path = keras.utils.get_file(TEST_URL.split("/")[-1], TEST_URL)
df_iris_train = pd.read_csv(train_path, header=0)
df_iris_test = pd.read_csv(test_path, header=0)

# preprocess the data:
# 1. convert the DataFrames to NumPy arrays
# 2. split attributes and labels
# 3. keep only Iris setosa and Iris versicolor (label < 2)
iris_train = np.array(df_iris_train)
iris_test = np.array(df_iris_test)
train_x = iris_train[:, 0:2]
train_y = iris_train[:, 4]
test_x = iris_test[:, 0:2]
test_y = iris_test[:, 4]
x_train = train_x[train_y < 2]
y_train = train_y[train_y < 2]
x_test = test_x[test_y < 2]
y_test = test_y[test_y < 2]
num_train = len(x_train)
num_test = len(x_test)

# center the attributes
x_train = x_train - np.mean(x_train, axis=0)
x_test = x_test - np.mean(x_test, axis=0)

# build the attribute matrices and the label column vectors
x0_train = np.ones(num_train).reshape(-1, 1)
X_train = tf.cast(tf.concat((x0_train, x_train), axis=1), tf.float32)
Y_train = tf.cast(y_train.reshape(-1, 1), dtype=tf.float32)
x0_test = np.ones(num_test).reshape(-1, 1)
X_test = tf.cast(tf.concat((x0_test, x_test), axis=1), tf.float32)
Y_test = tf.cast(y_test.reshape(-1, 1), dtype=tf.float32)

# hyperparameters
learn_rate = 0.2
iter = 120
display_step = 30

# initial model parameters
np.random.seed(612)
W = tf.Variable(np.random.randn(3, 1), dtype=tf.float32)

# train the model
ce_train = []
acc_train = []
ce_test = []
acc_test = []
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        PRED_train = 1 / (1 + tf.exp(-tf.matmul(X_train, W)))
        Loss_train = -tf.reduce_mean(Y_train * tf.math.log(PRED_train) + (1 - Y_train) * tf.math.log(1 - PRED_train))
        PRED_test = 1 / (1 + tf.exp(-tf.matmul(X_test, W)))
        Loss_test = -tf.reduce_mean(Y_test * tf.math.log(PRED_test) + (1 - Y_test) * tf.math.log(1 - PRED_test))
    accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED_train.numpy() < 0.5, 0., 1.), Y_train), tf.float32))
    accuracy_test = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED_test.numpy() < 0.5, 0., 1.), Y_test), tf.float32))
    ce_train.append(Loss_train)
    acc_train.append(accuracy_train)
    ce_test.append(Loss_test)
    acc_test.append(accuracy_test)
    dL_dW = tape.gradient(Loss_train, W)
    W.assign_sub(learn_rate * dL_dW)
    if i % display_step == 0:
        print("i: %i, Acc_train: %f, Loss_train: %f, Acc_test: %f, Loss_test: %f" % (
            i, accuracy_train, Loss_train, accuracy_test, Loss_test))

# draw the classification map for the training set
M = 300
x1_min, x2_min = x_train.min(axis=0)
x1_max, x2_max = x_train.max(axis=0)
t1 = np.linspace(x1_min, x1_max, M)
t2 = np.linspace(x2_min, x2_max, M)
m1, m2 = np.meshgrid(t1, t2)
m0 = np.ones(M * M)
X_mesh = tf.cast(np.stack((m0, m1.reshape(-1), m2.reshape(-1)), axis=1), dtype=tf.float32)
Y_mesh = tf.cast(1 / (1 + tf.exp(-tf.matmul(X_mesh, W))), dtype=tf.float32)
Y_mesh = tf.where(Y_mesh < 0.5, 0, 1)
n = tf.reshape(Y_mesh, m1.shape)
cm_pt = mpl.colors.ListedColormap(["blue", "red"])
cm_bg = mpl.colors.ListedColormap(["#FD971F", "#A6E22E"])
plt.pcolormesh(m1, m2, n, cmap=cm_bg, shading='auto')  # shading='auto' gives a cleaner rendering
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_pt)
plt.show()

Draw the classification map of the iris model on the test set

import tensorflow.keras as keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import tensorflow as tf

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = keras.utils.get_file(TRAIN_URL.split("/")[-1], TRAIN_URL)
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
test_path = keras.utils.get_file(TEST_URL.split("/")[-1], TEST_URL)
df_iris_train = pd.read_csv(train_path, header=0)
df_iris_test = pd.read_csv(test_path, header=0)

# preprocess the data:
# 1. convert the DataFrames to NumPy arrays
# 2. split attributes and labels
# 3. keep only Iris setosa and Iris versicolor (label < 2)
iris_train = np.array(df_iris_train)
iris_test = np.array(df_iris_test)
train_x = iris_train[:, 0:2]
train_y = iris_train[:, 4]
test_x = iris_test[:, 0:2]
test_y = iris_test[:, 4]
x_train = train_x[train_y < 2]
y_train = train_y[train_y < 2]
x_test = test_x[test_y < 2]
y_test = test_y[test_y < 2]
num_train = len(x_train)
num_test = len(x_test)

# center the attributes
x_train = x_train - np.mean(x_train, axis=0)
x_test = x_test - np.mean(x_test, axis=0)

# build the attribute matrices and the label column vectors
x0_train = np.ones(num_train).reshape(-1, 1)
X_train = tf.cast(tf.concat((x0_train, x_train), axis=1), tf.float32)
Y_train = tf.cast(y_train.reshape(-1, 1), dtype=tf.float32)
x0_test = np.ones(num_test).reshape(-1, 1)
X_test = tf.cast(tf.concat((x0_test, x_test), axis=1), tf.float32)
Y_test = tf.cast(y_test.reshape(-1, 1), dtype=tf.float32)

# hyperparameters
learn_rate = 0.2
iter = 120
display_step = 30

# initial model parameters
np.random.seed(612)
W = tf.Variable(np.random.randn(3, 1), dtype=tf.float32)

# train the model
ce_train = []
acc_train = []
ce_test = []
acc_test = []
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        PRED_train = 1 / (1 + tf.exp(-tf.matmul(X_train, W)))
        Loss_train = -tf.reduce_mean(Y_train * tf.math.log(PRED_train) + (1 - Y_train) * tf.math.log(1 - PRED_train))
        PRED_test = 1 / (1 + tf.exp(-tf.matmul(X_test, W)))
        Loss_test = -tf.reduce_mean(Y_test * tf.math.log(PRED_test) + (1 - Y_test) * tf.math.log(1 - PRED_test))
    accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED_train.numpy() < 0.5, 0., 1.), Y_train), tf.float32))
    accuracy_test = tf.reduce_mean(tf.cast(tf.equal(tf.where(PRED_test.numpy() < 0.5, 0., 1.), Y_test), tf.float32))
    ce_train.append(Loss_train)
    acc_train.append(accuracy_train)
    ce_test.append(Loss_test)
    acc_test.append(accuracy_test)
    dL_dW = tape.gradient(Loss_train, W)
    W.assign_sub(learn_rate * dL_dW)
    if i % display_step == 0:
        print("i: %i, Acc_train: %f, Loss_train: %f, Acc_test: %f, Loss_test: %f" % (
            i, accuracy_train, Loss_train, accuracy_test, Loss_test))

# draw the classification map for the test set
M = 300
x1_min, x2_min = x_test.min(axis=0)
x1_max, x2_max = x_test.max(axis=0)
t1 = np.linspace(x1_min, x1_max, M)
t2 = np.linspace(x2_min, x2_max, M)
m1, m2 = np.meshgrid(t1, t2)
m0 = np.ones(M * M)
X_mesh = tf.cast(np.stack((m0, m1.reshape(-1), m2.reshape(-1)), axis=1), dtype=tf.float32)
Y_mesh = tf.cast(1 / (1 + tf.exp(-tf.matmul(X_mesh, W))), dtype=tf.float32)
Y_mesh = tf.where(Y_mesh < 0.5, 0, 1)
n = tf.reshape(Y_mesh, m1.shape)
cm_pt = mpl.colors.ListedColormap(["blue", "red"])
cm_bg = mpl.colors.ListedColormap(["#FD971F", "#A6E22E"])
plt.pcolormesh(m1, m2, n, cmap=cm_bg, shading='auto')  # shading='auto' gives a cleaner rendering
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_pt)
plt.show()


1.7 Multi-class Classification

Multi-class classification replaces the sigmoid with the softmax function, encodes each label as a one-hot vector, and uses the average cross-entropy over all classes as the loss.
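A brief summary in my own notation, matching the code in section 1.8: with $C$ classes the model produces one score $z_{ik}$ per class for each sample, the softmax function turns the scores into probabilities, and the loss is the average categorical cross-entropy against the one-hot labels $y_{ik}$:

$$\hat{y}_{ik} = \frac{e^{z_{ik}}}{\sum_{j=1}^{C} e^{z_{ij}}}, \qquad \mathrm{Loss} = -\frac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik}\,\ln \hat{y}_{ik}.$$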

 


1.8 Implementing Multi-class Classification

Use petal length and petal width to separate the three iris species.

import tensorflow.keras as keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import tensorflow as tf

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = keras.utils.get_file(TRAIN_URL.split("/")[-1], TRAIN_URL)
df_iris_train = pd.read_csv(train_path, header=0)

# preprocess the data
iris_train = np.array(df_iris_train)
x_train = iris_train[:, 2:4]
y_train = iris_train[:, 4]
num_train = len(x_train)
x0_train = np.ones(num_train).reshape(-1, 1)
X_train = tf.cast(tf.concat([x0_train, x_train], axis=1), tf.float32)
# convert the iris labels to one-hot encoding
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)

# hyperparameters and initial model parameters
learn_rate = 0.2
iter = 500
display_step = 100
np.random.seed(612)
W = tf.Variable(np.random.randn(3, 3), dtype=tf.float32)

# train the model
acc = []  # accuracy
cce = []  # cross-entropy loss
for i in range(0, iter + 1):
    with tf.GradientTape() as tape:
        PRED_train = tf.nn.softmax(tf.matmul(X_train, W))  # predicted class probabilities
        Loss_train = -tf.reduce_sum(Y_train * tf.math.log(PRED_train)) / num_train
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(PRED_train, axis=1), y_train), tf.float32))
    acc.append(accuracy)
    cce.append(Loss_train)
    # update the model parameters
    dL_dW = tape.gradient(Loss_train, W)
    W.assign_sub(learn_rate * dL_dW)
    if i % display_step == 0:
        print("i: %i, Acc: %f, Loss: %f" % (i, accuracy, Loss_train))

# inspect the training results: each row of PRED_train sums to 1,
# and argmax converts the probabilities back to ordinary class indices
tf.reduce_sum(PRED_train, axis=1)
tf.argmax(PRED_train, axis=1)

# draw the classification map
M = 500
x1_min, x2_min = x_train.min(axis=0)
x1_max, x2_max = x_train.max(axis=0)
t1 = np.linspace(x1_min, x1_max, M)
t2 = np.linspace(x2_min, x2_max, M)
m1, m2 = np.meshgrid(t1, t2)
m0 = np.ones(M * M)
X_ = tf.cast(np.stack((m0, m1.reshape(-1), m2.reshape(-1)), axis=1), tf.float32)
Y_ = tf.nn.softmax(tf.matmul(X_, W))
Y_ = tf.argmax(Y_, axis=1)
n = tf.reshape(Y_, m1.shape)

# visualize
plt.figure(figsize=(8, 6))
cm_bg = mpl.colors.ListedColormap(["#A0FFA0", "#FFA0A0", "#A0A0FF"])
plt.pcolormesh(m1, m2, n, cmap=cm_bg)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap="brg")
plt.show()
