当前位置:   article > 正文

「超级干货大放送」机器学习十二种经典模型实例_机器学习实例

机器学习实例

目录

实例一:线性回归波士顿房价

实例二:KNN实现电影分类

实例三:基于线性回归预测波士顿房价

​ 实例四:sklearn完成逻辑回归鸢尾花分类

实例五:支持向量机完成鸢尾花分类

实例六:使用决策树实现鸢尾花分类

实例七:使用随机森林实现鸢尾花分类

实例八:使用朴素贝叶斯进行鸢尾花分类

实例九:使用Kmeans来进行鸢尾花分类

实例十:K最近邻的使用方式

实例十一:kmeans的其他展示方式

实例十二:Kmeans实现鸢尾花聚类


实例一:线性回归波士顿房价

  """Example 1: univariate linear regression on the Boston housing data (regression)."""
  import matplotlib.pyplot as plt
  from sklearn.linear_model import LinearRegression  # ordinary least squares
  from sklearn.model_selection import train_test_split


  def load_boston_xy():
      """Return the Boston housing data as a ``(features, target)`` pair of arrays.

      ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, where
      importing it raises ``ImportError``. In that case the data is rebuilt from
      the original StatLib file (this fallback needs network access).
      """
      try:
          from sklearn.datasets import load_boston
      except ImportError:
          import numpy as np
          import pandas as pd

          raw = pd.read_csv(
              "http://lib.stat.cmu.edu/datasets/boston",
              sep=r"\s+",
              skiprows=22,
              header=None,
          )
          # Each record is spread over two physical rows in the raw file.
          features = np.hstack([raw.values[::2, :], raw.values[1::2, :2]])
          target = raw.values[1::2, 2]
          return features, target
      return load_boston(return_X_y=True)


  # Load the data
  X, y = load_boston_xy()
  # Keep only column 5 (number of rooms); the 5:6 slice keeps the array 2-D
  X1 = X[:, 5:6]
  # Split into training and test sets
  train_x, test_x, train_y, test_y = train_test_split(X1, y, test_size=0.3, random_state=2)
  # Create and train the model
  lr = LinearRegression()
  lr.fit(train_x, train_y)
  # Predict on the held-out samples
  result = lr.predict(test_x)
  # Training samples as a scatter plot, fitted line drawn from the test predictions
  plt.scatter(train_x, train_y, color='blue')
  plt.plot(test_x, result, color='red')
  plt.show()

实例二:KNN实现电影分类

  """Example 2: classify a movie's genre with a hand-written k-nearest-neighbours vote."""
  from collections import Counter

  import numpy as np
  import pandas as pd

  # Training data: title -> [funny shots, hugging shots, fighting shots, genre]
  train_data = {
      '宝贝当家': [45, 2, 9, '喜剧片'],
      '美人鱼': [21, 17, 5, '喜剧片'],
      '澳门风云3': [54, 9, 11, '喜剧片'],
      '功夫熊猫3': [39, 0, 31, '喜剧片'],
      '谍影重重': [5, 2, 57, '动作片'],
      '叶问3': [3, 2, 65, '动作片'],
      '我的特工爷爷': [6, 4, 21, '动作片'],
      '奔爱': [7, 46, 4, '爱情片'],
      '夜孔雀': [9, 39, 8, '爱情片'],
      '代理情人': [9, 38, 2, '爱情片'],
      '新步步惊心': [8, 34, 17, '爱情片'],
      '伦敦陷落': [2, 3, 55, '动作片'],
  }
  # One row per movie, one column per field
  train_df = pd.DataFrame(train_data).T
  train_df.columns = ['搞笑镜头', '拥抱镜头', '打斗镜头', '电影类型']
  # The sample to classify
  test_data = {'唐人街探案': [23, 3, 17]}


  def euclidean_distance(vec1, vec2):
      """Return the Euclidean distance between two equally shaped arrays."""
      return np.sqrt(np.sum(np.square(vec1 - vec2)))


  def knn_classify(train_df, sample, k):
      """Return the majority label among the ``k`` training rows nearest to ``sample``.

      train_df: DataFrame whose last column is the label and whose other
          columns are numeric features.
      sample: feature values, in the same order as the feature columns.
      k: number of neighbours that vote; must be between 1 and ``len(train_df)``.

      Ties are resolved in favour of the label whose first voter is closest.
      Raises ``ValueError`` when ``k`` is out of range.
      """
      if not 1 <= k <= len(train_df):
          raise ValueError(f"k must be between 1 and {len(train_df)}, got {k}")
      # The transposed frame is object-typed, so convert the features explicitly.
      features = train_df.iloc[:, :-1].to_numpy(dtype=float)
      labels = train_df.iloc[:, -1].to_numpy()
      target = np.asarray(sample, dtype=float)
      distances = np.array([euclidean_distance(row, target) for row in features])
      # A stable sort keeps equally distant rows in their original order.
      nearest = np.argsort(distances, kind="stable")[:k]
      # Majority vote; taking max() of the label strings would only pick the
      # lexicographically largest label.
      votes = Counter(labels[nearest].tolist())
      return votes.most_common(1)[0][0]


  # Number of neighbours
  K = 3
  movie = '唐人街探案'
  print(knn_classify(train_df, test_data[movie], K))

实例三:基于线性回归预测波士顿房价

  """Example 3: Boston house-price prediction with a hand-written linear regression."""
  # 1. Data loading and preprocessing
  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import MinMaxScaler


  def load_boston_xy():
      """Return the Boston housing data as a ``(features, target)`` pair of arrays.

      ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, where
      importing it raises ``ImportError``. In that case the data is rebuilt from
      the original StatLib file (this fallback needs network access).
      """
      try:
          from sklearn.datasets import load_boston
      except ImportError:
          raw = pd.read_csv(
              "http://lib.stat.cmu.edu/datasets/boston",
              sep=r"\s+",
              skiprows=22,
              header=None,
          )
          # Each record is spread over two physical rows in the raw file.
          features = np.hstack([raw.values[::2, :], raw.values[1::2, :2]])
          target = raw.values[1::2, 2]
          return features, target
      return load_boston(return_X_y=True)


  # Features (X) and labels (y)
  X, y = load_boston_xy()
  # Split first so that no test-set statistics leak into the preprocessing
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2020)
  # Scale features to [0, 1] to speed up gradient-descent convergence. The
  # scaler is fitted on the training split only, so test values may fall
  # slightly outside [0, 1].
  scaler = MinMaxScaler().fit(X_train)
  X_train = scaler.transform(X_train)
  X_test = scaler.transform(X_test)
  # 2. Linear regression implementation
  class LinearRegression:
      """Linear regression trained with batch gradient descent.

      Parameters
      ----------
      alpha: learning rate; scales every parameter update.
      epoch: number of parameter updates performed over the full training set.
      fit_bias: whether to learn a bias term (a constant feature x_0 = 1 is
          prepended to every sample when true).

      Attributes
      ----------
      w: parameter vector set by ``fit`` or ``load_model``; ``w[0]`` is the bias
          when ``fit_bias`` is true.
      cost_record: empirical risk recorded before each update of the latest ``fit``.
      """

      def __init__(self, alpha=0.1, epoch=5000, fit_bias=True):
          self.alpha = alpha
          self.epoch = epoch
          self.cost_record = []
          self.fit_bias = fit_bias

      def _design_matrix(self, X):
          """Return ``X`` as a float array, with a leading column of ones if a bias is fitted."""
          X = np.asarray(X, dtype=float)
          if self.fit_bias:
              X = np.column_stack((np.ones(X.shape[0]), X))
          return X

      def predict(self, X_test):
          """Return predictions for ``X_test``, an m x n two-dimensional array."""
          return np.dot(self._design_matrix(X_test), self.w)

      def fit(self, X_train, y_train):
          """Learn ``w`` by gradient descent and save it with ``save_model``.

          X_train: m x n two-dimensional array.
          y_train: one-dimensional array with m elements.
          """
          X_train = self._design_matrix(X_train)
          y_train = np.asarray(y_train, dtype=float)
          m, n = X_train.shape
          # Start from all-ones parameters
          self.w = np.ones(n)
          # Start a fresh history so a refit does not append to the previous run
          self.cost_record = []
          for _ in range(self.epoch):
              residual = np.dot(X_train, self.w) - y_train
              # Empirical risk: half the mean squared error
              self.cost_record.append(np.dot(residual, residual) / (2 * m))
              # Gradient step
              self.w -= self.alpha / m * np.dot(residual, X_train)
          self.save_model()

      def score(self, X_test, y_test):
          """Return the coefficient of determination R^2 of the predictions on ``X_test``."""
          y_test = np.asarray(y_test, dtype=float)
          residual = y_test - self.predict(X_test)
          ss_res = float(np.dot(residual, residual))
          centered = y_test - y_test.mean()
          ss_tot = float(np.dot(centered, centered))
          if ss_tot == 0.0:
              # Constant target: perfect only if every prediction matches it
              return 1.0 if ss_res == 0.0 else 0.0
          return 1.0 - ss_res / ss_tot

      def plot_cost(self):
          """Plot the recorded empirical risk against the iteration number."""
          # Use the recorded length so x and y always have the same size
          plt.plot(np.arange(len(self.cost_record)), self.cost_record)
          plt.xlabel("epoch")
          plt.ylabel("cost")
          plt.show()

      # Original (misspelled) method name, kept for existing callers
      polt_cost = plot_cost

      def save_model(self):
          """Write the parameter vector to ``model.txt`` in the working directory."""
          np.savetxt("model.txt", self.w)

      def load_model(self):
          """Read the parameter vector back from ``model.txt``."""
          # atleast_1d keeps w a vector even when the file holds a single value
          self.w = np.atleast_1d(np.loadtxt("model.txt"))
  # 3. Train the model and predict
  model = LinearRegression()
  # Fit on the training set
  model.fit(X_train, y_train)
  # Predict on the test set
  y_pred = model.predict(X_test)
  # 4. Print the learned parameters. With the default fit_bias=True the model
  # prepends the constant feature, so w[0] is the bias and w[1:] are the weights.
  print('偏置参数:', model.w[0])
  print('特征权重:', model.w[1:])
  # 5. Print the predictions for the first five test samples
  print('预测结果:', y_pred[:5])

 实例四:sklearn完成逻辑回归鸢尾花分类

  """Example 4: logistic-regression classification of the iris data with scikit-learn."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.linear_model import LogisticRegression
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import StandardScaler

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features (not regularisation); statistics come from the
  # training split only
  sc = StandardScaler()
  sc.fit(X_train)
  X_train_std = sc.transform(X_train)
  X_test_std = sc.transform(X_test)
  lr = LogisticRegression(C=1000.0, random_state=0)
  lr.fit(X_train_std, y_train)
  X_combined_std = np.vstack((X_train_std, X_test_std))
  y_combined = np.hstack((y_train, y_test))
  # Highlight the test samples (rows 105-149 of the combined array).
  # filler_feature_ranges is meant for data with more than two features.
  plot_decision_regions(X_combined_std, y_combined, clf=lr, X_highlight=X_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

实例五:支持向量机完成鸢尾花分类

  """Example 5: support-vector-machine classification of the iris data."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import StandardScaler
  from sklearn.svm import SVC

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features using training-split statistics only
  ss = StandardScaler().fit(X_train)
  x_train_std = ss.transform(X_train)
  x_test_std = ss.transform(X_test)
  # RBF-kernel SVM trained on the standardised features
  svc = SVC(kernel='rbf', random_state=0, gamma=0.2, C=1.0)
  svc.fit(x_train_std, y_train)
  X_combined_std = np.vstack((x_train_std, x_test_std))
  y_combined = np.hstack((y_train, y_test))
  # Highlight the test samples (rows 105-149 of the combined array).
  # filler_feature_ranges is meant for data with more than two features.
  plot_decision_regions(X_combined_std, y_combined, clf=svc, X_highlight=x_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

实例六:使用决策树实现鸢尾花分类

  """Example 6: decision-tree classification of the iris data."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import StandardScaler
  from sklearn.svm import SVC
  from sklearn.tree import DecisionTreeClassifier

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features using training-split statistics only
  ss = StandardScaler().fit(X_train)
  x_train_std = ss.transform(X_train)
  x_test_std = ss.transform(X_test)
  dtc = DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=3)
  # Train on the standardised features: the decision regions below are drawn
  # in standardised coordinates, so the model must live in the same space.
  dtc.fit(x_train_std, y_train)
  X_combined_std = np.vstack((x_train_std, x_test_std))
  y_combined = np.hstack((y_train, y_test))
  # Highlight the test samples (rows 105-149 of the combined array).
  # filler_feature_ranges is meant for data with more than two features.
  plot_decision_regions(X_combined_std, y_combined, clf=dtc, X_highlight=x_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

 

实例七:使用随机森林实现鸢尾花分类

  """Example 7: random-forest classification of the iris data."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import StandardScaler
  from sklearn.svm import SVC
  from sklearn.tree import DecisionTreeClassifier

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features using training-split statistics only
  ss = StandardScaler().fit(X_train)
  x_train_std = ss.transform(X_train)
  x_test_std = ss.transform(X_test)
  rfc = RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=1, n_jobs=2)
  # Train on the standardised features: the decision regions below are drawn
  # in standardised coordinates, so the model must live in the same space.
  rfc.fit(x_train_std, y_train)
  X_combined_std = np.vstack((x_train_std, x_test_std))
  y_combined = np.hstack((y_train, y_test))
  # Highlight the test samples (rows 105-149 of the combined array).
  # filler_feature_ranges is meant for data with more than two features.
  plot_decision_regions(X_combined_std, y_combined, clf=rfc, X_highlight=x_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

实例八:使用朴素贝叶斯进行鸢尾花分类

  """Example 8: Gaussian naive-Bayes classification of the iris data."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split
  from sklearn.naive_bayes import GaussianNB
  from sklearn.preprocessing import StandardScaler
  from sklearn.svm import SVC
  from sklearn.tree import DecisionTreeClassifier

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features using training-split statistics only
  ss = StandardScaler().fit(X_train)
  x_train_std = ss.transform(X_train)
  x_test_std = ss.transform(X_test)
  gnb = GaussianNB()
  # Train on the standardised features: the decision regions below are drawn
  # in standardised coordinates, so the model must live in the same space.
  gnb.fit(x_train_std, y_train)
  X_combined_std = np.vstack((x_train_std, x_test_std))
  y_combined = np.hstack((y_train, y_test))
  # Highlight the test samples (rows 105-149 of the combined array).
  # filler_feature_ranges is meant for data with more than two features.
  plot_decision_regions(X_combined_std, y_combined, clf=gnb, X_highlight=x_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

实例九:使用Kmeans来进行鸢尾花分类

  """Example 9: k-means clustering of the iris data, drawn as cluster regions."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.cluster import KMeans
  from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split
  from sklearn.naive_bayes import GaussianNB
  from sklearn.preprocessing import StandardScaler
  from sklearn.svm import SVC
  from sklearn.tree import DecisionTreeClassifier

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features using training-split statistics only
  ss = StandardScaler().fit(X_train)
  x_train_std = ss.transform(X_train)
  x_test_std = ss.transform(X_test)
  # A fixed seed and explicit n_init make the figure reproducible
  km = KMeans(n_clusters=3, n_init=10, random_state=0)
  # k-means is unsupervised, so no labels are passed. It is fitted on the
  # standardised features because the regions below use those coordinates.
  km.fit(x_train_std)
  X_combined_std = np.vstack((x_train_std, x_test_std))
  y_combined = np.hstack((y_train, y_test))
  # NOTE: region colours are cluster ids, which need not match the class ids
  # used for the scatter markers.
  plot_decision_regions(X_combined_std, y_combined, clf=km, X_highlight=x_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

实例十:K最近邻的使用方式

  """Example 10: k-nearest-neighbours classification of the iris data."""
  import matplotlib.pyplot as plt
  import numpy as np
  from mlxtend.plotting import plot_decision_regions
  from sklearn import datasets
  from sklearn.cluster import KMeans
  from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split
  from sklearn.naive_bayes import GaussianNB
  from sklearn.neighbors import KNeighborsClassifier
  from sklearn.preprocessing import StandardScaler
  from sklearn.svm import SVC
  from sklearn.tree import DecisionTreeClassifier

  iris = datasets.load_iris()
  # Columns 2 and 3: the two petal features named on the plot axes below
  X = iris.data[:, [2, 3]]
  y = iris.target
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  # Standardise the features using training-split statistics only
  ss = StandardScaler().fit(X_train)
  x_train_std = ss.transform(X_train)
  x_test_std = ss.transform(X_test)
  # Five neighbours, Minkowski metric with p=2
  knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
  knn.fit(x_train_std, y_train)
  X_combined_std = np.vstack((x_train_std, x_test_std))
  y_combined = np.hstack((y_train, y_test))
  # Highlight the test samples (rows 105-149 of the combined array).
  # filler_feature_ranges is meant for data with more than two features.
  plot_decision_regions(X_combined_std, y_combined, clf=knn, X_highlight=x_test_std)
  plt.xlabel('petal length [standardized]')
  plt.ylabel('petal width [standardized]')
  plt.legend(loc='upper left')
  plt.tight_layout()
  plt.show()

实例十一:kmeans的其他展示方式

  """Example 11: another way to display k-means results on the iris data."""
  import matplotlib.pyplot as plt
  import pandas as pd
  from sklearn import datasets
  from sklearn.cluster import KMeans

  iris = datasets.load_iris()
  X = iris.data[:, [2, 3]]
  y = iris.target
  df = pd.DataFrame(X, columns=['x', 'y'])
  df['kind'] = y
  # Cluster on the two feature columns only; including the 'kind' label column
  # would leak the true classes into the clustering.
  data1 = df[['x', 'y']]
  # Three clusters
  km = KMeans(n_clusters=3)
  km.fit(data1)
  predict = km.predict(data1)
  # One colour per cluster id
  colored = ['orange', 'green', 'pink']
  col = [colored[i] for i in predict]
  plt.scatter(data1['x'], data1['y'], color=col)
  plt.xlabel('x')
  plt.ylabel('y')
  plt.show()
  print(predict)
  # Ground truth: iris.target already holds integer class ids (they index
  # `colored` below), so no name-to-number mapping is needed.
  c = [colored[i] for i in y]
  plt.scatter(df['x'], df['y'], color=c)
  plt.xlabel('x')
  plt.ylabel('y')
  plt.show()

 

实例十二:Kmeans实现鸢尾花聚类

  """Example 12: k-means clustering of iris data read from a CSV file."""
  import matplotlib.pyplot as plt
  import pandas as pd
  from sklearn.cluster import KMeans


  def show_xy_scatter(frame, point_colors):
      """Draw the 'x'/'y' columns of ``frame`` as a scatter plot and show it."""
      plt.scatter(frame['x'], frame['y'], color=point_colors)
      plt.xlabel('x')
      plt.ylabel('y')
      plt.show()


  # Read the data
  data = pd.read_csv(r"C:\Users\单纯小男子\Downloads\iris.csv")
  # Feature table: everything except the 'kind' label column
  data1 = data.drop(columns=['kind'])
  # Fit three clusters, then assign each row to its cluster
  km = KMeans(n_clusters=3)
  km.fit(data1)
  predict = km.predict(data1)
  # Palette indexed by cluster id / class id
  colored = ['orange', 'green', 'pink']
  show_xy_scatter(data1, [colored[cluster] for cluster in predict])
  print(predict)
  # Ground truth: convert the label names in the 'kind' column to integer ids
  class_mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
  data['kind'] = data['kind'].map(class_mapping)
  # Colour each point by its true class
  c = [colored[label] for label in data['kind']]
  show_xy_scatter(data, c)

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/weixin_40725706/article/detail/263300
推荐阅读
相关标签
  

闽ICP备14008679号