赞
踩
参考文献:【Python 3 利用机器学习模型 进行手写体数字检测】
整体的设计流程:
这一步提取图像特征:逐个读取 28×28 图像的 784 个像素点,非零像素记为 1、零像素记为 0,连同数字标签一起写入 CSV。
from PIL import Image
import csv
import os


def get_features_single(img):
    """Return the binarized pixels of a 28x28 image as a flat list.

    The image is scanned row by row (y outer, x inner); each pixel
    contributes 1 when ``img.getpixel((x, y)) != 0`` and 0 otherwise,
    giving 784 values.

    Args:
        img: An object exposing ``getpixel((x, y))`` for 0 <= x, y < 28.

    Returns:
        The list of 784 ints. The same list object is also published as
        the module-level global ``pixel_cnt_list`` for existing callers.
    """
    global pixel_cnt_list
    height, width = 28, 28
    # NOTE(review): for multi-band images getpixel returns a tuple, which
    # never equals 0 -- presumably inputs are single-channel; confirm.
    pixel_cnt_list = [
        1 if img.getpixel((x, y)) != 0 else 0
        for y in range(height)
        for x in range(width)
    ]
    # An alternative feature set (white-pixel counts per row and per column)
    # was previously sketched here as commented-out code.
    return pixel_cnt_list


def save_features_to_CSV():
    """Extract features from every image under the digit folders into a CSV.

    Walks the sub-folders ``0`` .. ``9`` of ``3-手写数字集/``, writes one row
    per image (784 pixel features followed by the folder's digit as the
    label) to ``data_csvs/tmp.csv``, then renames that file to
    ``data_csvs/data_<total image count>.csv``, replacing any previous file
    of the same name.
    """
    path_images = "3-手写数字集/"
    path_csv = "data_csvs/"
    tmp_csv = path_csv + "tmp.csv"
    sum_images = 0

    with open(tmp_csv, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        # Visit the folders named 0-9; the folder name is the label.
        for i in range(10):
            digit_dir = path_images + str(i)
            print(digit_dir)
            # Check before listing: os.listdir raises on a missing folder.
            if not os.path.isdir(digit_dir):
                continue
            num_list = os.listdir(digit_dir)
            print("num_list:", num_list)
            print("样本个数:", len(num_list))
            sum_images = sum_images + len(num_list)
            for file_name in num_list:
                print("分析:", file_name)
                # The context manager closes the image even if feature
                # extraction raises.
                with Image.open(digit_dir + "/" + file_name) as img:
                    row = get_features_single(img)
                row.append(i)
                writer.writerow(row)
            print('\n')

    print("样本总数:", sum_images)
    # Rename only after the CSV is closed; os.replace overwrites an existing
    # data_<N>.csv, so no separate existence check / removal is needed.
    os.replace(tmp_csv, path_csv + "data_" + str(sum_images) + ".csv")


# Guarded so that importing this module for get_features_single does not
# re-run the whole extraction.
if __name__ == "__main__":
    save_features_to_CSV()
def pre_data():
    """Load the feature CSV and split it into global train/test sets.

    Reads ``data_csvs/data_500.csv`` with 785 column names
    (``feature_0`` .. ``feature_783`` plus ``true_number``) and stores an
    80/20 split in the globals ``X_train``, ``X_test``, ``y_train`` and
    ``y_test``.
    """
    global X_train, X_test, y_train, y_test

    feature_columns = [f"feature_{i}" for i in range(784)]
    label_column = "true_number"

    path_csv = "data_csvs/"
    data = pd.read_csv(
        path_csv + "data_500.csv",
        names=feature_columns + [label_column],
    )

    # 80% of the rows for training, 20% for testing; fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        data[feature_columns],
        data[label_column],
        test_size=0.2,
        random_state=0,
    )


# Folder the training functions write their fitted models to.
path_saved_models = "data_models/"

pre_data()
def way_KNN():
    """Train a 3-nearest-neighbours classifier, print its accuracy, save it.

    Reads the globals ``X_train``, ``X_test``, ``y_train``, ``y_test`` and
    ``path_saved_models``. Stores the test-set predictions in the global
    ``y_predict_KNN`` and writes the fitted model to
    ``<path_saved_models>KNN.model``.

    The scaler and the classifier are fitted and saved together as one
    pipeline, so the loaded model standardizes its inputs exactly as during
    training; previously only the bare classifier was saved and callers had
    to reproduce the scaling themselves.
    """
    # Local import keeps this function self-contained.
    from sklearn.pipeline import make_pipeline

    global y_predict_KNN

    KNN = make_pipeline(
        StandardScaler(),
        KNeighborsClassifier(
            algorithm='auto',
            leaf_size=50,
            metric='minkowski',
            metric_params=None,
            n_jobs=1,
            n_neighbors=3,
        ),
    )
    KNN.fit(X_train, y_train)

    y_predict_KNN = KNN.predict(X_test)

    score_KNN = KNN.score(X_test, y_test)
    print("The accurary of KNN:", '\t', score_KNN)

    # Persist scaler + classifier as a single object.
    joblib.dump(filename=path_saved_models + 'KNN.model', value=KNN)
# LR: logistic regression classifier (linear model).
def way_LR():
    """Train a logistic-regression classifier, print its accuracy, save it.

    Reads the globals ``X_train``, ``X_test``, ``y_train``, ``y_test`` and
    ``path_saved_models``. Stores the test-set predictions in the global
    ``y_predict_LR`` and writes the fitted model to
    ``<path_saved_models>LR.model``.

    The scaler and the classifier are fitted and saved together as one
    pipeline, so the loaded model standardizes its inputs exactly as during
    training; previously only the bare classifier was saved and callers had
    to reproduce the scaling themselves.
    """
    # Local import keeps this function self-contained.
    from sklearn.pipeline import make_pipeline

    global y_predict_LR

    # Standardization followed by a default-configured LogisticRegression.
    LR = make_pipeline(StandardScaler(), LogisticRegression())
    LR.fit(X_train, y_train)

    # Predict on the held-out test set.
    y_predict_LR = LR.predict(X_test)

    # Mean accuracy on the test set.
    score_LR = LR.score(X_test, y_test)
    print("The accurary of LR:", '\t', score_LR)

    # Persist scaler + classifier as a single object.
    joblib.dump(filename=path_saved_models + 'LR.model', value=LR)
# SGDC: linear classifier trained with stochastic gradient descent.
def way_SGDC():
    """Train an SGD linear classifier, print its accuracy, save it.

    Reads the globals ``X_train``, ``X_test``, ``y_train``, ``y_test`` and
    ``path_saved_models``. Stores the test-set predictions in the global
    ``y_predict_SGDC`` and writes the fitted model to
    ``<path_saved_models>SGDC.model``.

    The scaler and the classifier are fitted and saved together as one
    pipeline, so the loaded model standardizes its inputs exactly as during
    training; previously only the bare classifier was saved and callers had
    to reproduce the scaling themselves.
    """
    # Local import keeps this function self-contained.
    from sklearn.pipeline import make_pipeline

    global y_predict_SGDC

    SGDC = make_pipeline(
        StandardScaler(),
        SGDClassifier(penalty='l2', alpha=0.1),
    )
    SGDC.fit(X_train, y_train)

    y_predict_SGDC = SGDC.predict(X_test)

    score_SGDC = SGDC.score(X_test, y_test)
    print("The accurary of SGDC:", '\t', score_SGDC)

    # Persist scaler + classifier as a single object.
    joblib.dump(filename=path_saved_models + 'SGDC.model', value=SGDC)
# SVC: support vector classifier (configured below with an RBF kernel).
def way_SVC():
    """Train an RBF-kernel support vector classifier, print accuracy, save it.

    Reads the globals ``X_train``, ``X_test``, ``y_train``, ``y_test`` and
    ``path_saved_models``. Stores the test-set predictions in the global
    ``y_predict_SVC`` and writes the fitted model to
    ``<path_saved_models>SVC.model``.

    The scaler and the classifier are fitted and saved together as one
    pipeline, so the loaded model standardizes its inputs exactly as during
    training; previously only the bare classifier was saved and callers had
    to reproduce the scaling themselves.
    """
    # Local import keeps this function self-contained.
    from sklearn.pipeline import make_pipeline

    global y_predict_SVC

    LSVC = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1E6))
    LSVC.fit(X_train, y_train)

    y_predict_SVC = LSVC.predict(X_test)

    score_SVC = LSVC.score(X_test, y_test)
    print("The accurary of SVC:", '\t', score_SVC)

    # Persist scaler + classifier as a single object.
    joblib.dump(filename=path_saved_models + 'SVC.model', value=LSVC)
# MLPC: multi-layer perceptron classifier (neural network).
def way_MLPC():
    """Train a multi-layer perceptron classifier, print accuracy, save it.

    The network has three hidden layers of 45 units and is allowed up to
    10000 training iterations.

    Reads the globals ``X_train``, ``X_test``, ``y_train``, ``y_test`` and
    ``path_saved_models``. Stores the test-set predictions in the global
    ``y_predict_MLPC`` and writes the fitted model to
    ``<path_saved_models>MLPC.model``.

    The scaler and the classifier are fitted and saved together as one
    pipeline, so the loaded model standardizes its inputs exactly as during
    training; previously only the bare classifier was saved and callers had
    to reproduce the scaling themselves.
    """
    # Local import keeps this function self-contained.
    from sklearn.pipeline import make_pipeline

    global y_predict_MLPC

    MLPC = make_pipeline(
        StandardScaler(),
        MLPClassifier(hidden_layer_sizes=(45, 45, 45), max_iter=10000),
    )
    MLPC.fit(X_train, y_train)

    y_predict_MLPC = MLPC.predict(X_test)

    score_MLPC = MLPC.score(X_test, y_test)
    print("The accurary of MLPC:", '\t', score_MLPC)

    # Persist scaler + classifier as a single object.
    joblib.dump(filename=path_saved_models + 'MLPC.model', value=MLPC)
# Train, evaluate and save each classifier in turn.
for train_model in (way_LR, way_SVC, way_MLPC, way_SGDC, way_KNN):
    train_model()
The accurary of LR: 0.89
The accurary of SVC: 0.9
The accurary of MLPC: 0.85
The accurary of SGDC: 0.9
The accurary of KNN: 0.84
# Use the trained models saved on disk to predict the label of one image.
import joblib
from PIL import Image

import get_features

path_saved_models = "data_models/"

# Extract the features inside a context manager so the image file is closed
# even if feature extraction fails.
with Image.open("3-手写数字集/0/mnist_train_359.png") as img:
    features_test_png = get_features.get_features_single(img)

# NOTE(review): the features are passed to the models as raw 0/1 values. If a
# saved model does not embed the scaler that was fitted during training, its
# inputs here are on a different scale than at training time -- confirm.
for model_name in ("LR", "SVC", "MLPC", "SGDC", "KNN"):
    model = joblib.load(path_saved_models + model_name + ".model")
    prediction = model.predict([features_test_png])
    print(model_name + ":", prediction[0])
结果:
LR: 0
SVC: 0
MLPC: 0
SGDC: 0
KNN: 0
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。