赞
踩
代码引入了作者手写的 MNIST 数据集读取模块 Read_File(该模块内置可视化函数)。
Read_File 模块的实现参见:https://blog.csdn.net/wangdiedang/article/details/125335812?spm=1001.2014.3001.5502
- # creator : wangdiedang
- # time : 2022/6/7 11:57
- # filename : Bayes.py
-
- # 引入读文件模块
- import Read_File as RF
- import numpy as np
- from collections import Counter
- import time
-
def featureExtraction(img, dim, num):
    """Down-sample one image to a ``dim`` x ``dim`` binary feature map.

    The image is cut into ``num`` x ``num`` tiles.  A tile maps to 1 when the
    sum of its pixels is strictly greater than ``max(28 // dim - 1, 1)`` and
    to 0 otherwise.

    Args:
        img: 2-D array indexed as ``img[row, col]``.
        dim: Number of tiles along each axis of the output.
        num: Side length, in pixels, of one tile.

    Returns:
        A ``(dim, dim)`` array allocated with ``np.empty`` and filled with
        the values 0 and 1.
    """
    features = np.empty((dim, dim))
    for r, c in np.ndindex(dim, dim):
        tile = img[num * r:num * (r + 1), num * c:num * (c + 1)]
        # The threshold is derived from a hard-coded image side of 28 pixels;
        # it is evaluated per tile so that dim == 0 never reaches the division.
        features[r, c] = 1 if tile.sum() > max((28 // dim - 1), 1) else 0
    return features
-
-
# Takes an image set and the target feature resolution.
def Extraction2AllImgs(imgs, dim):
    """Apply ``featureExtraction`` to every image of a set.

    Args:
        imgs: Array whose first axis enumerates the images; each entry is
            passed unchanged to ``featureExtraction``.
        dim: Side length of the reduced feature map produced per image.

    Returns:
        An array of shape ``(imgs.shape[0], dim, dim)`` holding one reduced
        feature map per input image.
    """
    reduced = np.empty((imgs.shape[0], dim, dim))
    # Tile side length, computed from a hard-coded image side of 28 pixels.
    tile_side = 28 // dim
    for idx, single_img in enumerate(imgs):
        reduced[idx] = featureExtraction(single_img, dim, tile_side)
    return reduced
-
-
def read_data(dim=7):
    """Load the data set through ``RF`` and optionally shrink the images.

    Args:
        dim: Target side length of the feature maps.  Values of 28 or more
            leave the images exactly as ``RF.read_main`` returned them.

    Returns:
        The four values returned by ``RF.read_main()``, in the same order,
        with the first and third replaced by their ``dim`` x ``dim`` feature
        maps when ``dim < 28``.
        NOTE(review): the order looks like (train images, train labels,
        test images, test labels) judging by the caller; confirm against
        the Read_File module.
    """
    # The original comment states that read_main returns the raw data as
    # binary images; that cannot be confirmed from this file.
    train_x, train_y, test_x, test_y = RF.read_main()
    RF.show_img(train_x)
    if dim < 28:
        # Reduce both image sets from the original resolution to dim x dim.
        return (
            Extraction2AllImgs(train_x, dim),
            train_y,
            Extraction2AllImgs(test_x, dim),
            test_y,
        )
    return train_x, train_y, test_x, test_y
-
-
# Train on every image: estimate class priors and per-pixel likelihoods.
def trainsAllImgs(train_imgs, train_labels):
    """Estimate the naive Bayes parameters for the ten digit classes.

    Args:
        train_imgs: Array-like of shape ``(total, row, col)``.  The per-pixel
            estimate below is a frequency only if every pixel is 0 or 1
            (assumed, not checked).
        train_labels: Array-like of ``total`` labels in ``0..9``.  Plain
            Python lists are accepted; they are converted to an array so the
            per-class boolean mask is element-wise.

    Returns:
        A tuple ``(priori_, posterior_)``:

        * ``priori_`` has shape ``(10,)`` and holds the Laplace-smoothed
          class frequencies ``(count + 1) / (total + 10)``.
        * ``posterior_`` has shape ``(10, row * col)``.  Despite its name it
          holds, for each class, the Laplace-smoothed frequency with which
          each pixel equals 1 among the images of that class,
          ``(pixel_sum + 1) / (count + 2)``.
    """
    train_imgs = np.asarray(train_imgs)
    # Without this conversion `list == int` is a single False, not a mask.
    train_labels = np.asarray(train_labels)

    total, row, col = train_imgs.shape
    # Flatten every image so a pixel is addressed by one column index.
    flat_imgs = train_imgs.reshape(total, row * col)

    # Number of training samples carrying each label.
    class_counts = np.array(
        [np.count_nonzero(train_labels == digit) for digit in range(10)]
    )

    # Laplace smoothing: one pseudo-sample for each of the 10 classes.
    priori_ = (class_counts + 1) / (total + 10)

    posterior_ = np.empty((10, row * col))
    for digit in range(10):
        # Laplace smoothing over the two possible pixel values (0 and 1).
        pixel_sums = flat_imgs[train_labels == digit].sum(axis=0)
        posterior_[digit] = (pixel_sums + 1) / (class_counts[digit] + 2)
    return priori_, posterior_
-
-
# Classify one image from the class priors and per-pixel likelihoods.
def bayesClassifier_MLE(test_imgs, priori_, posterior_):
    """Return the most probable class for a single image.

    For every class the score is ``log(prior)`` plus the sum, over all
    pixels, of ``log(p)`` when the pixel is non-zero and ``log(1 - p)`` when
    it is zero, where ``p`` is that class's entry in ``posterior_``.  Logs
    are summed instead of multiplying probabilities to avoid underflow.
    Because the prior is part of the score, the rule selects the class with
    the largest prior-weighted likelihood, although the function name says
    "MLE".

    Args:
        test_imgs: A single 2-D image of shape ``(row, col)``; any non-zero
            pixel counts as "on".
        priori_: Array-like with one prior probability per class.
        posterior_: Array-like of shape ``(n_classes, row * col)`` giving,
            per class, the probability that each pixel is on.

    Returns:
        The index of the highest-scoring class, as returned by ``np.argmax``
        (the lowest index wins a tie).
    """
    pixels = np.asarray(test_imgs)
    row, col = pixels.shape
    pixel_is_on = pixels.reshape(row * col) != 0

    posterior_ = np.asarray(posterior_)
    # One vectorised pass over the (n_classes, n_pixels) table replaces a
    # Python-level loop that called np.log once per pixel per class.
    log_likelihood = np.where(
        pixel_is_on, np.log(posterior_), np.log(1 - posterior_)
    ).sum(axis=1)

    each_P = np.log(priori_) + log_likelihood
    return np.argmax(each_P)
-
-
def evaluate_Bayes_model_MLE(train_imgs, train_labels, test_imgs, test_labels,
                             start_time=None):
    """Train the classifier, score it on the test set and print a report.

    Args:
        train_imgs: Training images of shape ``(n_train, row, col)``.
        train_labels: Labels matching ``train_imgs``.
        test_imgs: Iterable of 2-D test images.
        test_labels: Labels matching ``test_imgs``.
        start_time: Optional ``time.time()`` reference for the reported run
            time.  When omitted, a module-level ``old_time`` is used if one
            exists (as it does when the file runs as a script); otherwise
            the time at which this function was entered is used.

    Returns:
        None.  The training-set size, sample shape, number of correct
        predictions, accuracy percentage and elapsed time are printed.
    """
    if start_time is None:
        # Keeps the script's "time since program start" figure while no
        # longer raising NameError when the function is imported and called.
        start_time = globals().get("old_time", time.time())

    # Number of correctly predicted test samples.
    correctCnt = 0

    # Fit the priors and per-pixel likelihoods on the training set.
    print("-----训练模型获得模型的先验和后验概率-----")
    priori_, posterior_ = trainsAllImgs(train_imgs, train_labels)
    print("-----成功获得模型的先验和后验概率-----")

    # Predict every test image and compare with its label.
    print("-----将测试集载入模型进行精确度评估-----")
    for img, label in zip(test_imgs, test_labels):
        if bayesClassifier_MLE(img, priori_, posterior_) == label:
            correctCnt += 1
    current_time = time.time()

    # Report the real data sizes instead of a hard-coded 60000 and a global.
    n_train, rows, cols = train_imgs.shape
    n_test = len(test_labels)
    # An empty test set reports 0% rather than dividing by zero.
    accuracy = correctCnt / n_test * 100 if n_test else 0.0

    print("-----模型评估结束-----")
    print("--------------------------------------------------------------")
    print("训练模型样本:%d,样本尺寸(%d, %d)" % (n_train, rows, cols))
    print("总样本: %d, 预测成功数: %d, 预测成功率: %.3f" % (n_test, correctCnt, accuracy) + "%")
    print("运行时间为" + str(current_time - start_time) + "s")
    print("--------------------------------------------------------------")
-
-
if __name__ == '__main__':
    # `old_time` and `dim` are deliberately module-level names: code outside
    # this block may read them as globals when printing its report.
    old_time = time.time()
    dim = 28

    print("-----读取数据集-----")
    train_imgs, train_labels, test_imgs, test_labels = read_data(dim)
    # Hand the (possibly reduced) training images to the RF viewer.
    RF.show_img(train_imgs)

    print("-----读取成功开始训练-----")
    # Train the model and print the evaluation report.
    evaluate_Bayes_model_MLE(train_imgs, train_labels, test_imgs, test_labels)
赞
踩
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。