赞
踩
结果示例
完整代码
# -*- coding: utf-8 -*-
# @Time : 2021/6/21 15:33
# @Author : weiwei
# @File : Horse.py
"""AdaBoost built on single-feature decision stumps.

The module trains a boosted ensemble of threshold classifiers ("stumps")
on data whose class labels are +1 / -1, and applies the ensemble to new
samples.  Run as a script it trains on a five-point toy set and then on
the horse-colic files expected in the working directory.
"""
from math import log

import numpy as np


def loadSimpData():
    """Return a five-sample, two-feature toy data set and its labels.

    Returns:
        (datMat, classLabels): a 5x2 ``np.matrix`` of features and a list
        of five labels, each +1.0 or -1.0.
    """
    datMat = np.matrix([
        [1., 2.1],
        [2, 1.1],
        [1.3, 1.],
        [1., 1.],
        [2., 1.],
    ])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return datMat, classLabels


def loadDataSet(fileName):
    """Load a tab-separated numeric file: features first, label last.

    The number of columns is taken from the first line of the file.  Blank
    lines are skipped.

    Args:
        fileName: path of the text file to read.

    Returns:
        (dataMat, labelMat): a list of feature rows (lists of floats) and
        the list of float labels, one per non-blank line.
    """
    # A single context-managed read closes the handle deterministically.
    with open(fileName) as fr:
        lines = fr.readlines()
    dataMat = []
    labelMat = []
    if not lines:
        return dataMat, labelMat
    numFeat = len(lines[0].split('\t'))
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A trailing empty line would otherwise fail in float('').
            continue
        curLine = stripped.split('\t')
        dataMat.append([float(curLine[i]) for i in range(numFeat - 1)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Classify every row by thresholding one feature.

    Args:
        dataMatrix: 2-D sample matrix (rows are samples).
        dimen: column index of the feature to test.
        threshVal: threshold value.
        threshIneq: ``'lt'`` labels rows with feature <= threshold as -1;
            any other value labels rows with feature > threshold as -1.

    Returns:
        An (m, 1) ``ndarray`` holding +1.0 / -1.0 predictions.
    """
    retArray = np.ones((np.shape(dataMatrix)[0], 1))
    column = dataMatrix[:, dimen]
    if threshIneq == 'lt':
        retArray[column <= threshVal] = -1.0
    else:
        # Mirror image of the 'lt' stump: the side above the threshold is
        # the negative class.
        retArray[column > threshVal] = -1.0
    return retArray


def buildStump(dataArr, classLabels, D):
    """Find the decision stump with the lowest weighted error.

    Each feature is scanned over 12 evenly spaced thresholds (one step
    below the minimum up to the maximum) with both inequality directions.

    Args:
        dataArr: samples, anything ``np.asmatrix`` accepts as an m x n matrix.
        classLabels: sequence of m labels (+1 / -1).
        D: (m, 1) column of sample weights.

    Returns:
        (bestStump, minError, bestClassEst): a dict with keys ``'dim'``,
        ``'thresh'`` and ``'ineq'``; the weighted error of that stump as a
        1x1 matrix (``np.inf`` if there are no features); and its (m, 1)
        predictions.
    """
    dataMatrix = np.asmatrix(dataArr)
    labelCol = np.asarray(np.asmatrix(classLabels).T)
    weights = np.asmatrix(D)
    m, n = np.shape(dataMatrix)
    numSteps = 10.0
    bestStump = {}
    bestClassEst = np.asmatrix(np.zeros((m, 1)))
    minError = np.inf
    lowestError = np.inf  # scalar twin of minError used for comparisons
    for i in range(n):
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps) + 1):
            threshVal = rangeMin + float(j) * stepSize
            for inequal in ['lt', 'gt']:
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                # 1.0 where the stump is wrong, 0.0 where it is right.
                errArr = np.asmatrix((predictedVals != labelCol).astype(float))
                weightError = weights.T * errArr
                # Pull the scalar out explicitly; implicit conversion of a
                # 1x1 matrix to float is deprecated in NumPy.
                errorValue = weightError[0, 0]
                print("the error rate of this test is %.3f" % errorValue)
                if errorValue < lowestError:
                    lowestError = errorValue
                    minError = weightError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClassEst


def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost ensemble of decision stumps.

    Args:
        dataArr: m x n training samples.
        classLabels: sequence of m labels (+1 / -1).
        numIt: maximum number of boosting rounds; training stops early
            once the training error reaches zero.

    Returns:
        (weakClassArr, aggClassEst): the list of stump dicts (each with
        ``'dim'``, ``'thresh'``, ``'ineq'`` and ``'alpha'``) and the (m, 1)
        matrix of alpha-weighted vote totals on the training set.
    """
    weakClassArr = []
    m = np.shape(dataArr)[0]
    D = np.asmatrix(np.ones((m, 1)) / m)
    aggClassEst = np.asmatrix(np.zeros((m, 1)))
    labelMat = np.asmatrix(classLabels).T  # loop invariant
    for _ in range(numIt):
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        print("D:", D.T)
        errorValue = np.asarray(error).item()
        # The floor on the denominator avoids division by zero when the
        # stump is perfect.
        alpha = float(0.5 * log((1.0 - errorValue) / max(errorValue, 1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        print("classEst: ", classEst.T)
        # Misclassified samples gain weight, correct ones lose weight.
        expon = np.multiply(-1 * alpha * labelMat, classEst)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()
        aggClassEst += alpha * classEst
        print("aggClassEst: ", aggClassEst.T)
        mismatches = np.asarray(np.sign(aggClassEst)) != np.asarray(labelMat)
        errorRate = np.count_nonzero(mismatches) / m
        print("total error: ", errorRate)
        if errorRate == 0.0:
            break
    return weakClassArr, aggClassEst


def adaClassify(datToClass, classifierArr):
    """Classify samples with a trained stump ensemble.

    Args:
        datToClass: samples, anything ``np.asmatrix`` accepts as m x n.
        classifierArr: list of stump dicts as returned by
            ``adaBoostTrainDS``.

    Returns:
        An (m, 1) matrix with the sign of the alpha-weighted vote per row.
    """
    dataMatrix = np.asmatrix(datToClass)
    m = np.shape(dataMatrix)[0]
    aggClassEst = np.asmatrix(np.zeros((m, 1)))
    for stump in classifierArr:
        # The value printed is the stump's vote weight, not an error rate.
        print("the alpha of this classifier is %.6f" % (stump['alpha']))
        classEst = stumpClassify(dataMatrix, stump['dim'],
                                 stump['thresh'],
                                 stump['ineq'])
        aggClassEst += stump['alpha'] * classEst
    return np.sign(aggClassEst)


if __name__ == '__main__':
    datMat, classLabels = loadSimpData()
    D = np.asmatrix(np.ones((5, 1)) / 5)
    buildStump(datMat, classLabels, D)
    classifierArr, aggClassEst = adaBoostTrainDS(datMat, classLabels)
    print(adaClassify([[0, 0], [1, 0], [2, 2]], classifierArr))
    # NOTE(review): the boosting math assumes labels are +1 / -1 - confirm
    # that these files are encoded that way.
    datArr, labelArr = loadDataSet('horseColicTraining.txt')
    classifierArr, aggClassEst = adaBoostTrainDS(datArr, labelArr, 10)
    testArr, testLableArr = loadDataSet('horseColicTest.txt')
    prediction = adaClassify(testArr, classifierArr)
    # Use the real test-set size rather than a hard-coded sample count.
    numTest = len(testLableArr)
    wrong = np.asarray(prediction) != np.asarray(np.asmatrix(testLableArr).T)
    errRate = np.count_nonzero(wrong) / numTest
    print("after 10 iterations the average error rate is: %f" % (errRate))
数据集相关资料可以从Machine-learning中找到
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。