赞
踩
import operator
from collections import Counter
from math import log

import numpy as np
import pandas as pd


def calcShannonEnt(dataSet):
    """Return the Shannon entropy (in bits) of the class labels in *dataSet*.

    Args:
        dataSet: A sized sequence of rows. The last element of each row is
            used as that row's class label and must be hashable.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label, as a float. An empty data set yields ``0.0``.
    """
    numEntries = len(dataSet)  # number of rows
    # Use len() rather than truthiness so array-like inputs are not
    # evaluated in a boolean context.
    if numEntries == 0:
        return 0.0

    # Count how many rows fall into each class (last column of every row).
    labelCounts = Counter(featVec[-1] for featVec in dataSet)

    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division regardless of interpreter version.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)  # accumulate each class's contribution
    return shannonEnt


# NOTE(review): the source text is truncated inside createDataSet1 below; the
# rest of the definition is not available here. The visible fragment is kept
# verbatim (commented out so the module stays importable) -- restore the full
# function from the original source.
#
# def createDataSet1(): # create the data set
#     dataSet = [['<=30', 'high', 'no', 'fair', 'no'], ['<=30', 'high', 'no', 'excellent', 'no'], ['31…40', 'high', 'no', 'fair'
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。