赞
踩
处理连续变量的效果不好;类别较多时,错误增加得比较快;可扩展性一般。
模拟源代码:
- # coding=utf-8
-
- #根据信息熵来计算信息获取量
- import math
-
def info(A):
    """Return the base-2 Shannon entropy of the binary label sequence A.

    A label equal to 1 is counted as a positive outcome; every other value
    is counted as a negative outcome.

    Args:
        A: sequence of outcome labels.

    Returns:
        The entropy as a float. An empty sequence, or one whose labels all
        fall into a single class, yields 0.0.
    """
    total = len(A)
    if total == 0:
        # No samples: avoid dividing by zero and report no uncertainty.
        return 0.0

    yes_num = sum(1 for label in A if label == 1)
    no_num = total - yes_num

    entropy = 0.0
    for count in (yes_num, no_num):
        # An empty class contributes nothing (the limit of p*log(p) at 0 is
        # 0); skipping it also keeps math.log away from 0.
        if count:
            # float() forces true division even under Python 2.
            p = count / float(total)
            entropy -= p * math.log(p, 2)
    return entropy
def _binary_entropy(yes_num, no_num):
    """Return the base-2 entropy of a two-class split given its class counts."""
    total = yes_num + no_num
    entropy = 0.0
    for count in (yes_num, no_num):
        # Skip empty classes: their contribution is 0 and log(0) is undefined.
        if count:
            # float() forces true division even under Python 2.
            p = count / float(total)
            entropy -= p * math.log(p, 2)
    return entropy


def info(D1, D2=None, D3=None, A=None):
    """Return the entropy of the labels, optionally conditioned on an attribute.

    This definition rebinds the name ``info``, so it has to serve both call
    forms used in this file:

    * ``info(A)`` -- only the first argument is given; it is treated as the
      label sequence and its plain binary entropy is returned.
    * ``info(D1, D2, D3, A)`` -- the weighted entropy of A after splitting
      the samples by the three indicator columns.

    Args:
        D1, D2, D3: indicator columns; a sample belongs to a column's
            partition when its entry equals 1. Each column is expected to
            have the same length as A.
        A: outcome labels; 1 counts as positive, anything else as negative.

    Returns:
        A non-negative float: the sum over the three partitions of
        (partition size / number of samples) * entropy of the partition's
        labels. Returns 0.0 when there are no samples.
    """
    if D2 is None and D3 is None and A is None:
        # Single-argument form: D1 actually carries the labels.
        labels = D1
        yes_num = sum(1 for label in labels if label == 1)
        return _binary_entropy(yes_num, len(labels) - yes_num)

    total = len(A)
    if total == 0:
        return 0.0

    conditional = 0.0
    for column in (D1, D2, D3):
        # Counters are local to each partition so that one partition's
        # counts never leak into the next.
        yes_num = 0
        no_num = 0
        for flag, label in zip(column, A):
            if flag == 1:
                if label == 1:
                    yes_num += 1
                else:
                    no_num += 1
        weight = (yes_num + no_num) / float(total)
        conditional += weight * _binary_entropy(yes_num, no_num)
    return conditional
def Gain(D, A):
    """Return the information gain of attribute D with respect to labels A.

    The gain is the entropy of the labels minus the entropy that remains
    after splitting by the attribute: ``info(A) - info(D1, D2, D3, A)``.

    Args:
        D: the attribute, unpacked into the three indicator columns that
            ``info`` takes.
        A: outcome labels.

    Raises:
        ValueError: if D does not unpack into exactly three items.
    """
    # NOTE(review): the original referenced D1, D2 and D3 without defining
    # them; this assumes D is a sequence of exactly those three columns --
    # confirm against the data passed in by the caller.
    D1, D2, D3 = D
    return info(A) - info(D1, D2, D3, A)
-
def main():
    """Compute the information gain of every attribute and sort the results.

    Reads the module-level names ``All``, ``D`` and ``A``, none of which is
    defined in the visible part of this file.

    Returns:
        The list of gains in ascending order.
    """
    # Build the list directly: assigning gain[i] on an empty list raises
    # IndexError.
    gain = [Gain(D[i], A) for i in range(len(All))]
    gain.sort()
    return gain
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。