当前位置:   article > 正文

python3 带注释:基于粗糙集的属性约简(python 属性约简代码)

python属性约简代码
  1. import pandas as pd
  2. from collections import defaultdict
  3. import numpy as np
  4. import scipy.io as sio
  5. from scipy.io import savemat
  6. from sklearn import preprocessing
  7. from scipy.io import loadmat
  8. from sklearn.svm import SVC
  9. from sklearn import metrics
  10. from sklearn.model_selection import train_test_split
  11. import time
  12. #加载数据
  13. m= loadmat("Data\\optimal_data.mat")
  14. data=m['data']
  15. target=m['target']
  16. target=np.array(target).flatten()
  17. time_start = time.perf_counter() # 记录开始时间
  18. #归一化处理
  19. min_max_scaler = preprocessing.MinMaxScaler()
  20. x_minmax = min_max_scaler.fit_transform(data)
  21. data1=x_minmax
  22. #print(data1)
  23. # #邻域粗糙集创新点:对每个属性列进行单独的归一化处理
  24. neighbor_list=list()#存放邻域的集合
  25. for a in range(data1.shape[1]):
  26. # 计算每个属性的邻域,避免原始邻域粗糙集邻域半径不能确定问题,lambda(一般取0.5-1.5之间)
  27. neighbor = np.std(data1[:,a]) / 0.5
  28. neighbor_list.append(neighbor)
  29. #print(neighbor_list)
  30. # #计算样本的δ邻域
  31. delta_neighbor_dict = dict()
  32. delta_neighbor = list()#存放每个属性对应邻域的样本集合
  33. for index in range(data1.shape[1]):
  34. delta_neighbor_dict=dict()
  35. for k in range(data1.shape[0]):
  36. delta_neighbor_list = list()
  37. for v in range(data1.shape[0]):
  38. #欧氏距离计算样本间距离
  39. dis = np.sqrt(np.sum(np.square(data1[k,index] - data1[v,index])))
  40. if dis <= neighbor_list[index]:
  41. delta_neighbor_list.append(v)
  42. delta_neighbor_dict.update({k: delta_neighbor_list})
  43. delta_neighbor.append(delta_neighbor_dict)
  44. #print(delta_neighbor)
  45. #对决策属性进行划分,计算U/IND(D)
  46. sample_list = list()
  47. decision_value_dict=defaultdict(list)
  48. for m, n in [(n, m) for m, n in list(enumerate(target[:]))]:
  49. decision_value_dict[m].append(n) # m为某个标签,n为某些样本
  50. #print(decision_value_dict)
  51. #计算各个属性的下近似U/IND(ai) & U/IND(A)
  52. #计算当前特征下的属性依赖度
  53. corr_list=list()
  54. for index in range(data1.shape[1]):
  55. low_similar_set = defaultdict(list)
  56. for m in decision_value_dict.keys():
  57. #print(m)
  58. for k in delta_neighbor[index].keys():
  59. if set(delta_neighbor[index].get(k)).issubset(decision_value_dict.get(m)):
  60. #low_similar_set[m].append(delta_neighbor[index].get(k))
  61. low_similar_set[m].append(k)
  62. #print(low_similar_set)
  63. h_sum = [0]
  64. for i in low_similar_set.values():
  65. h_sum += i
  66. h_sum.pop(0)
  67. POS = list(set(h_sum))
  68. #print(POS)
  69. #计算当前特征下的属性依赖度
  70. corr = len(POS) / data1.shape[0]
  71. #print(corr)
  72. corr_list.append(corr)
  73. #print(corr_list)
  74. #计算总依赖度
  75. A_neighbor=np.std(data1[:,:])/0.5
  76. # print(A_neighbor)
  77. A_delta_neighbor_list = list()
  78. A_delta_neighbor = list()
  79. for k in range(data1.shape[0]):
  80. A_delta_neighbor_list = list()
  81. A_delta_neighbor_dict = dict()
  82. for v in range(data1.shape[0]):
  83. #欧氏距离计算样本间距离
  84. A_dis = np.sqrt(np.sum(np.square(data1[k] - data1[v])))
  85. if A_dis <= A_neighbor:
  86. A_delta_neighbor_list.append(v)
  87. A_delta_neighbor_dict.update({k: A_delta_neighbor_list})
  88. #print(A_delta_neighbor_dict)
  89. A_delta_neighbor.append(A_delta_neighbor_dict)
  90. #print(A_delta_neighbor)
  91. A_low_similar_set = defaultdict(list)
  92. for element in A_delta_neighbor:
  93. for h,k in element.items():
  94. for m in decision_value_dict.keys():
  95. if set(k).issubset(decision_value_dict.get(m)):
  96. A_low_similar_set[m].append(h)
  97. #print(A_low_similar_set)
  98. A_h_sum = [0]
  99. for l in A_low_similar_set.values():
  100. A_h_sum += l
  101. A_h_sum.pop(0)
  102. A_POS = list(set(A_h_sum))
  103. #计算当前特征下的属性依赖度
  104. A_corr = len(A_POS) / data1.shape[0]
  105. #print(A_corr)
  106. reduct_attribute = [x for x, y in list(enumerate(corr_list)) if A_corr - y != A_corr]
  107. print(reduct_attribute)#输出约简属性
  108. reduct_sets=data1[:,reduct_attribute]
  109. #print(data1[:,reduct_attribute])#输出约简属性对应集合
  110. time_end = time.perf_counter() # 记录结束时间
  111. time_sum = time_end - time_start # 计算的时间差为程序的执行时间,单位为秒/s
  112. print(time_sum)#输出CPU的执行时间
  113. X_train, X_test, y_train, y_test = train_test_split(reduct_sets, target, test_size=0.2,random_state=1)
  114. clf = SVC()
  115. clf.fit(X_train,y_train)
  116. result = clf.predict(X_test)
  117. #准确率 召回率 F值
  118. print(metrics.classification_report(y_test, result))
  119. #汉明损失:取值在0~1之间,距离为0说明预测结果与真实结果完全相同,距离为1就说明模型与我们想要的结果完全就是背道而驰
  120. print("hamming_loss", (metrics.hamming_loss(y_test, result)))

几个月前写的这个基于粗糙集进行属性约简的代码。从网上看了很多这方面的代码,但都不太完善,也没有注释,故做此修改,菜鸟一枚,如有错误,欢迎指正~在我的电脑上,可以顺利运行。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Cpp五条/article/detail/685690
推荐阅读
相关标签
  

闽ICP备14008679号