赞
踩
import pandas as pd
from collections import defaultdict
import numpy as np
import scipy.io as sio
from scipy.io import savemat
from sklearn import preprocessing
from scipy.io import loadmat
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import train_test_split
import time

# Load the data matrix and the class labels from the .mat file.
m = loadmat("Data\\optimal_data.mat")
data = m['data']
target = np.array(m['target']).flatten()  # flatten to a 1-D label vector

time_start = time.perf_counter()  # record start time for the reduction step

# Min-max scale every attribute column into [0, 1] so that all attributes
# contribute comparably to the neighborhood radii computed below.
min_max_scaler = preprocessing.MinMaxScaler()
data1 = min_max_scaler.fit_transform(data)
# Neighborhood rough set tweak: derive one radius per attribute column,
# avoiding the classic problem of hand-picking a single global radius.
# Radius = std(column) / lambda, with lambda = 0.5 (typical range 0.5-1.5).
neighbor_list = [np.std(data1[:, a]) / 0.5 for a in range(data1.shape[1])]
# delta-neighborhood of every sample under each single attribute.
# delta_neighbor[index][k] = indices v whose value on attribute `index`
# lies within that attribute's radius of sample k.  For a single scalar
# attribute the Euclidean distance reduces to an absolute difference,
# so the whole row of distances is computed vectorized instead of the
# original O(n^2) pure-Python pair loop.
delta_neighbor = list()  # one dict per attribute
for index in range(data1.shape[1]):
    col = data1[:, index]
    radius = neighbor_list[index]
    delta_neighbor_dict = {
        k: np.flatnonzero(np.abs(col - col[k]) <= radius).tolist()
        for k in range(data1.shape[0])
    }
    delta_neighbor.append(delta_neighbor_dict)
# Partition the universe by the decision attribute: U/IND(D).
# decision_value_dict maps each class label to the list of sample indices
# carrying that label.
decision_value_dict = defaultdict(list)
for sample_index, label in enumerate(target):
    decision_value_dict[label].append(sample_index)
# Lower approximations per attribute, U/IND(ai), and the resulting
# dependency degree of each single attribute:
#   corr = |POS_a(D)| / |U|
# A sample k belongs to the positive region POS when its delta-neighborhood
# is entirely contained in one decision class.
corr_list = list()
for index in range(data1.shape[1]):
    pos_region = set()
    for class_samples in decision_value_dict.values():
        class_set = set(class_samples)  # hoisted: convert once per class
        for k, neighborhood in delta_neighbor[index].items():
            if set(neighborhood).issubset(class_set):
                pos_region.add(k)
    # dependency degree of this attribute
    corr_list.append(len(pos_region) / data1.shape[0])
# Total dependency of the full attribute set A.  Neighborhoods now use the
# Euclidean distance over complete rows, with a radius derived from the
# std of the whole normalized matrix (same lambda = 0.5 rule as above).
A_neighbor = np.std(data1) / 0.5
n_samples = data1.shape[0]
A_pos_region = set()
for k in range(n_samples):
    # vectorized row-to-all-rows Euclidean distances
    distances = np.sqrt(np.sum(np.square(data1 - data1[k]), axis=1))
    neighborhood = np.flatnonzero(distances <= A_neighbor).tolist()
    # k is positive when its neighborhood fits inside one decision class;
    # the neighborhood always contains k itself, so at most one class matches.
    for class_samples in decision_value_dict.values():
        if set(neighborhood).issubset(set(class_samples)):
            A_pos_region.add(k)
            break
# dependency degree of the whole attribute set
A_corr = len(A_pos_region) / n_samples
# Keep attribute x when its dependency y actually contributes:
# `A_corr - y != A_corr` is a float-aware test for "y is not negligible
# relative to A_corr" (for exact arithmetic it is simply y != 0).
reduct_attribute = [x for x, y in enumerate(corr_list) if A_corr - y != A_corr]
print(reduct_attribute)  # indices of the reduced (kept) attributes
reduct_sets = data1[:, reduct_attribute]  # data restricted to the reduct
time_end = time.perf_counter()  # record end time
time_sum = time_end - time_start  # elapsed reduction time, in seconds
print(time_sum)
# Evaluate the reduct: train an SVM on the reduced feature set and report
# classification quality on a held-out 20% split.
X_train, X_test, y_train, y_test = train_test_split(
    reduct_sets, target, test_size=0.2, random_state=1)
clf = SVC()
clf.fit(X_train, y_train)
result = clf.predict(X_test)
# per-class precision / recall / F1
print(metrics.classification_report(y_test, result))
# Hamming loss in [0, 1]: 0 means predictions match the truth exactly,
# 1 means they disagree everywhere.
print("hamming_loss", (metrics.hamming_loss(y_test, result)))
几个月前写的这个基于粗糙集进行属性约简的代码。从网上看了很多这方面的代码,但都不太完善,也没有注释,故做此修改,菜鸟一枚,如有错误,欢迎指正~在我的电脑上,可以顺利运行。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。