当前位置:   article > 正文

python-提取特征 & 特征选择_python 脑电信号特征提取

python 脑电信号特征提取

1. dataset: https://archive.ics.uci.edu/ml/datasets/Adult

2. 讲解 & 代码

# dataset: https://archive.ics.uci.edu/ml/datasets/Adult

import os
import pandas as pd
# Path of the Adult data file, resolved against the current working directory.
data_folder = os.path.join(os.getcwd(), "Data/adult.data")

# header=None: the first line of the file is read as data, so the column
# names are supplied explicitly.
column_names = [
    "Age",
    "Work-Class",
    "fnlwgt",
    "Education",
    "Education-Num",
    "Marital-Status",
    "Occupation",
    "Relationship",
    "Race",
    "Sex",
    "Capital-gain",
    "Capital-loss",
    "Hours-per-week",
    "Native-country",
    "Earnings-Raw",
]
adult = pd.read_csv(data_folder, header=None, names=column_names)

# Drop only the rows in which every field is missing.
adult.dropna(how="all", inplace=True)

# Exploratory checks, left disabled:
# print(adult.columns)
# print(adult["Hours-per-week"].describe())
# print(adult["Work-Class"].unique())

# Create a feature: True for rows with more than 40 hours per week.
adult["LongHours"] = adult["Hours-per-week"].gt(40)

# Feature matrix built from five numeric columns.
feature_columns = [
    "Age",
    "Education-Num",
    "Capital-gain",
    "Capital-loss",
    "Hours-per-week",
]
x = adult[feature_columns].values

# Boolean target: True where the raw earnings field equals ' >50K'
# (the leading space is part of the compared value).
y = adult["Earnings-Raw"].eq(' >50K').values
print(y)

from sklearn.feature_selection import SelectKBest, chi2

# Keep the 3 features with the highest chi-squared score against y.
transformer = SelectKBest(score_func=chi2, k=3)
Xt_chi2 = transformer.fit_transform(x, y)
print(transformer.scores_)

from scipy.stats import pearsonr
import numpy as np
def multivariate_pearsonr(x, y):
    """Compute the Pearson correlation of every column of ``x`` with ``y``.

    Args:
        x: 2-D array; each column ``x[:, i]`` is correlated with ``y``.
        y: 1-D array passed unchanged to ``scipy.stats.pearsonr``.

    Returns:
        A tuple ``(scores, pvalues)`` of NumPy arrays holding, per column,
        the correlation coefficient and its p-value.

    NOTE(review): the coefficients are returned signed, so a selector that
    ranks by largest score will rank strongly negative correlations last.
    """
    per_feature = [
        tuple(pearsonr(x[:, idx], y)) for idx in range(x.shape[1])
    ]
    coefficients = np.array([pair[0] for pair in per_feature])
    p_values = np.array([pair[1] for pair in per_feature])
    return (coefficients, p_values)

# Keep the 3 features that score highest under the per-column Pearson
# correlation computed by multivariate_pearsonr.
transformer = SelectKBest(score_func=multivariate_pearsonr, k=3)
Xt_pearsonr = transformer.fit_transform(x, y)
print(transformer.scores_)

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

# Compare both feature subsets using the same classifier configuration.
clf = DecisionTreeClassifier(random_state=14)
score_chi2 = cross_val_score(clf, Xt_chi2, y, scoring='accuracy')
score_pearsonr = cross_val_score(clf, Xt_pearsonr, y, scoring='accuracy')

# Mean accuracies as percentages: chi2 first, Pearson second.
# The second placeholder must be {1}; the original used {0} twice and so
# printed the chi2 accuracy in both positions.
print("{0:.3f},{1:.3f}".format(np.mean(score_chi2*100), np.mean(score_pearsonr*100)))
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50

3. dataset: https://archive.ics.uci.edu/ml/datasets/Internet+Advertisements

4. 代码 & 讲解

import os
from collections import defaultdict

import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Path of the advertisements data file, resolved against the current
# working directory.
working_dir = os.getcwd()
data_file = os.path.join(working_dir, "Data/ad.data")

def convert_number(x):
    """Parse ``x`` as a float, yielding NaN when ``float`` rejects the value.

    Args:
        x: Value handed to ``float()``.

    Returns:
        ``float(x)`` on success, or ``np.nan`` if ``float`` raises
        ``ValueError``.
    """
    try:
        parsed = float(x)
    except ValueError:
        parsed = np.nan
    return parsed

# Per-column converters for read_csv.
#
# A plain dict is used on purpose: defaultdict(convert_number) is not a valid
# fallback, because a default factory is called with no arguments while
# convert_number requires one.
#
# Handle question marks: every feature column (0..1557) goes through
# convert_number, which returns a float and turns unparsable fields such as
# "?" into NaN. The previous per-column lambdas left all other values as raw
# strings.
converters = {column: convert_number for column in range(1558)}

# Column 1558 is the label: 1 for "ad.", 0 for anything else.
converters[1558] = lambda x: 1 if x.strip() == "ad." else 0

ads = pd.read_csv(data_file, header=None, converters=converters, low_memory=False)


print(ads[:5])

# Drop every row that contains at least one NaN.
ads.dropna(inplace=True)

# Feature matrix (all columns except the label) and the label column itself.
X = ads.drop(1558, axis=1).values
y = ads[1558]

from sklearn.decomposition import PCA

# Reduce the feature matrix to 5 principal components.
pca = PCA(n_components=5)
Xd = pca.fit_transform(X)

# Print the explained-variance ratios with 5 decimals and no scientific
# notation.
np.set_printoptions(precision=5, suppress=True)
print(pca.explained_variance_ratio_)

# Cross-validated accuracy of a decision tree on the reduced data, printed
# as a mean percentage.
clf = DecisionTreeClassifier(random_state=14)
scores_reduce = cross_val_score(clf, Xd, y, scoring='accuracy')
mean_accuracy_pct = np.mean(scores_reduce*100)
print(mean_accuracy_pct)

from matplotlib import pyplot as plt

# Scatter the first two PCA components, one colour per class label.
classes = set(y)
colors = ['red', 'green']
for cur_class, color in zip(classes, colors):
    # Boolean row selector for the samples that belong to this class.
    mask = (y == cur_class).values
    first_component = Xd[mask, 0]
    second_component = Xd[mask, 1]
    plt.scatter(
        first_component,
        second_component,
        marker='o',
        color=color,
        label=int(cur_class),
    )
plt.legend()
plt.show()
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/IT小白/article/detail/358922
推荐阅读
相关标签
  

闽ICP备14008679号