赞
踩
数据需要进行归一化,整理了以下四种版本
(1)按列进行归一化
# (1) Column-wise min-max scaling with scikit-learn's MinMaxScaler.
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

weight_unbn = np.loadtxt('D:\\weight_unbn.txt')

# fit() learns each column's minimum and maximum and returns the scaler
# itself, so the transform can be chained onto it.
scaler = MinMaxScaler()
weight_bn = scaler.fit(weight_unbn).transform(weight_unbn)
scaler.data_max_  # per-column maximum seen by fit(); a bare expression, only echoed at an interactive prompt

# Persist the scaled matrix both as plain text and as CSV.
np.savetxt('D:\\weight_bn.txt', weight_bn)
weight_bn_pd = pd.DataFrame(weight_bn)
weight_bn_pd.to_csv('D:\\weight_bn.csv')
# print(weight_bn_pd.head())  # uncomment to preview the first rows
(2)按列进行归一化
- import numpy as np
def noramlization(data):
    """Min-max scale ``data`` along axis 0 (per column for 2-D input).

    The misspelled function name is kept so existing callers keep working.

    Parameters
    ----------
    data : array_like
        One- or two-dimensional numeric data.

    Returns
    -------
    normData : numpy.ndarray
        ``(data - minVals) / ranges`` with the same shape as ``data``.
        A column whose values are all equal maps to 0.
    ranges : numpy.ndarray or numpy scalar
        ``max - min`` along axis 0, returned unmodified (0 for a constant
        column).
    minVals : numpy.ndarray or numpy scalar
        Minimum along axis 0.
    """
    data = np.asarray(data)
    minVals = data.min(0)
    ranges = data.max(0) - minVals
    # A constant column has range 0; divide by 1 there so it becomes 0
    # instead of NaN, which is also how scikit-learn's MinMaxScaler treats it.
    divisor = np.where(ranges == 0, 1, ranges)
    # Plain broadcasting aligns the per-column values with every row and,
    # unlike np.tile(..., (m, 1)), keeps 1-D input 1-D.
    normData = (data - minVals) / divisor
    return normData, ranges, minVals
-
# Load the raw weights and scale them with noramlization(); of its three
# return values only the scaled matrix is kept.
weight_unbn = np.array(np.loadtxt('D:\\weight_unbn.txt'))
weight_bn, _, _ = noramlization(weight_unbn)
# print(weight_bn[:3, :])  # uncomment to inspect the first three rows
(3)按列进行归一化
# (3) Column-wise min-max scaling through sklearn.preprocessing.
import numpy as np
from sklearn import preprocessing

weight_unbn = np.loadtxt('D:\\Data\\biclustering_data\\weight_unbn.txt')

# Learn the per-column minimum/maximum first, then apply the scaling.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(weight_unbn)
weight_bn = min_max_scaler.transform(weight_unbn)
# print(weight_bn[:3, :])  # uncomment to inspect the first three rows
(4)全局进行归一化
- import numpy as np
def noramlization(data):
    """Min-max scale ``data`` using one global minimum and maximum.

    The misspelled function name is kept so existing callers keep working.

    Parameters
    ----------
    data : array_like
        Numeric data of any shape; the minimum and maximum are taken over
        all elements.

    Returns
    -------
    normData : numpy.ndarray
        ``(data - minVals) / ranges`` with the same shape as ``data``.
        If every element is equal, the result is all zeros.
    ranges : numpy scalar
        ``maxVals - minVals``, returned unmodified (0 for constant data).
    minVals : numpy scalar
        Smallest element of ``data``.
    maxVals : numpy scalar
        Largest element of ``data``.
    """
    data = np.asarray(data)
    minVals = data.min()
    maxVals = data.max()
    ranges = maxVals - minVals
    # Constant data has range 0; divide by 1 so the result is 0, not NaN.
    divisor = 1 if ranges == 0 else ranges
    # Scalars broadcast over the whole array, so no np.tile is needed and
    # 1-D input keeps its shape.
    normData = (data - minVals) / divisor
    return normData, ranges, minVals, maxVals
-
# Small demo matrix for noramlization(), followed by a copy of it as in the
# original snippet.
weight_unbn = np.array(
    [
        [0, 1, 5, 3, 4],
        [5, 5.5, 6, 8, 9],
        [10, 11, 12, 13, 14],
    ]
)
weight_unbn = weight_unbn.copy()

# Unpack the scaled matrix together with the statistics it was built from.
weight_bn, ranges, minVals, maxVals = noramlization(weight_unbn)
print(weight_bn, ranges, minVals, maxVals)
# Global min-max scaling of a feature table with scikit-learn.
import numpy as np
import pandas as pd  # needed for read_csv / DataFrame below
from sklearn import preprocessing

genome_derived_features_con = pd.read_csv('D:/Data/features/genome_derived_features_con.csv', index_col=0)
min_max_scaler = preprocessing.MinMaxScaler()

# NOTE: converting to a plain array drops the row index and column labels,
# so the CSV written at the end carries default integer labels.
genome_derived_features_con = np.array(genome_derived_features_con)

# MinMaxScaler scales each column separately. Stacking every value into a
# single column makes it use one global minimum and maximum instead.
genome_derived_features_con_reshape = genome_derived_features_con.reshape([-1, 1])
genome_derived_features_con_row_norm_reshape = min_max_scaler.fit_transform(genome_derived_features_con_reshape)

# Restore the original table shape before saving.
genome_derived_features_con_row_norm = genome_derived_features_con_row_norm_reshape.reshape(genome_derived_features_con.shape)
genome_derived_features_con_row_norm = pd.DataFrame(genome_derived_features_con_row_norm)
genome_derived_features_con_row_norm.to_csv('D:/Data/features/genome_derived_features_con_row_norm_1.csv')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。