赞
踩
数据来自 Kaggle。
# Load the required modules
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Data path. os.path.join is used instead of a 'Gear Data\...' literal: a
# backslash followed by a letter inside a normal string is an invalid escape
# sequence, and a hard-coded backslash only acts as a separator on Windows.
Directory = os.path.join('Gear Data', 'BrokenTooth')
for root, dirs, files in os.walk(Directory):
    for file_name in files:
        print(file_name)

# `root` and `files` still hold the values of the last directory visited by
# os.walk above; the first file of that directory is used as a sample.
path = os.path.join(root, files[0])
path  # bare expression: echoes the value when run interactively
df_temp = pd.read_csv(path)
df_temp

# Time-domain waveform of the first column
plt.plot(df_temp.iloc[:, 0])

# Build the label columns.
# The load value is parsed from the file name: the first five characters and
# the last four characters are dropped, the remainder is read as an integer
# and divided by 100.
load_col = [int(files[0][5:-4]) / 100] * len(df_temp)
lab = 'F'
label_col = [lab] * len(df_temp)
label_col
df_temp['load'] = load_col
df_temp['fault'] = label_col
df_temp
# Dataset assembly
def MakeDataset(Directory, lab):
    """Read every CSV file under *Directory* into one labelled DataFrame.

    Each file is loaded with ``pd.read_csv`` and gets two extra columns:
    ``load`` (the file name without its first five and last four characters,
    parsed as an integer and divided by 100) and ``fault`` (the value of
    *lab*). The path of every file that was read is printed.

    Args:
        Directory: Root folder that is walked recursively with ``os.walk``.
        lab: Label stored in the ``fault`` column of every row.

    Returns:
        The row-wise concatenation of all files (original row indices are
        kept). If no CSV file is found, an empty DataFrame with the columns
        ``a1``..``a4`` is returned.

    Raises:
        ValueError: If the numeric part of a CSV file name cannot be parsed.
    """
    frames = []
    for root, dirs, files in os.walk(Directory):
        for file_name in files:
            # Skip stray non-CSV files; the name slicing below assumes a
            # four-character '.csv' extension.
            if not file_name.lower().endswith('.csv'):
                continue
            path = os.path.join(root, file_name)
            df_temp = pd.read_csv(path)
            # Scalar assignment broadcasts the value to every row.
            df_temp['load'] = int(file_name[5:-4]) / 100
            df_temp['fault'] = lab
            frames.append(df_temp)
            print(path)

    if not frames:
        return pd.DataFrame(columns=['a1', 'a2', 'a3', 'a4'])
    # Concatenate once instead of growing a DataFrame inside the loop, which
    # re-copies all previous rows on every iteration.
    return pd.concat(frames, axis=0)
# Faulty (broken-tooth) data.
# Paths are built with os.path.join: a backslash inside a normal string
# literal is an invalid escape sequence and is only a separator on Windows.
Directory = os.path.join('Gear Data', 'BrokenTooth')
df_F = MakeDataset(Directory, lab='F')
df_F  # bare expression: echoes the frame when run interactively

# Healthy data
Directory = os.path.join('Gear Data', 'Healthy')
df_H = MakeDataset(Directory, lab='H')
df_H

# Stack the faulty and healthy frames and write them to disk so that the
# modelling steps can reload a single file.
df = pd.concat([df_F, df_H], axis=0)
df.to_csv('Gear_Fault_data.csv', index=False)
# Standardise the feature columns
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('Gear_Fault_data.csv')
scaler = StandardScaler()
# Every column except the last two is rescaled in place; the last two columns
# are left untouched.
feature_block = df.iloc[:, :-2]
df.iloc[:, :-2] = scaler.fit_transform(feature_block)
## Build the sample windows for the CNN
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

win_len = 100  # window length (rows per sample)
stride = 200   # step between window starts; larger than win_len, so the
               # rows between two consecutive windows are not used
X = []
Y = []
for k in ['F', 'H']:
    df_temp_1 = df[df['fault'] == k]

    # Group on the load values that are actually stored in the frame.
    # Comparing the column against np.arange(0, 1, 0.1) with `==` misses
    # loads such as 0.3, 0.6 and 0.7, because arange produces
    # 0.30000000000000004 etc., which are not equal to the stored floats.
    for _, df_temp_2 in df_temp_1.groupby('load', sort=True):
        # Stop early enough that row i + win_len still exists.
        for i in range(0, len(df_temp_2) - win_len, stride):
            # Sample: win_len consecutive rows, all columns but the last.
            X.append(df_temp_2.iloc[i:i + win_len, :-1])
            # Label: last column of the row directly after the window.
            Y.append(df_temp_2.iloc[i + win_len, -1])
# Training samples: stack the windows and append a trailing channel axis of
# size 1.
X = np.array(X)
n_samples, n_rows, n_cols = X.shape[0], X.shape[1], X.shape[2]
X = X.reshape((n_samples, n_rows, n_cols, 1))
# X = np.repeat(X, 3, axis=3)  # optional: repeat into a 3-channel format

# Labels: string labels -> integer codes -> one-hot rows.
Y = np.array(Y)
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
OHE_Y = to_categorical(encoded_Y)

# Shape of the sample array
X.shape
## t-SNE visualisation of the raw windows
from sklearn.manifold import TSNE

# Flatten every window into a single feature vector per sample.
X_pre_cnn = X.reshape(X.shape[0], X.shape[1] * X.shape[2])

# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` --
# confirm against the installed version.
tsne_model = TSNE(n_components=2, learning_rate='auto', verbose=1, perplexity=40, n_iter=300)
X_t_sne = tsne_model.fit_transform(X_pre_cnn)

component_names = ['t-SNE component 1', 't-SNE component 2']
tSNEdf = pd.DataFrame(data=X_t_sne, columns=component_names)
tSNEdf['Fault'] = Y

# Scatter plot of the two embedding components, coloured by fault label
fig, ax = plt.subplots(figsize=(7, 7))
sns.scatterplot(
    data=tSNEdf,
    x=tSNEdf[component_names[0]],
    y=tSNEdf[component_names[1]],
    hue='Fault',
    legend="full",
    alpha=0.3,
)
plt.show()
# Split into training and test sets (30 % of the samples held out, shuffled;
# no random seed is fixed, so the split differs between runs).
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, OHE_Y, test_size=0.3, shuffle=True)
X_train, X_test, y_train, y_test = split_parts
# Build the CNN model
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

no_classes = 2  # two classes

# Layers are added one by one in this order: two convolution / max-pooling
# stages, a flatten step, a 128-unit dense layer and the softmax output.
cnn_model = Sequential()
layer_stack = (
    Conv2D(32, kernel_size=(20, 3), activation='relu',
           input_shape=(X.shape[1], X.shape[2], 1), padding='same'),
    MaxPooling2D((20, 2), strides=(5, 5), padding='same'),
    Conv2D(64, (10, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(10, 2), strides=(3, 3), padding='same'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(no_classes, activation='softmax'),
)
for layer in layer_stack:
    cnn_model.add(layer)

cnn_model.summary()
cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Training parameters and CNN training
batch_size = 128
epochs = 5
# The held-out test split doubles as the validation data during training.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, y_test),
    shuffle=True,
)
history = cnn_model.fit(X_train, y_train, **fit_options)

# Save the trained model
cnn_model.save('CNN_model_gear.h5')
## Model performance evaluation
def inv_Transform_result(y_pred):
    """Convert rows of per-class scores back to the original labels.

    Args:
        y_pred: Array with one row per sample; the index of the largest
            value along axis 1 is taken as the class code.

    Returns:
        The result of the module-level ``encoder.inverse_transform`` applied
        to those class codes.
    """
    class_codes = y_pred.argmax(axis=1)
    return encoder.inverse_transform(class_codes)
from sklearn.metrics import confusion_matrix

# Predict on the test split and map both predictions and one-hot targets
# back to their label values.
y_pred = cnn_model.predict(X_test)
Y_pred = inv_Transform_result(y_pred)
Y_test = inv_Transform_result(y_test)

# Confusion matrix, drawn as an annotated heat map
plt.figure(figsize=(5, 5))
cm = confusion_matrix(Y_test, Y_pred)
tick_names = encoder.classes_
f = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=tick_names,
    yticklabels=tick_names,
)
plt.show()
# Output visualisation: t-SNE of the activations of cnn_model.layers[5]
from sklearn.manifold import TSNE

# Truncated model that stops at layer index 5 of the trained network.
probe_layer = cnn_model.layers[5]
dummy_cnn = Model(inputs=cnn_model.input, outputs=probe_layer.output)
y_viz = dummy_cnn.predict(X_train)
y_viz.shape

embedder = TSNE(n_components=2, learning_rate='auto', verbose=1, perplexity=40, n_iter=300)
X_t_sne = embedder.fit_transform(y_viz)

# The columns are named 'principal component ...' although the values come
# from t-SNE; the names also end up as the axis labels of the plot.
axis_names = ['principal component 1', 'principal component 2']
tSNEdf = pd.DataFrame(data=X_t_sne, columns=axis_names)
tSNEdf['Fault'] = inv_Transform_result(y_train)

# Scatter plot of the two embedding components
fig, ax = plt.subplots(figsize=(10, 10))
sns.scatterplot(
    data=tSNEdf,
    x=tSNEdf[axis_names[0]],
    y=tSNEdf[axis_names[1]],
    hue='Fault',
    legend="full",
    alpha=0.3,
)
plt.show()
# Flatten-layer visualisation: t-SNE of the activations of cnn_model.layers[4]
from sklearn.manifold import TSNE

# Truncated model that stops at layer index 4 of the trained network.
probe_layer = cnn_model.layers[4]
dummy_cnn = Model(inputs=cnn_model.input, outputs=probe_layer.output)
y_viz = dummy_cnn.predict(X_train)

embedder = TSNE(n_components=2, learning_rate='auto', verbose=1, perplexity=40, n_iter=300)
X_t_sne = embedder.fit_transform(y_viz)

axis_names = ['t-SNE component 1', 't-SNE component 2']
tSNEdf = pd.DataFrame(data=X_t_sne, columns=axis_names)
tSNEdf['Fault'] = inv_Transform_result(y_train)

# Scatter plot of the two embedding components
fig, ax = plt.subplots(figsize=(7, 7))
sns.scatterplot(
    data=tSNEdf,
    x=tSNEdf[axis_names[0]],
    y=tSNEdf[axis_names[1]],
    hue='Fault',
    legend="full",
    alpha=0.3,
)
plt.show()
赞
踩
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。