赞
踩
本期给大家介绍如何用 PyTorch 的 DataLoader 创建数据集。
# --- Standard library ---
import os
import time

# --- Third-party: numerics, plotting, machine learning ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.functional as F
from matplotlib import rcParams
from matplotlib.pylab import style
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import confusion_matrix  # for confusion-matrix plots
from torch import nn

# style.use('ggplot')  # optional plot theme, left disabled by the author

# Global matplotlib defaults applied to every figure created afterwards.
config = {
    "font.family": 'serif',        # use a serif font family
    "font.size": 10,               # base font size in points
    "font.serif": ['SimSun'],      # SimSun (Song typeface) so CJK text renders
    "mathtext.fontset": 'stix',    # math-text font; looks close to Times New Roman
    'axes.unicode_minus': False,   # draw '-' as an ASCII hyphen so it shows up with this font
}
rcParams.update(config)
def data_read(file_path):
    """
    Load one CWRU ``.mat`` recording and return its standardised signal.

    The signal is read from the first variable stored in the file, i.e. the
    first key that is not loader metadata (``__header__``, ``__version__``,
    ``__globals__``).  For files that carry those three metadata keys this is
    the same entry the previous positional lookup (index 3) selected, but it
    no longer depends on how many metadata keys precede the data.
    NOTE(review): the original author states this variable is the
    acceleration signal -- confirm for files with a different layout.

    :param file_path: path of the .mat file, eg: r'D:.../01_示例数据/1750_12k_0.021-OuterRace3.mat'
    :return accl_data: 1-D numpy array, z-score standardised over the whole
        recording (zero mean, unit standard deviation)
    :raises ValueError: if the file contains no data variable
    """
    # Local import, as in the original, so scipy is only needed when reading.
    import scipy.io as scio

    mat_contents = scio.loadmat(file_path)  # dict: variable name -> array

    # Skip the bookkeeping entries the loader adds; keep real variables only.
    variable_keys = [key for key in mat_contents if not key.startswith('__')]
    if not variable_keys:
        raise ValueError('no data variable found in {!r}'.format(file_path))

    # Flatten the stored (column) array into a 1-D signal.
    accl_data = mat_contents[variable_keys[0]].flatten()

    # Z-score standardisation over the whole recording.
    accl_data = (accl_data - np.mean(accl_data)) / np.std(accl_data)
    return accl_data
def data_spilt(data, num_2_generate=20, each_subdata_length=1024):
    """
    Cut a 1-D signal into evenly spaced (possibly overlapping) windows.

    ``num_2_generate`` windows of ``each_subdata_length`` samples are taken.
    The first window starts at sample 0 and consecutive windows start
    ``step_length`` samples apart, where ``step_length`` is chosen so the last
    window ends at, or just before, the end of the signal.  Every window is
    z-score normalised on its own (a constant window therefore yields NaN,
    exactly as a plain division by its zero standard deviation would).

    :param data: 1-D sequence or numpy array holding the signal
    :param num_2_generate: number of windows to produce (>= 1)
    :param each_subdata_length: number of samples per window
    :return spilt_data_arr: numpy array of shape
        (num_2_generate, each_subdata_length)
    :raises ValueError: if the signal is not 1-D, ``num_2_generate`` < 1, or
        the window length does not fit inside the signal
    """
    # Avoid the costly element-by-element list conversion for arrays.
    signal = data if isinstance(data, np.ndarray) else np.asarray(list(data))
    if signal.ndim != 1:
        raise ValueError('data must be one-dimensional')

    total_length = len(signal)
    if num_2_generate < 1:
        raise ValueError('num_2_generate must be at least 1')
    if each_subdata_length < 1 or each_subdata_length > total_length:
        raise ValueError(
            'each_subdata_length must be between 1 and len(data) '
            '(got {} for a signal of {} samples)'.format(
                each_subdata_length, total_length))

    # Distance between consecutive window starts.  With a single window there
    # is nothing to stride over (the old formula divided by zero here).
    if num_2_generate == 1:
        step_length = 0
    else:
        step_length = (total_length - each_subdata_length) // (num_2_generate - 1)

    # Row k of the index matrix holds the sample positions of window k.
    window_starts = np.arange(num_2_generate) * step_length
    sample_index = window_starts[:, None] + np.arange(each_subdata_length)
    windows = signal[sample_index]

    # Per-window z-score normalisation.
    window_mean = windows.mean(axis=1, keepdims=True)
    window_std = windows.std(axis=1, keepdims=True)
    spilt_data_arr = (windows - window_mean) / window_std
    return spilt_data_arr
# ---- Build the training samples --------------------------------------------
# Expected layout: <data_base_dir>/<fault_type>/<recording>.mat, where every
# sub-directory name must be a key of fault_type2fault_label.
data_base_dir = r'D:\22-学习记录\01_自己学习积累\03_基于CNN的轴承故障诊断(4分类)\dataset/train_data'
fault_type_list = os.listdir(data_base_dir)
file_path_list = list()
train_data = []   # one normalised window per entry
train_label = []  # class label (as a string) for each window, eg: ['0', '0', ..., '3']

# Directory name -> class label.  Built once instead of once per file.
# NOTE(review): presumably BF/OF/IF = ball / outer-race / inner-race fault.
fault_type2fault_label = {'BF':'0', 'OF':'1', 'IF':'2', 'Normal':'3'}

for fault_type in fault_type_list:
    # An unknown directory name raises KeyError rather than being skipped.
    fault_label = fault_type2fault_label[fault_type]
    fault_dir = os.path.join(data_base_dir, fault_type)
    file_name_list = os.listdir(fault_dir)
    for file_name in file_name_list:
        print(file_name)
        num_2_generate = 60  # windows generated per .mat file
        if 'Normal' in file_name:
            # Normal recordings get 6x as many windows (360 per file).
            num_2_generate = num_2_generate*6
        file_path = os.path.join(fault_dir, file_name)

        # Read one recording, then cut it into sliding-window sub-samples.
        data = data_read(file_path)
        sub_data_list = data_spilt(data=data, num_2_generate=num_2_generate)

        # One label per window.  Repeating the label inside a list (rather
        # than multiplying the string and splitting it into characters) stays
        # correct for labels longer than one character.
        train_label.extend([fault_label] * len(sub_data_list))
        train_data.extend(sub_data_list)
输出结果:
1730_12k_0.007-Ball.mat
1730_12k_0.014-Ball.mat
1730_12k_0.021-Ball.mat
1730_48k_0.007-Ball.mat
1730_48k_0.014-Ball.mat
1730_48k_0.021-Ball.mat
...
1772_12k_0.021-OuterRace3.mat
1772_48k_0.007-OuterRace3.mat
1772_48k_0.014-OuterRace6.mat
1772_48k_0.021-OuterRace3.mat
# ---- Build the test samples -------------------------------------------------
# Expected layout: <data_base_dir>/<fault_type>/<recording>.mat, where every
# sub-directory name must be a key of fault_type2fault_label.
data_base_dir = r'D:\22-学习记录\01_自己学习积累\03_基于CNN的轴承故障诊断(4分类)/dataset/test_data'
fault_type_list = os.listdir(data_base_dir)
file_path_list = list()
test_data = []   # one normalised window per entry
test_label = []  # class label (as a string) for each window, eg: ['0', '0', ..., '3']

# Directory name -> class label.  Built once instead of once per file.
# NOTE(review): presumably BF/OF/IF = ball / outer-race / inner-race fault.
fault_type2fault_label = {'BF':'0', 'OF':'1', 'IF':'2', 'Normal':'3'}

for fault_type in fault_type_list:
    # An unknown directory name raises KeyError rather than being skipped.
    fault_label = fault_type2fault_label[fault_type]
    fault_dir = os.path.join(data_base_dir, fault_type)
    file_name_list = os.listdir(fault_dir)
    for file_name in file_name_list:
        print(file_name)
        num_2_generate = 30  # windows generated per .mat file
        if 'Normal' in file_name:
            # Normal recordings get 6x as many windows (180 per file).
            num_2_generate = num_2_generate*6
        file_path = os.path.join(fault_dir, file_name)

        # Read one recording, then cut it into sliding-window sub-samples.
        data = data_read(file_path)
        sub_data_list = data_spilt(data=data, num_2_generate=num_2_generate)

        # One label per window.  Repeating the label inside a list (rather
        # than multiplying the string and splitting it into characters) stays
        # correct for labels longer than one character.
        test_label.extend([fault_label] * len(sub_data_list))
        test_data.extend(sub_data_list)
输出结果:
1797_12k_0.007-Ball.mat
1797_12k_0.014-Ball.mat
1797_12k_0.021-Ball.mat
1797_48k_0.007-Ball.mat
...
1797_12k_0.021-OuterRace3.mat
1797_48k_0.007-OuterRace3.mat
1797_48k_0.014-OuterRace6.mat
1797_48k_0.021-OuterRace3.mat
from torch.utils.data import DataLoader, TensorDataset

# ---- Training split: Python lists -> numpy arrays -> tensors ----
# Stacked windows: (n_windows, window_length).  Labels: integer vector.
train_data = np.array(train_data)
train_label = np.array(train_label, dtype=int)

# Features become float32 with an extra axis inserted at position 1:
# (n_windows, window_length) -> (n_windows, 1, window_length).
# NOTE(review): the added axis is presumably the channel dimension for a 1-D CNN.
train_x = torch.from_numpy(train_data).to(torch.float32).unsqueeze(1)
# Labels become a 64-bit integer tensor of shape (n_windows,).
train_y = torch.from_numpy(train_label).to(torch.int64)

# ---- Test split: identical conversion ----
test_data = np.array(test_data)
test_label = np.array(test_label, dtype=int)
test_x = torch.from_numpy(test_data).to(torch.float32).unsqueeze(1)
test_y = torch.from_numpy(test_label).to(torch.int64)
看看train_data的维度
train_data.shape
# 输出结果
(4320, 1024)
# ---- Create the datasets ----
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

full_train_ds = TensorDataset(train_x, train_y)

# Hold out 20% of the training samples for validation (train:valid = 4:1).
# The split is done on sample *indices* and wrapped in Subset views, so both
# parts remain lazy torch datasets instead of being turned into Python lists
# of (x, y) tuples.  The seed is fixed so the split is reproducible.
train_idx, valid_idx = train_test_split(
    list(range(len(full_train_ds))), test_size=0.2, random_state=42
)
train_ds = Subset(full_train_ds, train_idx)
valid_ds = Subset(full_train_ds, valid_idx)
test_ds = TensorDataset(test_x, test_y)

# ---- Create the dataloaders ----
batch_size = 64  # samples per mini-batch
# Training batches are reshuffled every epoch.
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
# NOTE(review): shuffling is kept as in the original; it is not required for
# validation.
valid_dl = DataLoader(dataset=valid_ds, shuffle=True, batch_size=batch_size)
# Test batches keep their original order.
test_dl = DataLoader(dataset=test_ds, batch_size=batch_size, shuffle=False)
# Walk through the training loader once and print every batch
# (batch index, feature tensor, label tensor) as a sanity check.
for i, batch in enumerate(train_dl):
    imgs, labels = batch
    print(i, imgs, labels)
## 输出结果 0 tensor([[[-0.3515, 0.3796, 1.1076, ..., -1.5466, -1.2220, -0.3242]], [[ 1.3776, 1.4640, 0.7423, ..., 0.1370, 0.3632, 0.6326]], [[-0.4793, -0.6978, -0.5840, ..., -0.6917, -1.3257, -1.4273]], ..., [[-0.2387, -0.5382, -1.1767, ..., 0.8166, 0.7749, -2.3905]], [[-0.9560, -1.3880, -1.7271, ..., 0.4720, 0.2392, 0.1732]], [[ 0.2588, 0.0780, -0.5720, ..., -0.0647, 0.4554, 1.2545]]]) tensor([3, 3, 3, 1, 2, 3, 3, 0, 2, 3, 2, 1, 3, 2, 2, 2, 3, 2, 2, 0, 2, 1, 2, 2, 1, 2, 3, 3, 1, 2, 1, 0, 2, 1, 3, 0, 1, 3, 1, 3, 3, 1, 1, 3, 2, 0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 3, 1, 3, 3, 0, 2, 2, 0, 3]) ...
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。