当前位置:   article > 正文

keras实战-入门之RNN(GRU)自编码器_gru自编码器

gru自编码器

@TOC

RNN(GRU)自编码器

用GRU搭建自编码器来复原语音:先将语音信号切分成向量序列输入到RNN里进行编码,压缩成低维特征,然后再解码还原成原始波形。
样本链接,提取码:vive

import keras
import numpy as np
import librosa
import librosa.display
import os
import os.path
import random
import time
import sys
import matplotlib.pyplot as plt
# Global matplotlib text settings. SimHei is presumably chosen so that the
# Chinese plot titles used later in this script render correctly.
font = {'family' : 'SimHei',
        'size'   : '15'}
plt.rc('font', **font)               # Step 1: apply the font family/size settings
plt.rc('axes', unicode_minus=False)  # Step 2: fix how the minus sign is drawn on axis ticks
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
Using TensorFlow backend.
  • 1
def get_wav_path(filePath):
    """Collect the paths of all ``.wav`` files one directory level below *filePath*.

    The expected layout is ``filePath/<sub-directory>/<name>.wav``.

    :param filePath: root directory whose immediate sub-directories hold the audio files
    :return: list of ``"filePath/<sub-directory>/<name>"`` strings (joined with ``/``),
        in the order reported by ``os.listdir``
    """
    wavPath = []
    for folder in os.listdir(filePath):
        folderPath = filePath + "/" + folder
        # A stray regular file next to the sub-directories cannot be listed; skip it.
        if not os.path.isdir(folderPath):
            continue
        for name in os.listdir(folderPath):
            # splitext inspects only the final extension, so a name without a dot
            # does not raise and multi-dot names ("a.b.wav", "a.wav.bak") are
            # classified by their real suffix.
            if os.path.splitext(name)[1] == ".wav":
                wavPath.append(folderPath + "/" + name)
    return wavPath
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
def get_data(wav_files,second=3,sampleRate=16000):
    """Load audio files and return the first *second* seconds of each as one array.

    Clips shorter than ``second * sampleRate`` samples are skipped (not padded),
    so every row of the result has the same length. A one-line text progress
    bar is rewritten on stdout after each file.

    :param wav_files: paths of the audio files to load
    :param second: clip length in seconds
    :param sampleRate: sample rate passed to ``librosa.load``
    :return: ``np.array`` with one row of ``int(second * sampleRate)`` samples per
        clip that was long enough (an empty array when none were)
    """
    x = []
    total = len(wav_files)
    begin_time = time.time()
    print('开始预处理文件')
    for index, wav in enumerate(wav_files):
        # Load the waveform together with the sample rate it was loaded at.
        signal, srate = librosa.load(wav, sr=sampleRate)
        # Keep only clips that are long enough, truncated to the target length.
        if len(signal) >= second * srate:
            x.append(signal[0:int(second * srate)])

        # Report progress for every file, including skipped ones, so the bar
        # always reaches 100% at the end of the run.
        gaptime = time.time() - begin_time
        percent = float(index + 1) * 100 / total
        # The +0.01 keeps the estimate finite for very small percentages.
        eta_time = gaptime * 100 / (percent + 0.01) - gaptime
        strprogress = "[" + "=" * int(percent // 2) + ">" + "-" * int(50 - percent // 2) + "]"
        str_log = ("%.2f %% %s %s/%s \t 已用时间:%ds 剩余时间:%d s" % (percent,strprogress,index+1,total,gaptime,eta_time))
        # '\r' returns to the start of the line so the bar is redrawn in place.
        sys.stdout.write('\r' + str_log)

    return np.array(x)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD, Adam
from keras.models import Model
from keras.callbacks import ModelCheckpoint,TensorBoard
from keras.models import load_model
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
# Gather the paths of the training clips stored under rnn_wavs/train/<sub-directory>/.
filePath = "rnn_wavs/train"
wavPath = get_wav_path(filePath)
  • 1
  • 2
# Load every training clip with get_data's defaults (3 seconds at 16000 Hz).
x=get_data(wavPath)
  • 1
开始预处理文件
100.00 % [==================================================>] 50/50 	 已用时间:2s 剩余时间:0 s
  • 1
  • 2
# Sanity check: array shape and the raw samples of the first clip.
print(x.shape)
print(x[0])
  • 1
  • 2
(50, 48000)
[-0.0090332  -0.01293945 -0.01196289 ... -0.03439331 -0.0390625
 -0.03811646]
  • 1
  • 2
  • 3
def show_wave(x,title='waveplot',sr=16000):
    """Plot the waveform *x* with librosa and set the plot title.

    :param x: audio time series to draw
    :param title: title shown above the plot
    :param sr: sample rate of *x*, forwarded to librosa for the time axis
    """
    # Newer librosa releases provide ``waveshow`` in place of ``waveplot``;
    # keep using ``waveplot`` when it exists and fall back otherwise.
    plot = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow
    plot(x, sr=sr)
    plt.title(title)
  • 1
  • 2
  • 3
  • 4
# Plot the waveform of the first training clip.
show_wave(x[0])
  • 1

在这里插入图片描述

from keras import Model
from keras.layers import Dense,Input,CuDNNGRU
from keras.callbacks import TensorBoard
  • 1
  • 2
  • 3
# Split each audio sequence into N vectors of 100 dimensions (a provisional choice);
# these vectors are what gets fed to the RNN. Think of each clip as a sentence made
# of several words, where every word is a 100-dimensional vector.
seq_vector=100
# -1 lets numpy infer the number of vectors per clip from the clip length.
x_train=x.reshape(x.shape[0],-1,seq_vector)
print(x_train.shape)
print(x_train[0])
  • 1
  • 2
  • 3
  • 4
  • 5
(50, 480, 100)
[[-0.0090332  -0.01293945 -0.01196289 ... -0.01239014 -0.01223755
  -0.01193237]
 [-0.01205444 -0.01248169 -0.01263428 ... -0.01290894 -0.0128479
  -0.01260376]
 [-0.01263428 -0.01239014 -0.01248169 ... -0.01202393 -0.01217651
  -0.01254272]
 ...
 [-0.0100708  -0.00872803 -0.00650024 ... -0.01254272 -0.01089478
  -0.01132202]
 [-0.01348877 -0.00967407 -0.00897217 ... -0.01647949 -0.01019287
  -0.00961304]
 [-0.0055542  -0.00222778  0.00445557 ... -0.03439331 -0.0390625
  -0.03811646]]
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
# The functional API needs an explicit input tensor: a variable number of
# time steps (None), each a seq_vector-wide slice of audio.
input_image=Input((None,seq_vector))
# Encoder: CuDNNGRU runs on the GPU; a plain GRU layer can be used instead.
encoder=CuDNNGRU(units=512, return_sequences=True, name="gru1")(input_image)
encoder=CuDNNGRU(units=256, return_sequences=True,name="gru2")(encoder)
encoder=Dense(256,activation='tanh')(encoder)
# Bottleneck: 32 features per time step.
encoder_out=Dense(32,activation='tanh')(encoder) 

# Stand-alone model that stops at the bottleneck, used to read out the latent features.
encoder_model = Model(inputs=input_image, outputs=encoder_out)

# Decoder: the encoder mirrored.
decoder=Dense(256,activation='tanh')(encoder_out)
decoder=CuDNNGRU(units=256, return_sequences=True,name="de_gru1")(decoder)
decoder=CuDNNGRU(units=512, return_sequences=True, name="de_gru2")(decoder)
# Project back to seq_vector values per time step; tanh bounds the output to (-1, 1).
decoder_out=Dense(seq_vector,activation='tanh')(decoder)

# Full autoencoder: input -> encoder -> bottleneck -> decoder -> reconstruction.
autoencoder=Model(input_image,decoder_out)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
# Print the layer-by-layer structure and parameter counts of the autoencoder.
autoencoder.summary()
  • 1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_4 (InputLayer)         (None, None, 100)         0         
_________________________________________________________________
gru1 (CuDNNGRU)              (None, None, 512)         943104    
_________________________________________________________________
gru2 (CuDNNGRU)              (None, None, 256)         591360    
_________________________________________________________________
dense_18 (Dense)             (None, None, 256)         65792     
_________________________________________________________________
dense_19 (Dense)             (None, None, 32)          8224      
_________________________________________________________________
dense_20 (Dense)             (None, None, 256)         8448      
_________________________________________________________________
de_gru1 (CuDNNGRU)           (None, None, 256)         394752    
_________________________________________________________________
de_gru2 (CuDNNGRU)           (None, None, 512)         1182720   
_________________________________________________________________
dense_21 (Dense)             (None, None, 100)         51300     
=================================================================
Total params: 3,245,700
Trainable params: 3,245,700
Non-trainable params: 0
_________________________________________________________________
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
# Train the network to reproduce its own input (x_train is both input and target)
# under a mean-squared-error loss.
autoencoder.compile(optimizer='adam', loss='mse')
# No validation data is passed, so the checkpoint monitors the training loss and
# overwrites rnn.h5 only when that loss reaches a new minimum.
autoencoder.fit(x_train, x_train, epochs=50, batch_size=2,verbose=1,               
                callbacks=[TensorBoard(log_dir='log_RNN声音自编码器'),
                           ModelCheckpoint("rnn.h5",
                                            monitor='loss', verbose=1, save_best_only=True, mode='min', period=1)])
  • 1
  • 2
  • 3
  • 4
  • 5
Epoch 1/50
50/50 [==============================] - 5s 103ms/step - loss: 0.0060

Epoch 00001: loss improved from inf to 0.00604, saving model to rnn.h5
Epoch 2/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0053

Epoch 00002: loss improved from 0.00604 to 0.00532, saving model to rnn.h5
Epoch 3/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0039

Epoch 00003: loss improved from 0.00532 to 0.00385, saving model to rnn.h5
Epoch 4/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0028

Epoch 00004: loss improved from 0.00385 to 0.00277, saving model to rnn.h5
Epoch 5/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0021

Epoch 00005: loss improved from 0.00277 to 0.00211, saving model to rnn.h5
Epoch 6/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0015

Epoch 00006: loss improved from 0.00211 to 0.00148, saving model to rnn.h5
Epoch 7/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0012

Epoch 00007: loss improved from 0.00148 to 0.00117, saving model to rnn.h5
Epoch 8/50
50/50 [==============================] - 4s 85ms/step - loss: 0.0010

Epoch 00008: loss improved from 0.00117 to 0.00102, saving model to rnn.h5
Epoch 9/50
50/50 [==============================] - 4s 85ms/step - loss: 9.1125e-04

Epoch 00009: loss improved from 0.00102 to 0.00091, saving model to rnn.h5
Epoch 10/50
50/50 [==============================] - 4s 85ms/step - loss: 8.4070e-04

Epoch 00010: loss improved from 0.00091 to 0.00084, saving model to rnn.h5
Epoch 11/50
50/50 [==============================] - 4s 85ms/step - loss: 7.7816e-04

Epoch 00011: loss improved from 0.00084 to 0.00078, saving model to rnn.h5
Epoch 12/50
50/50 [==============================] - 4s 85ms/step - loss: 6.8798e-04

Epoch 00012: loss improved from 0.00078 to 0.00069, saving model to rnn.h5
Epoch 13/50
50/50 [==============================] - 4s 85ms/step - loss: 6.0970e-04

Epoch 00013: loss improved from 0.00069 to 0.00061, saving model to rnn.h5
Epoch 14/50
50/50 [==============================] - 4s 85ms/step - loss: 5.6702e-04

Epoch 00014: loss improved from 0.00061 to 0.00057, saving model to rnn.h5
Epoch 15/50
50/50 [==============================] - 4s 85ms/step - loss: 5.2083e-04

Epoch 00015: loss improved from 0.00057 to 0.00052, saving model to rnn.h5
Epoch 16/50
50/50 [==============================] - 4s 85ms/step - loss: 4.7999e-04

Epoch 00016: loss improved from 0.00052 to 0.00048, saving model to rnn.h5
Epoch 17/50
50/50 [==============================] - 4s 85ms/step - loss: 4.6281e-04

Epoch 00017: loss improved from 0.00048 to 0.00046, saving model to rnn.h5
Epoch 18/50
50/50 [==============================] - 4s 85ms/step - loss: 4.4957e-04

Epoch 00018: loss improved from 0.00046 to 0.00045, saving model to rnn.h5
Epoch 19/50
50/50 [==============================] - 4s 85ms/step - loss: 4.5229e-04

Epoch 00019: loss did not improve
Epoch 20/50
50/50 [==============================] - 4s 85ms/step - loss: 4.4043e-04

Epoch 00020: loss improved from 0.00045 to 0.00044, saving model to rnn.h5
Epoch 21/50
50/50 [==============================] - 4s 85ms/step - loss: 4.2521e-04: 1s - loss: 4.07

Epoch 00021: loss improved from 0.00044 to 0.00043, saving model to rnn.h5
Epoch 22/50
50/50 [==============================] - 4s 85ms/step - loss: 4.1621e-04

Epoch 00022: loss improved from 0.00043 to 0.00042, saving model to rnn.h5
Epoch 23/50
50/50 [==============================] - 4s 85ms/step - loss: 4.0283e-04

Epoch 00023: loss improved from 0.00042 to 0.00040, saving model to rnn.h5
Epoch 24/50
50/50 [==============================] - 4s 85ms/step - loss: 3.9148e-04

Epoch 00024: loss improved from 0.00040 to 0.00039, saving model to rnn.h5
Epoch 25/50
50/50 [==============================] - 4s 85ms/step - loss: 3.9139e-04

Epoch 00025: loss improved from 0.00039 to 0.00039, saving model to rnn.h5
Epoch 26/50
50/50 [==============================] - 4s 85ms/step - loss: 3.7617e-04

Epoch 00026: loss improved from 0.00039 to 0.00038, saving model to rnn.h5
Epoch 27/50
50/50 [==============================] - 4s 85ms/step - loss: 3.7279e-04

Epoch 00027: loss improved from 0.00038 to 0.00037, saving model to rnn.h5
Epoch 28/50
50/50 [==============================] - 4s 85ms/step - loss: 3.5802e-04

Epoch 00028: loss improved from 0.00037 to 0.00036, saving model to rnn.h5
Epoch 29/50
50/50 [==============================] - 4s 85ms/step - loss: 3.4530e-04

Epoch 00029: loss improved from 0.00036 to 0.00035, saving model to rnn.h5
Epoch 30/50
50/50 [==============================] - 4s 85ms/step - loss: 3.5566e-04

Epoch 00030: loss did not improve
Epoch 31/50
50/50 [==============================] - 4s 85ms/step - loss: 3.4619e-04

Epoch 00031: loss did not improve
Epoch 32/50
50/50 [==============================] - 4s 85ms/step - loss: 3.3743e-04

Epoch 00032: loss improved from 0.00035 to 0.00034, saving model to rnn.h5
Epoch 33/50
50/50 [==============================] - 4s 85ms/step - loss: 3.2660e-04

Epoch 00033: loss improved from 0.00034 to 0.00033, saving model to rnn.h5
Epoch 34/50
50/50 [==============================] - 4s 85ms/step - loss: 3.1028e-04

Epoch 00034: loss improved from 0.00033 to 0.00031, saving model to rnn.h5
Epoch 35/50
50/50 [==============================] - 4s 85ms/step - loss: 2.9729e-04

Epoch 00035: loss improved from 0.00031 to 0.00030, saving model to rnn.h5
Epoch 36/50
50/50 [==============================] - 4s 85ms/step - loss: 2.9330e-04

Epoch 00036: loss improved from 0.00030 to 0.00029, saving model to rnn.h5
Epoch 37/50
50/50 [==============================] - 4s 85ms/step - loss: 2.9591e-04

Epoch 00037: loss did not improve
Epoch 38/50
50/50 [==============================] - 4s 85ms/step - loss: 2.8852e-04

Epoch 00038: loss improved from 0.00029 to 0.00029, saving model to rnn.h5
Epoch 39/50
50/50 [==============================] - 4s 85ms/step - loss: 2.7403e-04

Epoch 00039: loss improved from 0.00029 to 0.00027, saving model to rnn.h5
Epoch 40/50
50/50 [==============================] - 4s 85ms/step - loss: 2.7536e-04

Epoch 00040: loss did not improve
Epoch 41/50
50/50 [==============================] - 4s 85ms/step - loss: 2.7173e-04

Epoch 00041: loss improved from 0.00027 to 0.00027, saving model to rnn.h5
Epoch 42/50
50/50 [==============================] - 4s 85ms/step - loss: 2.6409e-04

Epoch 00042: loss improved from 0.00027 to 0.00026, saving model to rnn.h5
Epoch 43/50
50/50 [==============================] - 4s 85ms/step - loss: 2.6014e-04

Epoch 00043: loss improved from 0.00026 to 0.00026, saving model to rnn.h5
Epoch 44/50
50/50 [==============================] - 4s 85ms/step - loss: 2.5614e-04

Epoch 00044: loss improved from 0.00026 to 0.00026, saving model to rnn.h5
Epoch 45/50
50/50 [==============================] - 4s 85ms/step - loss: 2.5297e-04

Epoch 00045: loss improved from 0.00026 to 0.00025, saving model to rnn.h5
Epoch 46/50
50/50 [==============================] - 4s 85ms/step - loss: 2.5371e-04

Epoch 00046: loss did not improve
Epoch 47/50
50/50 [==============================] - 4s 85ms/step - loss: 2.5559e-04

Epoch 00047: loss did not improve
Epoch 48/50
50/50 [==============================] - 4s 85ms/step - loss: 2.6840e-04

Epoch 00048: loss did not improve
Epoch 49/50
50/50 [==============================] - 4s 85ms/step - loss: 2.7194e-04

Epoch 00049: loss did not improve
Epoch 50/50
50/50 [==============================] - 4s 85ms/step - loss: 2.6571e-04

Epoch 00050: loss did not improve





<keras.callbacks.History at 0x7380fc88>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205
  • 206
# Repeat the preprocessing for the held-out clips under rnn_wavs/test.
filePath = "rnn_wavs/test"

testPath = get_wav_path(filePath)

# Keep the flat waveforms around for plotting against the reconstructions.
x_test_ori=get_data(testPath)

# Same (clips, steps, seq_vector) layout as the training data.
x_test=x_test_ori.reshape(x_test_ori.shape[0],-1,seq_vector)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
开始预处理文件
100.00 % [==================================================>] 4/4 	 已用时间:0s 剩余时间:0 s
  • 1
  • 2
# Run the test clips through the full autoencoder to get their reconstructions.
decoded_seq = autoencoder.predict(x_test)
print(decoded_seq.shape)
  • 1
  • 2
(4, 480, 100)
  • 1
# Shape of a single reconstructed clip.
d_shape=decoded_seq[0].shape
print(d_shape)
  • 1
  • 2
(480, 100)
  • 1
# Notebook-style bare expression: displays the first reconstructed clip.
decoded_seq[0]
  • 1
array([[-0.01014584, -0.01277211, -0.01327355, ..., -0.01386975,
        -0.0145765 , -0.01456202],
       [-0.01393538, -0.01367248, -0.01350442, ..., -0.01421544,
        -0.01455196, -0.0140023 ],
       [-0.01335981, -0.01219707, -0.01206582, ..., -0.0150555 ,
        -0.0149645 , -0.01383976],
       ...,
       [-0.09125455, -0.10162869, -0.12252352, ..., -0.01989237,
        -0.01427485,  0.01067922],
       [-0.00659992, -0.00485723, -0.0141265 , ...,  0.26765165,
         0.37300578,  0.42874205],
       [ 0.5345399 ,  0.59415686,  0.611799  , ...,  0.20478293,
         0.2866281 ,  0.3082694 ]], dtype=float32)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
# Flatten the chosen reconstruction back into one continuous waveform and plot it.
index=0
seq=decoded_seq[index].flatten()
show_wave(seq,'rnn 复原')
  • 1
  • 2
  • 3

在这里插入图片描述

# Plot the original waveform of the same clip for comparison.
show_wave(x_test_ori[index],'原始')
  • 1

在这里插入图片描述

# Extract the latent feature vectors with the encoder-only model; they can be
# rendered as feature images.
encoded_latent = encoder_model.predict(x_test)
print(encoded_latent.shape)
  • 1
  • 2
  • 3
(4, 480, 32)
  • 1
def show_latent_images(start=0,end=4):
    """Draw the latent codes of clips ``start`` .. ``end - 1`` as images.

    Reads the module-level ``encoded_latent`` array. Each clip's code is
    reshaped to 160 rows, transposed, and shown with the ``binary`` colormap
    in the top row of a 2 x ``end`` subplot grid.

    :param start: index of the first clip to draw
    :param end: one past the index of the last clip; also the grid's column count
    """
    plt.figure(figsize=(20, 10))
    for clip in range(start,end):
        # Subplot positions are 1-based, hence clip + 1.
        plt.subplot(2, end, clip + 1)
        latent_image = encoded_latent[clip].reshape(160, -1).T
        plt.imshow(latent_image, cmap='binary')
    plt.show()
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
# Render the latent feature images using the defaults (clips 0 to 3).
show_latent_images()
  • 1

在这里插入图片描述

# Notebook-style bare expression: displays the path of the first test clip.
testPath[0]
  • 1
'rnn_wavs/test/A2/A2_239.wav'
  • 1
# Play the audio file rnn_wavs/test/A2/A2_239.wav
import IPython
out_wav=testPath[0]
# Also load the samples at 16000 Hz so the clip can be trimmed and re-exported below.
signal, srate = librosa.load(out_wav, sr=16000)
IPython.display.Audio(out_wav)

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6

在这里插入图片描述

# Keep the first 3 seconds (3 * 16000 samples).
signal=signal[0:3*16000]
  • 1
  • 2
# Write the first 3 seconds to a file and play it.
# NOTE(review): librosa.output appears to be absent from newer librosa releases —
# confirm the installed version still provides write_wav.
out_new='3s.wav'
librosa.output.write_wav(out_new,signal,16000)
IPython.display.Audio(out_new)
  • 1
  • 2
  • 3
  • 4

在这里插入图片描述

# Write the reconstructed 3-second clip to a file and play it.
# NOTE(review): librosa.output appears to be absent from newer librosa releases —
# confirm the installed version still provides write_wav.
out_new='new_3s.wav'
librosa.output.write_wav(out_new,seq,16000)
IPython.display.Audio(out_new)
  • 1
  • 2
  • 3
  • 4

在这里插入图片描述
好了,今天就到这里了,希望对学习理解有帮助,大神看见勿喷,仅为自己的学习理解,能力有限,请多包涵,侵删。

本文内容由网友自发贡献,转载请注明出处:https://www.wpsshop.cn/w/小丑西瓜9/article/detail/347759
推荐阅读
相关标签
  

闽ICP备14008679号