赞
踩
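"""
Split each source recording into separate clips based on the CSV annotations.

Segments whose text is '无' are ignored; every other segment is cut from
sTime to sTime + dTime.
"""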
import json
import os

import pandas as pd
from pydub import AudioSegment

# Hard-coded Windows locations used when the file is run as a script.
# Backslashes are escaped explicitly: "\c", "\w" and "\标" are not valid
# escape sequences and raise a SyntaxWarning on recent Python versions.
csv_url = "D:\\csv\\"
wav_url = "D:\\标注音频与示例\\"
save_wav = "D:\\wav\\new_wav\\"

# Annotation text that marks a segment which must not be exported.
_SKIP_TEXT = '无'


def _export_clips(audio, segments, clip_dir, stem):
    """Export every annotated segment of *audio* as its own WAV file.

    Args:
        audio: Recording returned by ``AudioSegment.from_wav``.
        segments: Iterable of annotation dicts, each holding an ``id`` and a
            ``defData`` dict with the keys ``text``, ``sTime`` and ``dTime``.
        clip_dir: Directory the clips are written to.
        stem: File-name prefix; clips are named ``<stem>-<id>.wav``.

    Returns:
        The number of clips written.
    """
    exported = 0
    for segment in segments:
        def_data = segment['defData']
        if def_data['text'] == _SKIP_TEXT:
            continue
        # Cut from the start time to start + duration. The values are scaled
        # by 1000 before slicing (presumably seconds -> milliseconds; confirm
        # against the annotation export format).
        start = def_data['sTime'] * 1000
        end = start + def_data['dTime'] * 1000
        clip_name = stem + "-" + str(segment['id']) + ".wav"
        audio[start:end].export(os.path.join(clip_dir, clip_name), format="wav")
        exported += 1
    return exported


def split_annotated_audio(csv_dir=csv_url, wav_dir=wav_url, out_dir=save_wav):
    """Split the recordings listed in every CSV of *csv_dir* into clips.

    Each CSV row supplies a WAV file name (``storeFileName``) and a JSON
    annotation string (``finalResult``). For every row a sub-directory named
    after the part of the file name before the first dot is created under
    *out_dir*; it receives a ``.txt`` file containing the file name and the
    raw annotation, followed by one WAV clip per annotated segment.

    Args:
        csv_dir: Directory whose entries are all read as annotation CSVs.
        wav_dir: Directory containing the source WAV files.
        out_dir: Directory under which the per-recording folders are created.

    Returns:
        The total number of clips exported.
    """
    exported = 0
    for csv_name in os.listdir(csv_dir):
        data_frame = pd.read_csv(os.path.join(csv_dir, csv_name), encoding='utf-8')
        rows = zip(data_frame["storeFileName"], data_frame["finalResult"])
        for wav_name, raw_result in rows:
            # Same naming rule as before: everything up to the first dot.
            stem = wav_name.split(".")[0]
            clip_dir = os.path.join(out_dir, stem)
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(clip_dir, exist_ok=True)

            result = json.loads(raw_result)

            # Keep the file name and the raw annotation next to the clips.
            txt_path = os.path.join(clip_dir, stem + ".txt")
            with open(txt_path, "w", encoding='utf-8') as fn:
                fn.write(wav_name + "\n")
                fn.write(raw_result)

            audio = AudioSegment.from_wav(os.path.join(wav_dir, wav_name))
            exported += _export_clips(audio, result['text'], clip_dir, stem)
    return exported


if __name__ == "__main__":
    split_annotated_audio()

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。