赞
踩
前言:前几天看到一篇《用Python解决女朋友看电影没字幕的需求》,觉得很有意思。但是有道的识别没有做过,找了下其他的接口,发现讯飞的识别比较准确。就做了这个小程序。
讯飞接口:https://console.xfyun.cn/services/lfasr
申请接口的 APPID 和 SecretKey(SK)。
免费时长:新用户可识别的视频时长能达到5个小时。用多少算多少。一个月的有效期。
下载调用示例demo:https://www.xfyun.cn/doc/asr/lfasr/API.html#%E8%B0%83%E7%94%A8%E7%A4%BA%E4%BE%8B
下载demo后就可以直接利用其接口生成识别文件,不用自己去写识别程序了。
主要程序:
1、窗体文件
class PyWinDesign:
    """Main window of the subtitle recognition / merging tool.

    Builds every widget with absolute ``place`` coordinates on the window
    object handed to the constructor and initialises the state shared by the
    button callbacks (credentials, paths, clip range) to empty strings.
    """

    def __init__(self, 启动窗口):
        """Create all widgets on ``启动窗口`` and centre the window on screen.

        Args:
            启动窗口: the top-level window object the widgets are attached to.
        """
        self.启动窗口 = 启动窗口

        # State filled in later by the button callbacks.
        self.appid = ""
        self.secret_key = ""
        self.video_path = ""
        self.finish_video_path = ""
        self.start_time = ""
        self.end_time = ""
        self.videoclip = ""
        self.out_video_path = ""
        self.srt_files_path = ""
        self.sum_video_path = ""

        # Fixed-size window, centred on the screen.
        win_w, win_h = 644, 400
        self.启动窗口.title('视频字幕识别和合成')
        self.启动窗口.resizable(width=False, height=False)
        screen_w = self.启动窗口.winfo_screenwidth()
        screen_h = self.启动窗口.winfo_screenheight()
        self.启动窗口.geometry('{}x{}+{}+{}'.format(
            win_w, win_h,
            int((screen_w - win_w) / 2), int((screen_h - win_h) / 2)))

        # --- iFlytek credentials -------------------------------------------
        self.标签1_标题 = tk.StringVar(value='讯飞接口:')
        self.标签1 = tk.Label(self.启动窗口, textvariable=self.标签1_标题, anchor=tk.W)
        self.标签1.place(x=19, y=22, width=68, height=31)

        self.标签2_标题 = tk.StringVar(value='Appid:')
        self.标签2 = tk.Label(self.启动窗口, textvariable=self.标签2_标题)
        self.标签2.place(x=20, y=71, width=64, height=28)

        self.编辑框1_内容 = tk.StringVar(value='')
        self.编辑框1 = ttk.Entry(self.启动窗口, textvariable=self.编辑框1_内容, justify=tk.LEFT)
        self.编辑框1.place(x=98, y=71, width=141, height=32)

        self.标签3_标题 = tk.StringVar(value='Secret_key:')
        self.标签3 = tk.Label(self.启动窗口, textvariable=self.标签3_标题)
        self.标签3.place(x=21, y=106, width=67, height=39)

        self.编辑框2_内容 = tk.StringVar(value='')
        self.编辑框2 = tk.Entry(self.启动窗口, textvariable=self.编辑框2_内容, justify=tk.LEFT)
        self.编辑框2.place(x=99, y=113, width=504, height=35)

        # --- Source video selection ----------------------------------------
        self.按钮1_标题 = tk.StringVar(value='选择待识别视频地址')
        self.按钮1 = tk.Button(self.启动窗口, textvariable=self.按钮1_标题,
                             command=self.按钮1_被鼠标左键单击)
        self.按钮1.place(x=22, y=186, width=120, height=39)

        self.编辑框3_内容 = tk.StringVar(value='')
        self.编辑框3 = tk.Entry(self.启动窗口, textvariable=self.编辑框3_内容, justify=tk.LEFT)
        self.编辑框3.place(x=158, y=192, width=443, height=31)

        # --- Clip range, in seconds ----------------------------------------
        self.标签4_标题 = tk.StringVar(value='视频识别开始时间:')
        self.标签4 = tk.Label(self.启动窗口, textvariable=self.标签4_标题, anchor=tk.W)
        self.标签4.place(x=156, y=226, width=106, height=35)

        self.编辑框4_内容 = tk.StringVar(value='')
        self.编辑框4 = tk.Entry(self.启动窗口, textvariable=self.编辑框4_内容, justify=tk.LEFT)
        self.编辑框4.place(x=268, y=225, width=43, height=31)

        self.标签5_标题 = tk.StringVar(value='秒')
        self.标签5 = tk.Label(self.启动窗口, textvariable=self.标签5_标题, anchor=tk.W)
        self.标签5.place(x=321, y=231, width=27, height=24)

        self.标签6_标题 = tk.StringVar(value='视频识别结束时间:')
        self.标签6 = tk.Label(self.启动窗口, textvariable=self.标签6_标题, anchor=tk.W)
        self.标签6.place(x=157, y=266, width=103, height=28)

        self.编辑框5_内容 = tk.StringVar(value='')
        self.编辑框5 = tk.Entry(self.启动窗口, textvariable=self.编辑框5_内容, justify=tk.LEFT)
        self.编辑框5.place(x=267, y=264, width=43, height=28)

        self.标签7_标题 = tk.StringVar(value='秒')
        self.标签7 = tk.Label(self.启动窗口, textvariable=self.标签7_标题, anchor=tk.W)
        self.标签7.place(x=321, y=265, width=30, height=25)

        # --- Action buttons ------------------------------------------------
        self.按钮2_标题 = tk.StringVar(value='开始截取视频')
        self.按钮2 = tk.Button(self.启动窗口, textvariable=self.按钮2_标题,
                             command=self.按钮2_被鼠标左键单击)
        self.按钮2.place(x=356, y=236, width=97, height=47)

        self.按钮4_标题 = tk.StringVar(value='对截取的视频进行字幕识别')
        self.按钮4 = tk.Button(self.启动窗口, textvariable=self.按钮4_标题,
                             command=self.按钮4_被鼠标左键单击)
        self.按钮4.place(x=470, y=239, width=150, height=42)

        self.按钮5_标题 = tk.StringVar(value='合成视频和字幕')
        self.按钮5 = tk.Button(self.启动窗口, textvariable=self.按钮5_标题,
                             command=self.按钮5_被鼠标左键单击)
        self.按钮5.place(x=164, y=308, width=232, height=40)

        # Indeterminate progress bar started/stopped by the long-running callbacks.
        self.进度条 = ttk.Progressbar(self.启动窗口, length=200, mode="indeterminate",
                                   maximum=200, orient=tk.HORIZONTAL)
        self.进度条.place(x=164, y=360, width=232, height=40)
生产示例:
视频地址选择:
def 按钮1_被鼠标左键单击(self):
    """Ask the user for the source video and show its path in entry box 3.

    If the dialog is cancelled (an empty selection is returned) the path
    already present in the entry box is left untouched instead of being
    overwritten with an empty value.
    """
    file_path = filedialog.askopenfilename(
        title=u'选择文件',
        initialdir=os.path.expanduser('F:/迅雷下载/'),
    )
    if not file_path:
        # Cancelled: keep whatever the user selected or typed before.
        return
    print(file_path)
    self.编辑框3_内容.set(file_path)
视频截取:
def 按钮2_被鼠标左键单击(self):
    """Cut the requested time range out of the selected video and save it.

    Reads the credentials, the source path and the start/end seconds from the
    entry boxes, asks for an output name, then writes
    ``<chosen name>.mp4``. The sub-clip is kept in ``self.videoclip`` and the
    chosen name (without extension) in ``self.out_video_path`` so the
    recognition and merge steps can reuse them.

    Invalid numbers, an empty source path, or a cancelled save dialog abort
    the operation with a message instead of raising or writing a stray file.
    """
    self.appid = self.编辑框1.get()
    self.secret_key = self.编辑框2.get()
    self.video_path = str(self.编辑框3.get()).strip()

    try:
        start_seconds = int(self.编辑框4.get())
        end_seconds = int(self.编辑框5.get())
    except ValueError:
        tkinter.messagebox.showerror('提示', '开始时间和结束时间必须填写整数(秒)')
        return
    self.start_time = start_seconds
    self.end_time = end_seconds

    if not self.video_path:
        tkinter.messagebox.showerror('提示', '请先选择待识别的视频')
        return

    # Ask for the destination before doing any work so a cancel is cheap.
    save_path = filedialog.asksaveasfilename(
        title=u'保存文件,请输入文件名',
        initialdir=os.path.expanduser('F:/迅雷下载/'),
    )
    if not save_path:
        return
    self.out_video_path = save_path

    # NOTE(review): write_videofile blocks this callback, so the Tk event loop
    # cannot animate the indeterminate bar while encoding runs; moving the
    # work to a background thread would be needed for a live animation.
    self.进度条.start()
    try:
        self.videoclip = VideoFileClip(self.video_path).subclip(
            self.start_time, self.end_time)
        video = CompositeVideoClip([self.videoclip])
        tkinter.messagebox.showwarning('提示', '请等待视频截取,直到弹出成功截取的对话框')
        video.write_videofile(self.out_video_path + ".mp4")
        tkinter.messagebox.showwarning('提示', '视频截取成功')
    finally:
        # Always stop the bar, even when opening or encoding the clip fails.
        self.进度条.stop()
截取视频的字幕识别:
def 按钮4_被鼠标左键单击(self):
    """Extract the clip's audio, send it for recognition and write the SRT.

    The audio of ``self.videoclip`` is written to ``<out_video_path>.wav``,
    uploaded through ``RequestApi`` and the returned ``data`` payload is
    decoded and passed to ``self.get_srt``.

    Aborts with a message when no clip with an audio track is available.
    """
    import json

    # self.videoclip is "" until a clip has been cut; a clip may also have no audio.
    audioclip = getattr(self.videoclip, 'audio', None)
    if audioclip is None:
        tkinter.messagebox.showerror('提示', '请先截取一段带有音频的视频')
        return

    wav_path = self.out_video_path + ".wav"
    appid = self.编辑框1_内容.get()
    sk = self.编辑框2_内容.get()

    self.进度条.start()
    try:
        tkinter.messagebox.showwarning('提示', '请等待字幕提取与识别,直到弹出识别成功对话框')
        audioclip.write_audiofile(wav_path)
        # Call the iFlytek long-form speech recognition interface.
        api = RequestApi(appid=appid, secret_key=sk, upload_file_path=wav_path)
        data = api.all_api_request()
        # The payload comes from a remote service: parse it as JSON rather
        # than eval()-ing it, which would execute arbitrary expressions.
        # NOTE(review): assumes data['data'] is a JSON-encoded list of
        # segments, as the previous eval()-based code implied -- confirm.
        result = json.loads(data['data'])
        print(result)
        self.get_srt(result)  # generate the subtitle file
    finally:
        self.进度条.stop()
    # Report success only after the subtitle file has actually been produced.
    tkinter.messagebox.showwarning('提示', '字幕提取和识别成功!')
使用ffmpeg合并字幕和视频:
def 按钮5_被鼠标左键单击(self):
    """Burn the generated subtitles into the clipped video with ffmpeg.

    Reads ``<out_video_path>.mp4`` and ``self.srt_files_path`` and writes
    ``<out_video_path>sum.mp4`` (stored in ``self.sum_video_path``).

    ffmpeg is started with an argument list instead of a shell command line,
    so paths containing spaces or shell metacharacters are passed through
    intact, and its exit status decides whether success is reported.
    """
    import subprocess

    if not self.out_video_path or not self.srt_files_path:
        tkinter.messagebox.showerror('提示', '请先截取视频并完成字幕识别')
        return

    self.sum_video_path = self.out_video_path + "sum.mp4"
    mp4_path = self.out_video_path + ".mp4"
    # The \'...\' wrapping is kept from the original command: ffmpeg receives
    # the subtitle path quoted inside the filter description.
    # NOTE(review): a subtitle path that itself contains a single quote would
    # still need extra ffmpeg-level escaping.
    command = [
        "ffmpeg",
        "-i", mp4_path,
        "-vf", "subtitles=\\'" + self.srt_files_path + "\\'",
        "-y", self.sum_video_path,
    ]

    self.进度条.start()
    try:
        tkinter.messagebox.showwarning('提示', '请等待视频与字幕合并,直到弹出合并成功对话框')
        completed = subprocess.run(command)
    except OSError:
        # Raised when the ffmpeg executable cannot be started at all.
        tkinter.messagebox.showerror('提示', '无法启动 ffmpeg,请确认已安装并加入 PATH')
        return
    finally:
        self.进度条.stop()

    if completed.returncode != 0:
        tkinter.messagebox.showerror('提示', '视频与字幕合并失败,请查看 ffmpeg 的输出信息')
        return
    tkinter.messagebox.showwarning('提示', '合并成功,请观看视频!视频保持在截取的视频同一目录下!')
字幕的生成:
def get_srt(self, result):
    """Write the recognition result as a SubRip (.srt) file.

    The file is saved as ``<out_video_path>.srt`` (UTF-8) and its path is
    stored in ``self.srt_files_path``.

    Args:
        result: iterable of segments, each a mapping with ``'bg'`` (start,
            milliseconds), ``'ed'`` (end, milliseconds) and ``'onebest'``
            (recognised text). Times may be numbers or numeric strings.
    """
    def to_timestamp(total_ms):
        # SRT timestamps are HH:MM:SS,mmm with a three-digit millisecond field.
        seconds, millis = divmod(max(0, int(total_ms)), 1000)
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, seconds, millis)

    self.srt_files_path = self.out_video_path + '.srt'

    cues = []
    for index, each in enumerate(result, start=1):
        start_ms = int(float(each['bg']))
        # End one millisecond early so consecutive cues never overlap, but
        # never before the cue's own start.
        stop_ms = max(start_ms, int(float(each['ed'])) - 1)
        cues.append('{}\n{} --> {}\n{}\n\n'.format(
            index, to_timestamp(start_ms), to_timestamp(stop_ms), each['onebest']))

    with open(self.srt_files_path, 'w', encoding='utf-8') as f:
        f.write(''.join(cues))
结语:
1、学习使用了 MoviePy 模块
2、字符串转表达式的eval函数使用
3、讯飞接口使用。
4、进度条(Progressbar)还是不会用……
成品下载地址:https://download.csdn.net/download/zxl7725103/12966151
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。