当前位置:   article > 正文

利用python通过字幕文件.srt来实现对视频片段的截取(2)_根据字幕中的关键词剪切视频

根据字幕中的关键词剪切视频
  1. """
  2. 用于获取多个关键词的视频摘要
  3. 本代码还是有缺陷的,因为选择了几个for循环,导致时间还是挺慢的。
  4. """
  5. import cv2 as cv
  6. import numpy as np
  7. import re
  8. import os
  9. from PIL import Image
  10. video_file = cv.VideoCapture(r"E:\900秒嫦娥落月.mp4")
  11. fps = video_file.get(5)
  12. print("该视频的帧为{}".format(fps))
  13. #==================================================================================#
  14. """
  15. 实验阶段:该内容是用来获取想要寻找的关键字段,在视频中时间位置,并进行截取视频
  16. 最终阶段:进行文本匹配,谁的相似度最高,就选择哪一段进行截取,
  17. 而在此任务中涉及到的知识很多:文本相似度匹配、近义词训练、如何将txt文件的字幕和时间分割再还原等等问题
  18. """
  19. #name=input("请输入提取内容:")
  20. # name=input("请输入你所需要寻找的关键字段或者关键内容:")
  21. # with open("CHS_嫦娥奔月.txt","r",encoding="utf-8") as f:
  22. # flie_data=f.readlines()
  23. # for i,readline in enumerate(flie_data):
  24. # #print(readline)
  25. # # matchObj=re.match(name,readline)#匹配时,如果第一个字符不匹配,就会发生匹配失败
  26. # matchObj = re.search(name, readline)
  27. # print(matchObj)
  28. # if matchObj is not None:
  29. # print("匹配字符所在的位置{}".format(i))
  30. # print("所在的时间段为{}".format(flie_data[i-1]))
  31. # break
  32. list_time=[]
  33. list_frames_time=[]
  34. name_all=input("请输入需要提取的关键词(用|分开):")
  35. name_all=name_all.split("|")
  36. # name_all=["燃料","失败"]
  37. with open(r"D:\PycharmProjects\pythonProject\CHS_嫦娥奔月.txt","r",encoding="utf-8") as f:
  38. flie_data=f.readlines()
  39. for name in name_all:
  40. for i,readline in enumerate(flie_data):
  41. #print(readline)
  42. # matchObj=re.match(name,readline)#匹配时,如果第一个字符不匹配,就会发生匹配失败
  43. matchObj = re.search(name, readline)
  44. print(matchObj)
  45. if matchObj is not None:
  46. list_time.append(flie_data[i-1])
  47. print("匹配字符所在的位置{}".format(i))
  48. print("所在的时间段为{}".format(flie_data[i-1]))
  49. list_time.sort()#将时间进行排序(从小到大)
  50. print("需要截取的视频时间",list_time)
  51. # =============================================================================#
  52. # 首先弄人工输入,并将帧和时间进行转换
  53. # start_frame = input("请输入开始帧数,(输入格式为时:分:秒,毫秒或者帧):")
  54. for start_frame in list_time:
  55. #start_frame=flie_data[i-1]
  56. if "-->" in start_frame:
  57. start_frame_1 = start_frame.split(" --> ")
  58. start_frame = start_frame_1[0]
  59. end_frame = start_frame_1[1]
  60. print("起始时间-->终止时间")
  61. else:
  62. end_frame = input("请结束开始帧数,(输入格式为时:分:秒,毫秒或者帧):")
  63. """
  64. 如果得到了是时间格式就进行转换成帧格式
  65. 输入的是时:分:秒.毫秒
  66. 1时=60分=3600秒=3600000毫秒
  67. 如果得到的是帧格式,那正好不用转换
  68. """
  69. # start_frame_spilt=start_frame.split(":")
  70. # ==============================================================================#
  71. if ":" in start_frame:
  72. print("输入的是时间制!")
  73. name_str = start_frame.split(":")
  74. start_frame = (int(name_str[0]) * 3600 + int(name_str[1]) * 60 + float(name_str[2].replace(",", "."))) * fps
  75. list_frames_time.append(start_frame)
  76. else:
  77. print("输入的值十进制(帧)!")
  78. start_frame = int(start_frame)
  79. if ":" in end_frame:
  80. print("输入的是时间制!")
  81. name_str = end_frame.split(":")
  82. end_frame = (int(name_str[0]) * 3600 + int(name_str[1]) * 60 + float(name_str[2].replace(",", "."))) * fps
  83. list_frames_time.append(end_frame)
  84. else:
  85. print("输入的值十进制(帧)!")
  86. end_frame = int(end_frame)
  87. # ==============================================================================================#
  88. """
  89. 如果前后的时间比较小,就是帧数小于1,那么无法截到图,所以直接截前后5帧来进行填充
  90. """
  91. if (end_frame - start_frame) < 1:
  92. start_frame = start_frame - 5
  93. end_frame = end_frame + 5
  94. else:
  95. pass
  96. print(list_frames_time)
  97. count_frame=0
  98. for i in range(0,len(list_frames_time),2):
  99. start_frame=list_frames_time[i]
  100. end_frame=list_frames_time[i+1]
  101. success, frames = video_file.read()
  102. while success:
  103. success, frames = video_file.read()
  104. count_frame += 1
  105. if count_frame >= start_frame and count_frame <= end_frame:
  106. print("截取第{}帧".format(count_frame))
  107. cv.imwrite(r"D:\PycharmProjects\pythonProject\feiji\frames\image{}.jpg".format(count_frame), frames)
  108. if count_frame > end_frame:
  109. break
  110. print("开始合成视频")
  111. im_list = os.listdir("D:/PycharmProjects/pythonProject/feiji/frames/")
  112. #im_list.sort(key=lambda x: int(x.replace("frame", "").split('.')[0])) # 最好再看看图片顺序对不
  113. img = Image.open(os.path.join("D:/PycharmProjects/pythonProject/feiji/frames/", im_list[0]))
  114. img_size = img.size # 获得图片分辨率,im_dir文件夹下的图片分辨率需要一致
  115. # fourcc = cv2.cv.CV_FOURCC('M','J','P','G') #opencv版本是2
  116. fourcc = cv.VideoWriter_fourcc(*'MJPG') # opencv版本是3
  117. videoWriter = cv.VideoWriter(r"D:\PycharmProjects\pythonProject\feiji\frames\test.avi", fourcc, fps, img_size)
  118. # count = 1
  119. for i in im_list:
  120. im_name = os.path.join("D:/PycharmProjects/pythonProject/feiji/frames/" + i)
  121. frame = cv.imdecode(np.fromfile(im_name, dtype=np.uint8), -1)
  122. videoWriter.write(frame)
  123. # count+=1
  124. # if (count == 200):
  125. # print(im_name)
  126. # break
  127. videoWriter.release()
  128. print('finish')

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/羊村懒王/article/detail/288856
推荐阅读
相关标签
  

闽ICP备14008679号