赞
踩
参考:https://cloud.tencent.com/developer/article/1768680
参考文章中的代码有些问题(请求头需要修改),修正后的代码如下:
- import requests
- import re # 正则表达式
- import pprint
- import json
- from moviepy.editor import AudioFileClip, VideoFileClip
- from bs4 import BeautifulSoup as bs
-
# HTTP headers attached to every request made by this script.
headers = {
    # Anti-hotlinking: tells the server which page the request claims to
    # have been navigated from.
    'referer': 'https://www.bilibili.com/a',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.198 Safari/537.36'
    ),
}
-
def send_request(url, timeout=30):
    """Send a GET request using the module-level ``headers``.

    :param url: address to fetch
    :param timeout: seconds to wait for the server to connect / send data
        before giving up; without a timeout ``requests`` can block forever
        on a stalled connection
    :return: the ``requests.Response`` for a successful request
    :raises requests.exceptions.HTTPError: if the server answers with a
        4xx/5xx status, so an error page is never mistaken for real content
    :raises requests.exceptions.Timeout: if the server does not respond
        within ``timeout`` seconds
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response
-
def get_video_data(html_data):
    """Parse a Bilibili video page and return its title and stream URLs.

    :param html_data: HTML text of the video page
    :return: ``[title, audio_url, video_url]``; the title has surrounding
        whitespace stripped and characters that are illegal in file names
        replaced by ``_``, because callers use it as a file-name stem
    :raises ValueError: if the title element or the ``window.__playinfo__``
        script cannot be found in the page
    """
    # Extract the video title.
    soup = bs(html_data, 'lxml')
    title_tag = soup.find(name='h1', attrs={"class": "video-title special-text-indent"})
    if title_tag is None:
        raise ValueError('video title <h1> element not found in page')
    # The title becomes a file name downstream, so neutralise path separators
    # and other characters that file systems reject.
    title = re.sub(r'[\\/:*?"<>|\r\n]+', '_', title_tag.get_text().strip())

    # Extract the JSON blob that describes the available streams.
    # Raw string: '\.' in a normal string literal is an invalid escape.
    matches = re.findall(r'<script>window\.__playinfo__=(.*?)</script>', html_data)
    if not matches:
        raise ValueError('window.__playinfo__ script not found in page')
    json_data = json.loads(matches[0])
    pprint.pprint(json_data)

    dash = json_data['data']['dash']

    # First audio stream, first backup URL.
    audio_url = dash['audio'][0]['backupUrl'][0]
    print('解析到的音频地址:', audio_url)

    # First video stream, first backup URL.
    video_url = dash['video'][0]['backupUrl'][0]
    print('解析到的视频地址:', video_url)

    return [title, audio_url, video_url]
-
def _fetch_and_save(url, path, label):
    """Download ``url`` and write the payload to ``path``.

    Kept as a separate function so the downloaded payload goes out of scope
    (and can be freed) before the next download starts.
    """
    print('正在请求' + label + '数据')
    response = send_request(url)
    # Refuse to save an HTTP error page as if it were media.
    response.raise_for_status()
    print('正在保存' + label + '数据')
    with open(path, mode='wb') as f:
        f.write(response.content)


def save_data(file_name, audio_url, video_url):
    """Download the audio and video streams next to each other on disk.

    The audio is written to ``file_name + '.mp3'`` and the video to
    ``file_name + '.mp4'``. The streams are fetched one after the other so
    that only one payload is held in memory at a time.

    :param file_name: file-name stem shared by both output files
    :param audio_url: address of the audio stream
    :param video_url: address of the video stream
    :raises requests.exceptions.HTTPError: if either download returns a
        4xx/5xx status
    """
    _fetch_and_save(audio_url, file_name + '.mp3', '音频')
    _fetch_and_save(video_url, file_name + '.mp4', '视频')
-
def merge_data(video_name, output_path="output.mp4"):
    """Mux ``video_name + '.mp3'`` and ``video_name + '.mp4'`` into one file.

    The longer of the two tracks is trimmed to the length of the shorter one
    before they are combined.

    :param video_name: file-name stem shared by the audio and video files
    :param output_path: where the merged video is written; defaults to
        ``"output.mp4"``, the name that was previously hard-coded
    """
    print('视频合成开始:', video_name)
    # Context managers close the clips (and their reader processes) even if
    # encoding fails part-way through.
    with AudioFileClip(video_name + '.mp3') as audioclip:
        with VideoFileClip(video_name + '.mp4') as videoclip:
            video_time = videoclip.duration
            audio_time = audioclip.duration

            if video_time > audio_time:
                # Video is longer than the audio: cut the video.
                videoclip_new = videoclip.subclip(0, audio_time)
                audioclip_new = audioclip
            else:
                # Audio is longer (or equal): cut the audio.
                videoclip_new = videoclip
                audioclip_new = audioclip.subclip(0, video_time)

            # Attach the audio track and write the result.
            video_with_new_audio = videoclip_new.set_audio(audioclip_new)
            video_with_new_audio.write_videofile(
                output_path,
                codec='libx264',
                audio_codec='aac',
                temp_audiofile='temp-video.m4a',
                remove_temp=True,
            )
    print('视频合成结束:', video_name)
-
-
# Download one video page, extract its streams, save them and merge them.
url = 'https://www.bilibili.com/video/BV1bK421a7qG/?spm_id_from=333.1007.tianma.6-4-22.click'
response = send_request(url)
# Prefer the charset declared inside the HTML itself. When the page declares
# none, the list is empty, so fall back to the encoding detected from the
# body instead of failing with IndexError.
declared_encodings = requests.utils.get_encodings_from_content(response.text)
response.encoding = declared_encodings[0] if declared_encodings else response.apparent_encoding
html_data = response.text
video_data = get_video_data(html_data)
save_data(video_data[0], video_data[1], video_data[2])
merge_data(video_data[0])
效果
小姐姐挺靓,就是左下角有水印,想办法去除水印,参考:python实战之去除视频水印&字幕_python 去除视频水印-CSDN博客
- import os
- import sys
- import cv2
- import numpy
- from moviepy import editor
-
TEMP_VIDEO = 'temp.mp4'  # scratch file holding the silent, OpenCV-written video


class WatermarkRemover():
    '''
    Remove a watermark or subtitles from every video in a directory.

    A region is selected by hand on the first frame of the first video,
    bright pixels inside it are turned into a mask, and each frame is
    inpainted with that mask. The original audio is then copied back.
    '''

    def __init__(self, video_path, output, threshold: int, kernel_size: int):
        '''
        :param video_path: directory containing the input videos
        :param output: directory that receives the processed videos
        :param threshold: binarisation threshold used to build masks
        :param kernel_size: side length of the square dilation kernel
        '''
        self.threshold = threshold
        self.kernel_size = kernel_size
        self.video_path = video_path
        self.output = output

    def select_roi(self, img: numpy.ndarray, hint: str) -> list:
        '''
        Let the user box the watermark or subtitle; SPACE or ENTER confirms.

        The image is displayed at 70 % of its size and the selection is
        scaled back to the coordinates of ``img``.

        :param img: image to display
        :param hint: title of the selection window
        :return: selected region as ``[x, y, width, height]``
        '''
        COFF = 0.7
        w, h = int(COFF * img.shape[1]), int(COFF * img.shape[0])
        resize_img = cv2.resize(img, (w, h))
        roi = cv2.selectROI(hint, resize_img, False, False)
        cv2.destroyAllWindows()
        return [int(roi[0] / COFF), int(roi[1] / COFF), int(roi[2] / COFF), int(roi[3] / COFF)]

    def dilate_mask(self, mask: numpy.ndarray) -> numpy.ndarray:
        '''
        Grow the mask with a ``kernel_size`` x ``kernel_size`` dilation.

        :param mask: mask image
        :return: dilated mask
        '''
        kernel = numpy.ones((self.kernel_size, self.kernel_size), numpy.uint8)
        return cv2.dilate(mask, kernel)

    def generate_single_mask(self, img: numpy.ndarray, roi: list, threshold: int) -> numpy.ndarray:
        '''
        Build the mask of one frame from the hand-selected region.

        Pixels inside the region whose grey value exceeds ``threshold``
        become 255; everything else is 0.

        :param img: single BGR frame
        :param roi: region as ``[x, y, width, height]``
        :param threshold: binarisation threshold
        :return: mask with the same height and width as ``img``
        '''
        # An invalid region terminates the program.
        if len(roi) != 4:
            print('NULL ROI!')
            sys.exit()

        # Copy only the grey pixels that lie inside the region.
        roi_img = numpy.zeros((img.shape[0], img.shape[1]), numpy.uint8)
        top, bottom = int(roi[1]), int(roi[1] + roi[3])
        left, right = int(roi[0]), int(roi[0] + roi[2])
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        roi_img[top:bottom, left:right] = gray[top:bottom, left:right]

        _, mask = cv2.threshold(roi_img, threshold, 255, cv2.THRESH_BINARY)
        return mask

    @staticmethod
    def _sampling_step(frame_count) -> int:
        '''Return the frame interval for mask sampling, never less than 1.'''
        # Videos with fewer than 5 frames (or an unknown frame count) would
        # otherwise give a step of 0 and a division by zero in the caller.
        return max(1, int(frame_count) // 5)

    def generate_watermark_mask(self, video_path: str) -> numpy.ndarray:
        '''
        Build the watermark mask from several frames spread over the video.

        The per-frame masks are AND-ed together so that only pixels that are
        bright in every sampled frame remain, then the result is dilated.

        :param video_path: path of the video file
        :return: watermark mask
        :raises ValueError: if no frame can be read from ``video_path``
        '''
        video = cv2.VideoCapture(video_path)
        try:
            success, frame = video.read()
            if not success:
                raise ValueError('cannot read any frame from %r' % (video_path,))

            roi = self.select_roi(frame, 'select watermark ROI')
            mask = numpy.full((frame.shape[0], frame.shape[1]), 255, numpy.uint8)

            step = self._sampling_step(video.get(cv2.CAP_PROP_FRAME_COUNT))
            index = 0
            while success:
                if index % step == 0:
                    mask = cv2.bitwise_and(mask, self.generate_single_mask(frame, roi, self.threshold))
                success, frame = video.read()
                index += 1
        finally:
            video.release()

        return self.dilate_mask(mask)

    def generate_subtitle_mask(self, frame: numpy.ndarray, roi: list) -> numpy.ndarray:
        '''
        Build the subtitle mask of one frame from the hand-selected region.

        Only the vertical extent of ``roi`` is used; the mask spans the full
        width of the frame.

        :param frame: single frame
        :param roi: region as ``[x, y, width, height]``
        :return: dilated subtitle mask
        '''
        band = [0, roi[1], frame.shape[1], roi[3]]
        mask = self.generate_single_mask(frame, band, self.threshold)
        return self.dilate_mask(mask)

    def inpaint_image(self, img: numpy.ndarray, mask: numpy.ndarray) -> numpy.ndarray:
        '''
        Repair the masked pixels of an image.

        :param img: single frame
        :param mask: mask marking the pixels to repair
        :return: repaired image
        '''
        return cv2.inpaint(img, mask, 1, cv2.INPAINT_TELEA)

    def merge_audio(self, input_path: str, output_path: str, temp_path: str):
        '''
        Combine the audio of the original video with the processed video.

        :param input_path: path of the original video
        :param output_path: path of the muxed result
        :param temp_path: path of the silent processed video
        '''
        with editor.VideoFileClip(input_path) as video:
            audio = video.audio
            with editor.VideoFileClip(temp_path) as opencv_video:
                clip = opencv_video.set_audio(audio)
                # write_videofile replaces the legacy to_videofile alias.
                clip.write_videofile(output_path)

    def _list_videos(self) -> list:
        '''Return the path of every entry in the input directory.'''
        return [os.path.join(self.video_path, i) for i in os.listdir(self.video_path)]

    def _output_path(self, name: str, suffix: str) -> str:
        '''Return ``<output>/<stem of name><suffix>.mp4``.'''
        # splitext keeps dots inside the stem ("a.b.mp4" -> "a.b").
        stem = os.path.splitext(os.path.basename(name))[0]
        return os.path.join(self.output, stem + suffix + '.mp4')

    def _read_first_frame(self, name: str) -> numpy.ndarray:
        '''Return the first frame of a video, or raise ValueError.'''
        video = cv2.VideoCapture(name)
        try:
            success, frame = video.read()
        finally:
            video.release()
        if not success:
            raise ValueError('cannot read any frame from %r' % (name,))
        return frame

    def _rewrite_video(self, name: str, output_path: str, mask_for_frame):
        '''Inpaint every frame of ``name`` and mux its audio into ``output_path``.'''
        video = cv2.VideoCapture(name)
        fps = video.get(cv2.CAP_PROP_FPS)
        size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        video_writer = cv2.VideoWriter(TEMP_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
        try:
            success, frame = video.read()
            while success:
                frame = self.inpaint_image(frame, mask_for_frame(frame))
                video_writer.write(frame)
                success, frame = video.read()
        finally:
            # Both handles must be released before the temp file is read back.
            video.release()
            video_writer.release()

        self.merge_audio(name, output_path, TEMP_VIDEO)

    def _remove_temp(self):
        '''Delete the scratch video if it exists.'''
        if os.path.exists(TEMP_VIDEO):
            os.remove(TEMP_VIDEO)

    def remove_video_watermark(self):
        '''
        Remove the watermark from every video in the input directory.

        The mask is generated once, from the first video, and reused for all
        of them. Results are written as ``<stem>_no_watermark.mp4``.
        '''
        os.makedirs(self.output, exist_ok=True)

        mask = None
        try:
            for name in self._list_videos():
                if mask is None:
                    mask = self.generate_watermark_mask(name)
                self._rewrite_video(
                    name,
                    self._output_path(name, '_no_watermark'),
                    lambda _frame: mask,
                )
        finally:
            self._remove_temp()

    def remove_video_subtitle(self):
        '''
        Remove the subtitles from every video in the input directory.

        The region is selected once, on the first frame of the first video;
        a fresh mask is computed for every frame. Results are written as
        ``<stem>_no_sub.mp4`` in the output directory.
        '''
        os.makedirs(self.output, exist_ok=True)

        roi = None
        try:
            for name in self._list_videos():
                if roi is None:
                    roi = self.select_roi(self._read_first_frame(name), 'select subtitle ROI')
                self._rewrite_video(
                    name,
                    self._output_path(name, '_no_sub'),
                    lambda frame: self.generate_subtitle_mask(frame, roi),
                )
        finally:
            self._remove_temp()
-
# Remove the watermark from every file in the input directory.
video_path = 'video'
output_path = 'output'
remover = WatermarkRemover(
    video_path=video_path,
    output=output_path,
    threshold=80,
    kernel_size=5,
)
remover.remove_video_watermark()
# To remove subtitles instead, build the remover the same way and call
# remover.remove_video_subtitle() in place of remove_video_watermark().
效果一般吧:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。