赞
踩
Package Version
--------------- ---------
baidu-aip 2.2.18.0
jieba 0.42.1
moviepy 1.0.3
numpy 1.20.2
opencv-python 4.5.1.48
Pillow 8.2.0
requests 2.25.1
wordcloud 1.8.1
you-get 0.4.1520
通过视频BV号请求cid,再使用cid请求弹幕文件,最后使用正则表达式去匹配弹幕文本,将匹配出来的结果保存在本地供之后使用,代码及思路比较简单,就不做过多赘述
cid请求链接:https://api.bilibili.com/x/web-interface/view?bvid=
弹幕请求链接:https://api.bilibili.com/x/v1/dm/list.so?oid=
def get_cid(cls, bv, headers=None, timeout=10):
    """
    Look up the cid of a Bilibili video from its BV id.

    :param bv: BV id of the video; sent as the ``bvid`` query parameter
    :param headers: optional HTTP headers to send with the request
    :param timeout: seconds to wait for the server before giving up
    :return: the ``data.cid`` field of the JSON response, as a string
    :raises requests.HTTPError: if the server answers with an error status
    """
    # Without an explicit timeout requests would wait indefinitely on a
    # stalled connection.
    response = requests.get(
        "https://api.bilibili.com/x/web-interface/view",
        params={"bvid": str(bv)},
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()
    return str(payload['data']['cid'])


def get_barrage(cls, bv, to_file_path, timeout=10):
    """
    Download the danmaku (bullet comments) of a video and save them as text.

    The cid is resolved from the BV id first, then the danmaku XML is
    requested with that cid and the text of every ``<d>`` element is written
    to ``to_file_path``, one comment per line.

    :param bv: BV id of the video
    :param to_file_path: path of the UTF-8 text file to write
    :param timeout: seconds to wait for each HTTP response
    :raises requests.HTTPError: if either request answers with an error status
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    }
    # Both requests share the same headers and timeout.
    cid = cls.get_cid(bv, headers=headers, timeout=timeout)
    response = requests.get(
        "https://api.bilibili.com/x/v1/dm/list.so",
        params={"oid": cid},
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    xml_text = response.content.decode('utf-8')
    # Capture the text between each <d ...> opening tag and its </d>.
    comments = re.findall(r"<d.*?>(.*?)</d>", xml_text)
    with open(to_file_path, "w", encoding="utf_8") as f:
        f.writelines(comment + "\n" for comment in comments)
使用第三方开源库you-get进行下载
you-get支持的音视频网站
Site | URL | Videos? | Images? | Audios? |
---|---|---|---|---|
YouTube | https://www.youtube.com/ | ✓ | ||
Twitter | https://twitter.com/ | ✓ | ✓ | ||
VK | http://vk.com/ | ✓ | ✓ | |
Vine | https://vine.co/ | ✓ | ||
Vimeo | https://vimeo.com/ | ✓ | ||
Veoh | http://www.veoh.com/ | ✓ | ||
Tumblr | https://www.tumblr.com/ | ✓ | ✓ | ✓ |
TED | http://www.ted.com/ | ✓ | ||
SoundCloud | https://soundcloud.com/ | ✓ | ||
SHOWROOM | https://www.showroom-live.com/ | ✓ | ||
Pinterest | https://www.pinterest.com/ | ✓ | |||
MTV81 | http://www.mtv81.com/ | ✓ | ||
Mixcloud | https://www.mixcloud.com/ | ✓ | ||
Metacafe | http://www.metacafe.com/ | ✓ | ||
Magisto | http://www.magisto.com/ | ✓ | ||
Khan Academy | https://www.khanacademy.org/ | ✓ | ||
Internet Archive | https://archive.org/ | ✓ | ||
Instagram | https://instagram.com/ | ✓ | ✓ | ||
InfoQ | http://www.infoq.com/presentations/ | ✓ | ||
Imgur | http://imgur.com/ | ✓ | ||
Heavy Music Archive | http://www.heavy-music.ru/ | ✓ | ||
Freesound | http://www.freesound.org/ | ✓ | ||
Flickr | https://www.flickr.com/ | ✓ | ✓ | |
FC2 Video | http://video.fc2.com/ | ✓ | ||
Facebook | https://www.facebook.com/ | ✓ | |||
eHow | http://www.ehow.com/ | ✓ | ||
Dailymotion | http://www.dailymotion.com/ | ✓ | ||
Coub | http://coub.com/ | ✓ | ||
CBS | http://www.cbs.com/ | ✓ | ||
Bandcamp | http://bandcamp.com/ | ✓ | ||
AliveThai | http://alive.in.th/ | ✓ | ||
interest.me | http://ch.interest.me/tvn | ✓ | ||
755 ナナゴーゴー | http://7gogo.jp/ | ✓ | ✓ | |
niconico ニコニコ動画 | http://www.nicovideo.jp/ | ✓ | ||
163 网易视频 网易云音乐 | http://v.163.com/ http://music.163.com/ | ✓ | ✓ | |
56网 | http://www.56.com/ | ✓ | ||
AcFun | http://www.acfun.cn/ | ✓ | ||
Baidu 百度贴吧 | http://tieba.baidu.com/ | ✓ | ✓ | |
爆米花网 | http://www.baomihua.com/ | ✓ | ||
bilibili 哔哩哔哩 | http://www.bilibili.com/ | ✓ | ✓ | ✓ |
豆瓣 | http://www.douban.com/ | ✓ | ✓ | |
斗鱼 | http://www.douyutv.com/ | ✓ | ||
凤凰视频 | http://v.ifeng.com/ | ✓ | ||
风行网 | http://www.fun.tv/ | ✓ | ||
iQIYI 爱奇艺 | http://www.iqiyi.com/ | ✓ | ||
激动网 | http://www.joy.cn/ | ✓ | ||
酷6网 | http://www.ku6.com/ | ✓ | ||
酷狗音乐 | http://www.kugou.com/ | ✓ | ||
酷我音乐 | http://www.kuwo.cn/ | ✓ | ||
乐视网 | http://www.le.com/ | ✓ | ||
荔枝FM | http://www.lizhi.fm/ | ✓ | ||
懒人听书 | http://www.lrts.me/ | ✓ | ||
秒拍 | http://www.miaopai.com/ | ✓ | ||
MioMio弹幕网 | http://www.miomio.tv/ | ✓ | ||
MissEvan 猫耳FM | http://www.missevan.com/ | ✓ | ||
痞客邦 | https://www.pixnet.net/ | ✓ | ||
PPTV聚力 | http://www.pptv.com/ | ✓ | ||
齐鲁网 | http://v.iqilu.com/ | ✓ | ||
QQ 腾讯视频 | http://v.qq.com/ | ✓ | ||
企鹅直播 | http://live.qq.com/ | ✓ | ||
Sina 新浪视频 微博秒拍视频 | http://video.sina.com.cn/ http://video.weibo.com/ | ✓ | ||
Sohu 搜狐视频 | http://tv.sohu.com/ | ✓ | ||
Tudou 土豆 | http://www.tudou.com/ | ✓ | ||
阳光卫视 | http://www.isuntv.com/ | ✓ | ||
Youku 优酷 | http://www.youku.com/ | ✓ | ||
战旗TV | http://www.zhanqi.tv/lives | ✓ | ||
央视网 | http://www.cntv.cn/ | ✓ | ||
Naver 네이버 | http://tvcast.naver.com/ | ✓ | ||
芒果TV | http://www.mgtv.com/ | ✓ | ||
火猫TV | http://www.huomao.com/ | ✓ | ||
阳光宽频网 | http://www.365yg.com/ | ✓ | ||
西瓜视频 | https://www.ixigua.com/ | ✓ | ||
新片场 | https://www.xinpianchang.com/ | ✓ | ||
快手 | https://www.kuaishou.com/ | ✓ | ✓ | |
抖音 | https://www.douyin.com/ | ✓ | ||
TikTok | https://www.tiktok.com/ | ✓ | ||
中国体育(TV) | http://v.zhibo.tv/ http://video.zhibo.tv/ | ✓ | ||
知乎 | https://www.zhihu.com/ | ✓ |
# 获取视频信息
you-get -i https://www.bilibili.com/video/BV1f4411M7QC
# 下载视频
you-get --format=flv -o E:\Desktop\output https://www.bilibili.com/video/BV1f4411M7QC
更多使用方法请参考you-get项目地址
这部分的需求非常简单,就是剪下视频或者音频中的某一段并保存
Python有一个叫moviepy的第三方库,可以实现视频的剪辑、拼接,音频的剪辑、拼接、提取,以及音视频的合并等操作
def cut_video(cls, origin_file_path, to_file_path, start, end):
    """
    Cut one span out of a video file and save it as a new video.

    :param origin_file_path: path of the source video file
    :param to_file_path: path the cut video is written to
    :param start: start time point, passed to ``subclip``
    :param end: end time point, passed to ``subclip``
    """
    # The context manager closes the source clip once the output is written,
    # so the resources it holds are released even if writing fails.
    with VideoFileClip(origin_file_path) as source:
        source.subclip(start, end).write_videofile(to_file_path)


def cut_audio(cls, origin_file_path, to_file_path, start, end):
    """
    Cut one span out of an audio track and save it as a new audio file.

    :param origin_file_path: path of the source file opened with AudioFileClip
    :param to_file_path: path the cut audio is written to
    :param start: start time point, passed to ``subclip``
    :param end: end time point, passed to ``subclip``
    """
    with AudioFileClip(origin_file_path) as source:
        source.subclip(start, end).write_audiofile(to_file_path)


def get_audio_from_video(cls, video_file_path, to_file_path):
    """
    Extract the audio track of a video into its own file.

    :param video_file_path: path of the video file
    :param to_file_path: path the audio file is written to
    :raises ValueError: if the video has no audio track
    """
    with VideoFileClip(video_file_path) as video:
        if video.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "video has no audio track: {}".format(video_file_path)
            )
        video.audio.write_audiofile(to_file_path)
使用opencv-python(cv2)打开视频文件并按帧读取,再将每一帧保存到文件夹中
def split(cls, from_file_path, to_folder_path, frames=0):
    """
    Read a video frame by frame and save each frame as a JPEG image.

    Frames are written as ``0.jpg``, ``1.jpg``, ... in reading order, so
    ``0.jpg`` is the first frame of the video.

    :param from_file_path: path of the video file
    :param to_folder_path: folder the images are written to; created if missing
    :param frames: number of frames to save; 0 (or less) saves every frame
    """
    if to_folder_path:
        # cv2.imwrite does not create missing directories.
        os.makedirs(to_folder_path, exist_ok=True)

    vc = cv2.VideoCapture(from_file_path)  # open the video file
    saved = 0
    try:
        while frames <= 0 or saved < frames:
            ret, frame = vc.read()
            if not ret:
                # End of stream, or the file could not be opened/decoded.
                break
            # Write only after a successful read, so no frame is skipped and
            # imwrite never receives an empty frame.
            cv2.imwrite(os.path.join(to_folder_path, '{}.jpg'.format(saved)), frame)
            saved += 1
            print('第 {} 张图片存放成功!'.format(saved))
    finally:
        vc.release()
图片二值化这里有两种思路,一种是使用opencv,还有一种方法是使用百度智能云的人像分割接口。
两种方法各有优劣:opencv 在本地运行、无需联网,但只是按亮度做自适应阈值处理,得到的是整幅画面的黑白图;百度人像分割需要调用在线接口,但返回的是人像区域的标注图,可以把人物单独分离出来。
所以具体使用时需要根据视频的情况进行切换
下面为两种处理方法的不同效果(图一为cv2,图二为百度人像分割)
def binary_option_cv2(cls, from_file_path, to_file_path):
    """
    Binarize an image with OpenCV and save the result.

    :param from_file_path: path of the source image
    :param to_file_path: path the binarized image is written to
    """
    img = cv2.imread(from_file_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Power-law (gamma) adjustment with exponent 1.4 on the normalized
    # grayscale values, converted back to 8-bit.
    new_gray = np.uint8(255 * (gray / 255.0) ** 1.4)
    dst = cv2.adaptiveThreshold(new_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 1)
    # medianBlur returns the filtered image; keep the result so the blur is
    # actually applied to what gets saved.
    dst = cv2.medianBlur(dst, 5)
    cv2.imwrite(to_file_path, dst)


def binary_option_baidu(cls, from_file_path, to_file_path):
    """
    Binarize an image with the Baidu body segmentation API and save the result.

    :param from_file_path: path of the source image
    :param to_file_path: path the binarized image is written to
    """
    # Only the size of the source image is needed, to scale the label map.
    height, width = cv2.imread(from_file_path).shape[:2]
    with open(from_file_path, 'rb') as fp:
        image = fp.read()

    # One request only; the response is what carries the label map.
    res = cls.client.bodySeg(image)
    labelmap = base64.b64decode(res['labelmap'])
    labelimg = np.frombuffer(labelmap, np.uint8)  # raw bytes as a uint8 array
    labelimg = cv2.imdecode(labelimg, 1)
    # Nearest-neighbour keeps the label values intact while resizing.
    labelimg = cv2.resize(labelimg, (width, height), interpolation=cv2.INTER_NEAREST)
    # Turn label value 1 into 255 (presumably 1 marks the person pixels —
    # confirm against the Baidu body_seg documentation).
    img_new = np.where(labelimg == 1, 255, labelimg)
    cv2.imwrite(to_file_path, img_new)
使用wordcloud库,并使用前面爬取的B站弹幕作为词云内容,二值化图片作为遮罩
def word_cloud(cls, frequencies, mask_file_path, to_file_path):
    """
    Render a word-cloud image shaped by a mask picture.

    :param frequencies: dict of word frequencies
    :param mask_file_path: path of the mask image
    :param to_file_path: path the word-cloud image is saved to
    """
    mask_pixels = np.array(Image.open(mask_file_path))
    # The mask is inverted before use (presumably so the white regions of
    # the binarized frame become the drawable area — confirm against the
    # WordCloud ``mask`` documentation).
    inverted_mask = 255 - mask_pixels

    cloud_options = {
        'background_color': 'white',
        'max_font_size': 100,
        'min_font_size': 10,
        'repeat': True,
        'max_words': 1000,
        'mode': 'RGB',
        'mask': inverted_mask,
    }
    cloud = WordCloud(**cloud_options)
    cloud.generate_from_frequencies(frequencies)
    cloud.to_file(to_file_path)
使用numpy拼接图片,使用cv2将拼接的图片写入视频流并保存
为了将视频与音轨对齐,生成视频时需要设置合适的视频帧率(与原视频保持一致),原视频帧率可以使用播放器查看,也可以使用cv2获取
def joint(cls, origin_folder, word_cloud_folder, to_file_path, fps=30):
    """
    Join each original frame with its word-cloud frame and write a video.

    For every ``<number>.jpg`` in ``origin_folder`` the image of the same
    name in ``word_cloud_folder`` is placed to its right, and the combined
    frames are written to the video in ascending numeric order.

    :param origin_folder: folder with the original frames
    :param word_cloud_folder: folder with the word-cloud frames
    :param to_file_path: path the video is written to
    :param fps: frame rate of the output video; set it to the frame rate of
        the source video so the result lines up with the audio track
    :raises ValueError: if ``origin_folder`` holds no ``<number>.jpg`` file
    """
    # Only numbered JPEG frames take part; other files in the folder
    # (thumbnail caches, hidden files) are ignored instead of crashing int().
    frame_numbers = sorted(
        int(stem)
        for stem, ext in map(os.path.splitext, os.listdir(origin_folder))
        if ext.lower() == '.jpg' and stem.isdecimal()
    )
    if not frame_numbers:
        raise ValueError("no numbered .jpg frames found in {}".format(origin_folder))

    first_frame = cv2.imread(os.path.join(origin_folder, '{}.jpg'.format(frame_numbers[0])))
    height, width = first_frame.shape[:2]
    # Two frames side by side: the output is twice as wide as one frame.
    width = width * 2
    video_writer = cv2.VideoWriter(to_file_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        for number in frame_numbers:
            file_name = '{}.jpg'.format(number)
            ori_jpg = os.path.join(origin_folder, file_name)
            word_jpg = os.path.join(word_cloud_folder, file_name)
            ori_arr = cv2.imread(ori_jpg)
            word_arr = cv2.imread(word_jpg)
            # Stack the two images horizontally into one frame.
            com_arr = np.hstack((ori_arr, word_arr))
            video_writer.write(com_arr)
            print("{}写入视频流成功".format(ori_jpg))
    finally:
        # Release explicitly so the output file is closed even on error.
        video_writer.release()
与前面 视频、音频剪辑和音频提取 思路相似
def set_audio_for_video(cls, video_file_path, audio_file_path, to_file_path):
    """
    Merge an audio file into a video and save the result.

    :param video_file_path: path of the video file
    :param audio_file_path: path of the audio file
    :param to_file_path: path the merged video is written to
    """
    # Both clips are closed once the output is written, so the resources
    # they hold are released even if writing fails.
    with VideoFileClip(video_file_path) as video, AudioFileClip(audio_file_path) as audio:
        new_video = video.set_audio(audio)
        new_video.write_videofile(to_file_path)
词云对照
词云对照
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。