赞
踩
这篇文章我们将借助 B 站的 API 来爬取 B 站视频的弹幕,这是本系列的第四篇文章。
思路很简单,没什么好分析的:先调用"BV 号转弹幕池编号(cid)"的接口拿到 cid,再用这个 cid 调用网上找到的弹幕池接口获取弹幕。
开发环境:win10 python3.6.8
使用工具:pycharm
使用第三方库:requests、BeautifulSoup(bs4,配合 html5lib 解析器);使用标准库:json、re
import requests
import json
from bs4 import BeautifulSoup
import re
def bvid2cid(bvid):
    """Return the cid (danmaku pool id) of a Bilibili video given its BV id.

    Queries the ``x/player/pagelist`` endpoint and returns the ``cid`` of the
    first entry in the response's ``data`` list.

    Args:
        bvid: The video's BV id, e.g. ``'BV1gp4y1e7cE'``. Converted with
            ``str()`` before being sent.

    Returns:
        The ``cid`` value of the first page entry.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the response carries no page data (for example an
            unknown BV id).
    """
    url = "https://api.bilibili.com/x/player/pagelist"
    # Let requests build and escape the query string instead of concatenating
    # it by hand; the timeout keeps a stalled connection from hanging forever.
    r = requests.get(
        url,
        params={"bvid": str(bvid), "jsonp": "jsonp"},
        timeout=10,
    )
    r.raise_for_status()
    payload = json.loads(r.text)
    pages = payload.get('data')
    if not pages:
        # Surface the API's own error message rather than failing later with
        # an opaque KeyError/TypeError on the missing data.
        raise ValueError(
            "no page data returned for bvid %r: %s"
            % (bvid, payload.get('message'))
        )
    return pages[0]['cid']
def cid2data(cid):
    """Download the danmaku of a video and append them to ``<cid>.txt``.

    Fetches the ``x/v1/dm/list.so`` endpoint for the given cid, collects every
    ``<d>`` element of the response and appends the text of each one as a
    separate line to the file ``str(cid) + '.txt'`` in the current working
    directory (UTF-8, append mode). Nothing is written, and no file is
    created, when the response contains no ``<d>`` elements.

    Args:
        cid: The danmaku pool id, as returned by ``bvid2cid``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = 'https://api.bilibili.com/x/v1/dm/list.so'
    r = requests.get(url=url, params={'oid': str(cid)}, timeout=10)
    r.raise_for_status()
    # Decode the body as UTF-8 explicitly instead of relying on the charset
    # requests guesses from the response headers.
    r.encoding = 'utf-8'
    html = BeautifulSoup(r.text, 'html5lib')
    ds = html.find_all('d')
    if not ds:
        return
    # Open the output file once for the whole batch rather than once per
    # danmaku. get_text() yields the element's decoded text directly, so
    # comments containing '">' , line breaks or HTML entities are written
    # intact instead of being truncated (or crashing) in a regex over str(d).
    with open(str(cid) + '.txt', 'a', encoding='utf-8') as f:
        f.writelines(d.get_text() + '\n' for d in ds)
import requests
import json
from bs4 import BeautifulSoup
import re


def bvid2cid(bvid):
    """Return the cid (danmaku pool id) of a Bilibili video given its BV id.

    Queries the ``x/player/pagelist`` endpoint and returns the ``cid`` of the
    first entry in the response's ``data`` list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the response carries no page data.
    """
    url = "https://api.bilibili.com/x/player/pagelist"
    # requests builds and escapes the query string; the timeout keeps a
    # stalled connection from hanging forever.
    r = requests.get(
        url,
        params={"bvid": str(bvid), "jsonp": "jsonp"},
        timeout=10,
    )
    r.raise_for_status()
    payload = json.loads(r.text)
    pages = payload.get('data')
    if not pages:
        raise ValueError(
            "no page data returned for bvid %r: %s"
            % (bvid, payload.get('message'))
        )
    return pages[0]['cid']


def cid2data(cid):
    """Download the danmaku of a video and append them to ``<cid>.txt``.

    Each ``<d>`` element of the ``x/v1/dm/list.so`` response becomes one line
    of the file ``str(cid) + '.txt'`` (UTF-8, append mode). No file is created
    when the response contains no ``<d>`` elements.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = 'https://api.bilibili.com/x/v1/dm/list.so'
    r = requests.get(url=url, params={'oid': str(cid)}, timeout=10)
    r.raise_for_status()
    # Decode the body as UTF-8 explicitly instead of relying on the charset
    # requests guesses from the response headers.
    r.encoding = 'utf-8'
    html = BeautifulSoup(r.text, 'html5lib')
    ds = html.find_all('d')
    if not ds:
        return
    # One open() for the whole batch; get_text() returns the decoded element
    # text, avoiding the fragile regex over the serialized tag.
    with open(str(cid) + '.txt', 'a', encoding='utf-8') as f:
        f.writelines(d.get_text() + '\n' for d in ds)


if __name__ == "__main__":
    # Example run: resolve the BV id to its cid, then dump that video's
    # danmaku into '<cid>.txt'.
    cid = bvid2cid('BV1gp4y1e7cE')
    cid2data(cid)
可以看到,这次爬取非常成功
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。