赞
踩
(注意:为了方便生成对应的文件名,粘贴的内容必须带有“【xxx】”这对符号,符号中间的文字即为文件名。)网址会被自动识别。
import io
import json
import re

import requests

# Request headers sent with every HTTP call made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    # The referer tells the server which page sent us to the requested URL
    # (in other words, who "recommended" the link). The original author notes
    # that the site can be accessed normally once this header is present.
    "referer": "https://message.bilibili.com/",
}

def extract_content(text, symbol):
    """Return every substring of *text* enclosed by the given delimiters.

    Args:
        text: The string to search.
        symbol: Either a single delimiter string that is used on both sides
            of the content, or a two-item sequence ``(opening, closing)``.

    Returns:
        A list with the enclosed substrings in order of appearance; the list
        is empty when no delimited section exists.

    Raises:
        ValueError: If *symbol* is a sequence that does not hold exactly two
            items.
    """
    if isinstance(symbol, str):
        opening = closing = symbol
    else:
        opening, closing = symbol

    # Escape the delimiters so they are always matched literally.  Formatting
    # a list straight into the pattern would produce a character class that
    # also matches quotes, commas and spaces, truncating titles that contain
    # any of those characters.
    pattern = "%s(.*?)%s" % (re.escape(opening), re.escape(closing))
    return re.findall(pattern, text)


# Ask for the pasted share text. It is expected to contain both the title
# wrapped in 【】 and the video URL.
text = input('请输入b站视频网址:')

# The text between the 【 and 】 markers becomes the output file name.
symbol = ["【", "】"]
titles = extract_content(text, symbol)
raw_title = titles[0] if titles else ''
# Replace characters that are illegal in file names or act as path
# separators, so the title cannot break or redirect the file write.
result = re.sub(r'[\\/:*?"<>|]', '_', raw_title).strip()
if not result:
    # Stop with a readable message instead of an IndexError traceback.
    raise SystemExit('未找到文件名:粘贴内容必须带有“【文件名】”')
print('文件名为:' + result)

# The first http(s) URL found in the pasted text is the page to download.
url_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
url_match = re.search(url_regex, text)
if url_match is None:
    raise SystemExit('未找到网址:粘贴内容必须包含视频链接')
urls = url_match.group(0)

print('网址:' + urls)

# Output file names for the video stream and the audio stream.
mp4 = result + '.mp4'

mp3 = result + '.mp3'


def get_url_html(url):
    """Fetch the page at *url* and hand its HTML text to ``get_json``.

    Args:
        url: Address of the video page to request.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # A timeout prevents the script from hanging forever on a stalled
    # connection; requests waits indefinitely when none is given.
    req = requests.get(url, headers=headers, timeout=30)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    req.raise_for_status()
    get_json(req.text)

def get_json(htmltxt):
    """Extract the first audio and video stream URLs and start the download.

    The page HTML is searched for the ``window.__playinfo__`` script tag; its
    JSON payload is parsed and the ``base_url`` of the first audio entry and
    the first video entry under ``data.dash`` are passed to ``download``.

    Args:
        htmltxt: HTML text of the video page.

    Raises:
        IndexError: If the page holds no ``window.__playinfo__`` script (the
            same exception type the lookup raised before, now with a
            descriptive message), or if a stream list is empty.
        KeyError: If the JSON payload lacks the expected keys.
        json.JSONDecodeError: If the payload is not valid JSON.
    """
    # Dots are escaped so they match literally, and re.S lets the payload
    # span several lines.
    match = re.search(
        r'<script>window\.__playinfo__=(.*?)</script>', htmltxt, re.S
    )
    if match is None:
        raise IndexError('window.__playinfo__ script not found in page HTML')

    js = json.loads(match.group(1))
    dash = js["data"]["dash"]
    audiourl = dash["audio"][0]["base_url"]
    videourl = dash["video"][0]["base_url"]
    download(audiourl, videourl)

def download(audiourl, videourl):
    """Download the audio and video streams to the ``mp3`` and ``mp4`` files.

    Args:
        audiourl: URL of the audio stream; saved under the module-level
            ``mp3`` file name.
        videourl: URL of the video stream; saved under the module-level
            ``mp4`` file name.

    Raises:
        requests.HTTPError: If either server response has an error status.
        requests.RequestException: On connection problems or timeout.
        OSError: If an output file cannot be written.
    """
    print('爬取中,等待....')
    _save_stream(audiourl, mp3)
    _save_stream(videourl, mp4)
    print('爬取完毕!')


def _save_stream(url, path):
    """Stream the response body of *url* into the file at *path*."""
    # stream=True together with iter_content writes the media chunk by chunk
    # instead of holding the whole file in memory.
    with requests.get(url=url, headers=headers, stream=True, timeout=30) as res:
        # Check the status before opening the file so that an error page is
        # never saved as media.
        res.raise_for_status()
        with open(path, "wb") as f:
            for chunk in res.iter_content(chunk_size=64 * 1024):
                f.write(chunk)

# Script entry point: start the download for the URL parsed from the input.
if __name__ == "__main__":
    get_url_html(urls)

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。