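# 赞  (page residue: "like" button label, not part of the script)
# 踩  (page residue: "dislike" button label, not part of the script)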
import requests
# Request headers used by the HTTP calls in this file. They present a desktop
# Chrome User-Agent (presumably so the site treats the script like a browser).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.69 Safari/537.36'
    ),
}
def get_track_list(albumId, pageNum, timeout=10):
    """Fetch one page of an album's track list from the Ximalaya web API.

    Args:
        albumId: Album identifier, sent as the ``albumId`` query parameter.
        pageNum: Page number, sent as the ``pageNum`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A list of ``{'trackId': ..., 'title': ...}`` dicts, one per entry in
        the response's ``data.tracks`` array. The list is empty when the
        response's ``ret`` field is not 200.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Track-list endpoint; the query string is built by requests from `query`.
    url = 'https://www.ximalaya.com/revision/album/getTracksList'
    # Values are stringified up front so they are always sent, exactly as the
    # previous hand-built URL did (requests drops parameters that are None).
    query = {'albumId': str(albumId), 'pageNum': str(pageNum)}
    resp = requests.get(url, params=query, headers=headers, timeout=timeout)
    result = resp.json()
    # `ret` is the API's own status field; anything but 200 means no data.
    if result.get('ret') != 200:
        return []
    tracks = result['data']['tracks']
    # Keep only the two fields the caller uses.
    return [
        {'trackId': track['trackId'], 'title': track['title']}
        for track in tracks
    ]
# Fetch the playback JSON for a track and extract its audio URL.
def get_track_url(trackId, timeout=10):
    """Return the audio source URL for a track, or None if unavailable.

    Args:
        trackId: Track identifier, sent as the ``trackIds`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``src`` value of the first entry in the response's
        ``data.tracksForAudioPlay`` array. Returns None when the response's
        ``ret`` field is not 200 or the array is empty.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://www.ximalaya.com/revision/play/tracks'
    resp = requests.get(
        url,
        params={'trackIds': str(trackId)},
        headers=headers,
        timeout=timeout,
    )
    result = resp.json()
    # `ret` is the API's own status field; anything but 200 means no data.
    if result.get('ret') != 200:
        return None
    tracksForAudioPlay = result['data']['tracksForAudioPlay']
    if not tracksForAudioPlay:
        return None
    # Only the first entry is used: its `src` is the audio file address.
    return tracksForAudioPlay[0]['src']
# Download
def download_track(url, file, timeout=30, chunk_size=64 * 1024):
    """Stream the resource at ``url`` into the local file ``file``.

    Args:
        url: Address of the resource to download.
        file: Destination path; opened in binary write mode, so an existing
            file is overwritten.
        timeout: Seconds to wait while connecting and between received
            chunks. It is not a cap on the total download time.
        chunk_size: Number of bytes read from the response per iteration.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On other network failures or
            timeout.
        OSError: If the destination file cannot be opened or written.
    """
    # Using the response as a context manager releases the streamed
    # connection even if writing to disk fails part-way through.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as resp:
        # Fail before touching the disk so an HTTP error page is never saved
        # under an audio file name.
        resp.raise_for_status()
        # 'wb': write the received bytes unmodified.
        with open(file, 'wb') as f:
            for data in resp.iter_content(chunk_size=chunk_size):
                if data:
                    f.write(data)
# Script entry point.
# This crawler works through the site's web API; reference article:
# cnblogs.com/wuliqv/p/9386143.html
if __name__ == '__main__':
    # Imported here because only the script entry point needs them.
    import os
    import re
    from urllib.parse import urlparse

    albumId = 7620048
    pageNum = 1
    # Named `output_dir` rather than `dir` so the builtin dir() is not shadowed.
    output_dir = 'C:/Users/21at/Desktop/python/temp/'
    # Create the target directory up front; open() does not create parents.
    os.makedirs(output_dir, exist_ok=True)

    # Fetch the track entries (id + title) for the requested album page.
    trackList = get_track_list(albumId, pageNum)
    print('trackList'+str(trackList))
    for track in trackList:
        # Resolve each track id to its audio URL; skip tracks without one.
        trackUrl = get_track_url(track['trackId'])
        if not trackUrl:
            continue
        # Take the extension from the URL *path* only, so a query string or a
        # dot in the host name never ends up in the file name.
        ext = os.path.splitext(urlparse(trackUrl).path)[1]
        # Replace characters that are not allowed in Windows file names.
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', track['title'])
        file_path = os.path.join(output_dir, safe_title + ext)
        print('正在下载'+trackUrl)
        try:
            download_track(trackUrl, file_path)
        except (requests.RequestException, OSError) as err:
            # One failed track should not abort the rest of the album.
            print('下载失败'+trackUrl+' '+str(err))
# Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.