赞
踩
- import requests
- from bs4 import BeautifulSoup
- # 1.请求url
def getData(url, timeout=10):
    """Download the page at ``url`` and print every ``<span class="short">``.

    The page is fetched with a browser-like ``User-Agent`` header, parsed
    with the ``html5lib`` parser, and the text of each matching ``<span>``
    is printed on its own line.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        None. The extracted text is written to standard output.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within ``timeout`` seconds.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.67'
    }
    # requests has no default timeout; without one a stalled server would
    # block this call forever.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    bs = BeautifulSoup(response.content, 'html5lib')
    # Presumably these spans hold the short-comment bodies on the target
    # site -- verify against the live page markup.
    short_list = bs.find_all("span", attrs={"class": "short"})
    for short in short_list:
        print(short.text)
if __name__ == '__main__':
    # Number of pages to fetch.
    for i in range(1):
        # The URL needs a replacement field for .format() to have any
        # effect; `start={}` receives the offset of the first comment on
        # the requested page.
        baseurl = 'https://movie.douban.com/subject/30174085/comments?start={}&sort=new_score&status=P'
        # Advance the offset by 20 per page.
        baseurl = baseurl.format(i * 20)
        # Scrape and print the comments of this page.
        getData(baseurl)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。