赞
踩
- import requests
- from lxml import etree
def get_one_page(url):
    """Download a single page and return its HTML source.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
    }
    # The User-Agent header was previously built but never sent; pass it
    # along, and bound the wait so a stalled server cannot hang the crawl.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 200:  # page responded normally
        return response.text  # HTML source of the page
    return None
def save_data():
    """Crawl the ten board pages and append every movie title to a file.

    Pages are requested at offsets 0, 10, ..., 90. Each title found is
    printed and appended on its own line to ``manyan1.txt`` (UTF-8).
    A page for which no HTML was obtained is skipped instead of aborting
    the whole run.
    """
    title_xpath = '//*[@id="app"]/div/div/div/dl/dd/div/div/div[1]/p[1]/a/text()'
    # Open the output file once rather than re-opening it for every title.
    with open('manyan1.txt', 'a', encoding='utf8') as f:
        for i in range(0, 10):  # crawl multiple pages
            url = 'https://maoyan.com/board/4?offset=' + str(i * 10)
            html = get_one_page(url)
            if not html:
                # Nothing to parse for this page; move on to the next one.
                continue
            s = etree.HTML(html)
            titles = s.xpath(title_xpath)  # xpath() returns a list of strings
            for title in titles:
                print(title)
                f.write(title + '\n')
# Script entry point: start the crawler only when run directly.
if __name__ == '__main__':
    save_data()
1. 本文利用 Requests 与 XPath 爬取猫眼电影 TOP100 榜单。
2. 由于本人初学爬虫，如有不对的地方，请指正。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。