赞
踩
效果如图:
和驴妈妈的同理,修改data中id就可以对其他景点进行爬虫
- from gevent import monkey
- monkey.patch_all()
- import gevent
- import openpyxl
- import requests
- import time
-
- finishPage = 0
- allList = []
- page = 2
- def comment(sightId,page):
- url = "https://piao.qunar.com/ticket/detailLight/sightCommentList.json"
- params = {
- "sightId":str(sightId),
- "index":str(page),
- "page":str(page),
- "pageSize":"10",
- "tagType":"0",
- }
- headers = {
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
- }
- res = requests.get(url=url,headers=headers,params=params,timeout=5)
- #判断服务器返回数据是否正确
- while res.text[1] == "r":
- res = requests.get(url=url,headers=headers,params=params,timeout=5)
- else:
- pass
-
- results = res.json()["data"]
- for result in results["commentList"]:
- #评论者id
- author = result["author"]
- #评论日期
- publishedDate = result["date"]
- #总评分
- score = result["score"]
- #图片数量
- imgNum = len(result["imgs"])
- #评论内容
- text = result["content"]
-
- commentList = [author,publishedDate,score,imgNum,text]
- allList.append(commentList)
- print(commentList)
- time.sleep(5)
-
- def storage(name,reviewsList):
- header = ['评论者ID','评论日期','总评分','图片数量','文本评论']
- wb = openpyxl.Workbook()
- sheet = wb.active
- sheet.title = "commentInfo"
- sheet.append(header)
- for reviewList in reviewsList:
- sheet.append(reviewList)
- wb.save("存储/去哪儿 " + name + "'s "+ 'comment.xlsx')
-
- if __name__ == "__main__":
- taskList = []
- for i in range(1,301):
- try:
- # task = gevent.spawn(comment,191026,i)
- # taskList.append(task)
- # gevent.joinall(taskList)
- comment(191026,i)
- except:
- pass
-
- storage("windows of the world",allList)
- print(len(allList))
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。