当前位置:   article > 正文

【Python】基于requests库对去哪儿景点评论信息的爬取_爬去哪儿网评论

爬去哪儿网评论

效果如图:

与驴妈妈的爬取方式同理,只需修改调用 comment() 时传入的 sightId(示例中为 191026),即可爬取其他景点的评论。

  1. from gevent import monkey
  2. monkey.patch_all()
  3. import gevent
  4. import openpyxl
  5. import requests
  6. import time
# Module-level state for the scraper.
# finishPage: initialised to 0; no code in this listing reads or updates it.
finishPage = 0
# allList: accumulates one row per scraped review. comment() appends to it and
# the script entry point hands it to storage().
allList = []
# page: initialised to 2; comment() takes its own ``page`` parameter, so this
# module-level value is not read by any code in this listing.
page = 2
  10. def comment(sightId,page):
  11. url = "https://piao.qunar.com/ticket/detailLight/sightCommentList.json"
  12. params = {
  13. "sightId":str(sightId),
  14. "index":str(page),
  15. "page":str(page),
  16. "pageSize":"10",
  17. "tagType":"0",
  18. }
  19. headers = {
  20. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
  21. }
  22. res = requests.get(url=url,headers=headers,params=params,timeout=5)
  23. #判断服务器返回数据是否正确
  24. while res.text[1] == "r":
  25. res = requests.get(url=url,headers=headers,params=params,timeout=5)
  26. else:
  27. pass
  28. results = res.json()["data"]
  29. for result in results["commentList"]:
  30. #评论者id
  31. author = result["author"]
  32. #评论日期
  33. publishedDate = result["date"]
  34. #总评分
  35. score = result["score"]
  36. #图片数量
  37. imgNum = len(result["imgs"])
  38. #评论内容
  39. text = result["content"]
  40. commentList = [author,publishedDate,score,imgNum,text]
  41. allList.append(commentList)
  42. print(commentList)
  43. time.sleep(5)
  44. def storage(name,reviewsList):
  45. header = ['评论者ID','评论日期','总评分','图片数量','文本评论']
  46. wb = openpyxl.Workbook()
  47. sheet = wb.active
  48. sheet.title = "commentInfo"
  49. sheet.append(header)
  50. for reviewList in reviewsList:
  51. sheet.append(reviewList)
  52. wb.save("存储/去哪儿 " + name + "'s "+ 'comment.xlsx')
  53. if __name__ == "__main__":
  54. taskList = []
  55. for i in range(1,301):
  56. try:
  57. # task = gevent.spawn(comment,191026,i)
  58. # taskList.append(task)
  59. # gevent.joinall(taskList)
  60. comment(191026,i)
  61. except:
  62. pass
  63. storage("windows of the world",allList)
  64. print(len(allList))
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/372254
推荐阅读
相关标签
  

闽ICP备14008679号