当前位置:   article > 正文

爬虫——python爬取京东商品用户评价_爬取京东商品评论

爬取京东商品评论

以小米手环7为例,分别爬取小米手环7用户评价中的好评、中评、差评

使用工具:PyCharm Community

需要python库:requests

安装方法:File-->Settings-->Project-->Python Interpreter,点击“+”,搜索 requests 并安装(也可以在终端执行 pip install requests)

代码如下:

好评:

  1. import requests
  2. if __name__ == "__main__":
  3.     #爬取好评100页
  4. for page in range(0,100):
  5. url = 'https://club.jd.com/comment/productPageComments.action'
  6. param = {
  7. 'productId': '100039939514',
  8. 'score': '3', #好评score
  9. 'sortType': '5',
  10. 'page': page,
  11. 'pageSize': '10',
  12. 'isShadowSku': '0',
  13. 'fold': '1',
  14. }
  15.     #UA伪装
  16. headers = {
  17. 'cookie': 'shshshfpa=353c3350-9f6e-c6e4-75c2-e45fb0638a20-1677122793; shshshfpb=cYvrYbQje1MA2t7vxC5UUEw; __jdv=76161171|direct|-|none|-|1679360388347; __jdu=1677051379551729066919; areaId=14; PCSYCityID=CN_340000_340100_0; shshshfpx=353c3350-9f6e-c6e4-75c2-e45fb0638a20-1677122793; __jda=122270672.1677051379551729066919.1677051379.1677240645.1679360388.14; __jdc=122270672; jsavif=1; shshshfp=560297ae18037fe111337616ab2a555f; token=06336cfeaa30940f5c417f6798e29f98,2,932978; __tk=115a0c213a52a38c2ce94507d97fc721,2,932978; ipLoc-djd=14-1116-3431-57939; 3AB9D23F7A4B3C9B=T6XOSS2CQO2OX3CXET3VGDVF7I5HMHLXB4ZJR7Y73ZLZJCFPBJOSJNGAPFVEW5DQB6OJQEHGFPLPICSY2LRQX6UUGM; jwotest_product=99; CA1AN5BV0CA8DS2EPC=166bb245180140fcb233e32ead6800cb; PCA9D23F7A4B3CSS=7fe8a2d8af887bd902df1a00848ab151; 3AB9D23F7A4B3CSS=jdd03T6XOSS2CQO2OX3CXET3VGDVF7I5HMHLXB4ZJR7Y73ZLZJCFPBJOSJNGAPFVEW5DQB6OJQEHGFPLPICSY2LRQX6UUGMAAAAMHAHEIWMQAAAAACMTZKWKC62MG3AX; _gia_d=1; shshshsID=55c4b556288dea72398a8eb93ef6dc03_8_1679362134063; __jdb=122270672.9.1677051379551729066919|14.1679360388; JSESSIONID=D6FE691B40A1D1D5386BAA5EDD77C29D.s1',
  18. 'referer': 'https://item.jd.com/',
  19. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
  20. }
  21. response = requests.get(url=url, params=param, headers=headers)
  22. for index in response.json()['comments']:
  23. content = index['content']
  24. print(content)
  25. with open('good_comments2.txt', mode='a', encoding='utf-8') as fp:
  26. fp.write(content)
  27. fp.write('\n')

中评:

  1. import requests
  2. if __name__ == "__main__":
  3.     #爬取中评55页
  4. for page in range(0,55):
  5. url = 'https://club.jd.com/comment/productPageComments.action'
  6. param = {
  7. 'productId': '100023000435',
  8. 'score': '2', #中评score
  9. 'sortType': '5',
  10. 'page': page,
  11. 'pageSize': '10',
  12. 'isShadowSku': '0',
  13. 'fold': '1',
  14. }
  15. headers = {
  16. 'cookie': 'shshshfpa=353c3350-9f6e-c6e4-75c2-e45fb0638a20-1677122793; shshshfpb=cYvrYbQje1MA2t7vxC5UUEw; __jdv=76161171|direct|-|none|-|1679360388347; __jdu=1677051379551729066919; areaId=14; PCSYCityID=CN_340000_340100_0; shshshfpx=353c3350-9f6e-c6e4-75c2-e45fb0638a20-1677122793; __jda=122270672.1677051379551729066919.1677051379.1677240645.1679360388.14; __jdc=122270672; jsavif=1; shshshfp=560297ae18037fe111337616ab2a555f; token=06336cfeaa30940f5c417f6798e29f98,2,932978; __tk=115a0c213a52a38c2ce94507d97fc721,2,932978; ipLoc-djd=14-1116-3431-57939; 3AB9D23F7A4B3C9B=T6XOSS2CQO2OX3CXET3VGDVF7I5HMHLXB4ZJR7Y73ZLZJCFPBJOSJNGAPFVEW5DQB6OJQEHGFPLPICSY2LRQX6UUGM; jwotest_product=99; CA1AN5BV0CA8DS2EPC=166bb245180140fcb233e32ead6800cb; PCA9D23F7A4B3CSS=7fe8a2d8af887bd902df1a00848ab151; 3AB9D23F7A4B3CSS=jdd03T6XOSS2CQO2OX3CXET3VGDVF7I5HMHLXB4ZJR7Y73ZLZJCFPBJOSJNGAPFVEW5DQB6OJQEHGFPLPICSY2LRQX6UUGMAAAAMHAHEIWMQAAAAACMTZKWKC62MG3AX; _gia_d=1; shshshsID=55c4b556288dea72398a8eb93ef6dc03_8_1679362134063; __jdb=122270672.9.1677051379551729066919|14.1679360388; JSESSIONID=D6FE691B40A1D1D5386BAA5EDD77C29D.s1',
  17. 'referer': 'https://item.jd.com/',
  18. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
  19. }
  20. response = requests.get(url=url, params=param, headers=headers)
  21. for index in response.json()['comments']:
  22. content = index['content']
  23. with open('middle_comments.txt', mode='a', encoding='utf-8') as fp:
  24. fp.write(content)
  25. fp.write('\n')

差评:

  1. import requests
  2. if __name__ == "__main__":
  3.     #爬取差评69页
  4. for page in range(0,69):
  5. url = 'https://club.jd.com/comment/productPageComments.action'
  6. param = {
  7. 'productId': '100023203263',
  8. 'score': '1', #差评score
  9. 'sortType': '5',
  10. 'page': page,
  11. 'pageSize': '10',
  12. 'isShadowSku': '0',
  13. 'fold': '1',
  14. }
  15. headers = {
  16. 'cookie': 'shshshfpa=353c3350-9f6e-c6e4-75c2-e45fb0638a20-1677122793; shshshfpb=cYvrYbQje1MA2t7vxC5UUEw; __jdv=76161171|direct|-|none|-|1679360388347; __jdu=1677051379551729066919; areaId=14; PCSYCityID=CN_340000_340100_0; shshshfpx=353c3350-9f6e-c6e4-75c2-e45fb0638a20-1677122793; __jda=122270672.1677051379551729066919.1677051379.1677240645.1679360388.14; __jdc=122270672; jsavif=1; shshshfp=560297ae18037fe111337616ab2a555f; token=06336cfeaa30940f5c417f6798e29f98,2,932978; __tk=115a0c213a52a38c2ce94507d97fc721,2,932978; ipLoc-djd=14-1116-3431-57939; 3AB9D23F7A4B3C9B=T6XOSS2CQO2OX3CXET3VGDVF7I5HMHLXB4ZJR7Y73ZLZJCFPBJOSJNGAPFVEW5DQB6OJQEHGFPLPICSY2LRQX6UUGM; jwotest_product=99; CA1AN5BV0CA8DS2EPC=166bb245180140fcb233e32ead6800cb; PCA9D23F7A4B3CSS=7fe8a2d8af887bd902df1a00848ab151; 3AB9D23F7A4B3CSS=jdd03T6XOSS2CQO2OX3CXET3VGDVF7I5HMHLXB4ZJR7Y73ZLZJCFPBJOSJNGAPFVEW5DQB6OJQEHGFPLPICSY2LRQX6UUGMAAAAMHAHEIWMQAAAAACMTZKWKC62MG3AX; _gia_d=1; shshshsID=55c4b556288dea72398a8eb93ef6dc03_8_1679362134063; __jdb=122270672.9.1677051379551729066919|14.1679360388; JSESSIONID=D6FE691B40A1D1D5386BAA5EDD77C29D.s1',
  17. 'referer': 'https://item.jd.com/',
  18. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
  19. }
  20. response = requests.get(url=url, params=param, headers=headers)
  21. for index in response.json()['comments']:
  22. content = index['content']
  23. with open('bad_comments.txt', mode='a', encoding='utf-8') as fp:
  24. fp.write(content)
  25. fp.write('\n')

其中重要参数来源:

打开开发者工具(快捷键 F12,或鼠标右键-->检查),切换到“网络”面板,找到对 productPageComments.action 的请求:

url = 'https://club.jd.com/comment/productPageComments.action'

不包括?号后参数

param参数:载荷-->查询字符串参数(即 url 中?号后的参数)

headers:标头-->请求标头:cookie、referer、user-agent

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/384563
推荐阅读
相关标签
  

闽ICP备14008679号