当前位置:   article > 正文

python-每日一练-抽取某本书的前50条短评内容并计算评分的平均值_python里面用requests如何提取前五十信息

python里面用requests如何提取前五十信息

python-每日一练-抽取某本书的前50条短评内容并计算评分的平均值 

  1. '''
  2. 抽取某本书的前50条短评内容并计算评分的平均值。提示:有的评论中并不包含评分。
  3. '''
  4. import requests
  5. import re
  6. from bs4 import BeautifulSoup
  7. import time
  8. from functools import reduce
# HTTP request headers sent with every request made by get_shorts.
# NOTE(review): the Cookie value below is hard-coded and appears to contain
# session values (e.g. `dbcl2`, `ck`) — presumably copied from a logged-in
# browser session. Confirm, and consider loading it from the environment
# instead of keeping it in the source.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'Cookie': 'll="118159"; bid=Tlj9LZXK6qY; __utmc=30149280; __utmc=81379588; gr_user_id=4cc23846-8110-487a-9875-d2c22a01ffc5; _vwo_uuid_v2=DDF9B30D066068B64AEED05150ED2CC21|59ca475fd37ba868d8b940b30cb8051c; __gads=ID=30286cce13d6257e-222fedbd9fd00006:T=1644908408:RT=1644908408:S=ALNI_Mbxvt-k0lb2MC6tAcI60A5qPobCCw; __yadk_uid=OU6iPv3WVtSb7pup23vqweBrH4Gj0jdG; _ga=GA1.1.1475140875.1644908471; refer_url=https://read.douban.com/category/105; viewed="35630000_35620000_35610000_35690000_10790000_10800000_10900000_11000000_11000044_11000544"; dbcl2="191997283:aBzcj8HkRFg"; ck=7X40; gr_session_id_22c937bbd8ebd703f2d8e9445f7dfd03=9240ed32-5eb3-42cf-941a-95cdfa3e0efa; gr_cs1_9240ed32-5eb3-42cf-941a-95cdfa3e0efa=user_id%3A1; __utma=30149280.1907998096.1644908381.1644913515.1644932425.3; __utmz=30149280.1644932425.3.2.utmcsr=read.douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmt_douban=1; __utmz=81379588.1644932425.3.2.utmcsr=read.douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utma=81379588.181811108.1644908384.1644913517.1644932425.3; __utmt=1; _pk_ref.100001.3ac3=%5B%22%22%2C%22%22%2C1644932425%2C%22https%3A%2F%2Fread.douban.com%2F%22%5D; _pk_ses.100001.3ac3=*; gr_session_id_22c937bbd8ebd703f2d8e9445f7dfd03_9240ed32-5eb3-42cf-941a-95cdfa3e0efa=true; push_doumail_num=0; push_noty_num=0; _ga_RXNMP372GL=GS1.1.1644931471.4.1.1644932479.60; __utmt=1; __utmv=30149280.19199; __utmb=30149280.9.10.1644932425; __utmb=81379588.5.10.1644932425; _pk_id.100001.3ac3=5c3c19525f813f77.1644908384.3.1644932627.1644914955.'
}
  13. # 获取短评评分
  14. def get_shorts(arr,num,start):
  15. time.sleep(1)
  16. patter = re.compile(pattern='"user-stars allstar(.*) rating"')
  17. r = requests.get('https://book.douban.com/subject/26873486/comments/?start={}&limit=20&status=P&sort=new_score'.format(str(start)),headers = headers)
  18. if r.status_code == 200 :
  19. market = r.text
  20. soup = BeautifulSoup(market,'lxml')
  21. datas = soup.find_all('li','comment-item')
  22. data_lens = len(datas)
  23. # print(data_lens)
  24. for i in range(0,data_lens):
  25. data = datas[i]
  26. data_str = str(data)
  27. patt_rs = re.findall(patter,data_str)
  28. if patt_rs and len(arr)<num:
  29. # 获取短评评分
  30. score = int(patt_rs[0])
  31. # 获取短评内容
  32. short = data.find('span','short')
  33. # print(patt_rs)
  34. # print(short)
  35. arr.append(score)
  36. return arr,start
  37. else:
  38. print('获取失败',r.status_code)
  39. return arr,start
  40. # 评分列表
  41. arr_score = []
  42. # 获取评分数量
  43. score_num = 50
  44. # 起始页数
  45. start = 0
  46. # 获取50条短评评分
  47. while len(arr_score)<50:
  48. arr,start = get_shorts(arr_score,score_num,start)
  49. start+=1
  50. # 获取短评评分平均分
  51. avg_score = reduce(lambda x,y:x+y,arr_score)//len(arr_score)
  52. # 打印
  53. print('短评评分平均分为:',avg_score)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/从前慢现在也慢/article/detail/389726
推荐阅读
相关标签
  

闽ICP备14008679号