当前位置:   article > 正文

汽车之家评论

汽车之家评论
import csv
import os

import requests
from lxml import etree
import re



class Spider():
    """Scrape owner reviews for electric-vehicle series listed on autohome.com.cn.

    The crawl has three stages:

    1. ``spider_list`` fetches a price-range listing page and extracts the
       numeric series id from each series link.
    2. ``spider_subdata`` pages through the review-list JSON API for a series.
    3. ``spider_detail`` fetches one review page, extracts the text fields and
       appends them to ``<price>.csv`` via ``save_data_to_csv``.

    Attributes:
        headers: HTTP request headers sent with every request.
        cookies: Cookies sent with every request.
        price: Price-range suffix currently being crawled; also the CSV stem.
        count: Number of reviews saved so far.
    """

    # Every price-range suffix the original script listed.
    PRICE_RANGES = ('_0_5', '_5_8', '_8_15', '_15_20', '_20_30', '_30_50',
                    '_50_100', '_100_9000')
    # The original script overrode the list above with this single range,
    # so it stays the default to keep behaviour unchanged.
    DEFAULT_PRICE_RANGES = ('_30_9000',)

    # CSV column titles, in the order the values are written.
    CSV_HEADERS = ('最满意', '最不满意', '空间', '驾驶感受', '续航', '外观',
                   '内饰', '性价比', '智能化')

    # Seconds to wait for a response; without it a stalled connection would
    # block the crawl forever.
    REQUEST_TIMEOUT = 30

    # Example link: 'https://www.autohome.com.cn/5714/#pvareaid=6861421'
    _SERIES_ID_RE = re.compile(r'www\.autohome\.com\.cn/(\d+)/#pvareaid')

    def __init__(self):
        self.headers = {
            "authority": "www.autohome.com.cn",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "referer": "https://www.autohome.com.cn/beijing/",
            "sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Microsoft Edge\";v=\"122\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
        }
        self.cookies = {
            "fvlid": "1708592005308QmeCfkEcQf",
            "__ah_uuid_ng": "",
            "sessionuid": "be9bf153-8fda-41f7-99e3-9fbc5172d455",
            "ASP.NET_SessionId": "bmfejflbhqwqxmdwkf5jfuya",
            "ahsids": "5714_5998",
            "historybbsName4": "c-5998%7C%E5%A5%A5%E8%BF%AAA7L%2Cc-5714%7C%E5%AE%8F%E5%85%89MINIEV",
            "historyseries": "5714%2C5998",
            "ahpvno": "21",
            "pvidchain": "2112108,6830286,6861598,2042204,2042204,101075,6830286,6861598,6861421,3454440",
            "ahrlid": "1716956158466JPRl4Pm0jp-1716956304138"
        }
        self.price = ''
        self.count = 0

    def spider_list(self, price_list=None, max_series=5):
        """Crawl the series listing for each price range and scrape its reviews.

        Args:
            price_list: Iterable of price-range suffixes such as ``'_30_50'``.
                Defaults to ``DEFAULT_PRICE_RANGES``.
            max_series: Maximum number of series links followed per listing
                page (the original script hard-coded 5).
        """
        if price_list is None:
            price_list = self.DEFAULT_PRICE_RANGES

        for price in price_list:
            # save_data_to_csv derives the output filename from self.price.
            self.price = price

            url = f"https://www.autohome.com.cn/price/ev/price{self.price}"
            response = requests.get(url, headers=self.headers, cookies=self.cookies,
                                    timeout=self.REQUEST_TIMEOUT).text

            home_html = etree.HTML(response)
            if home_html is None:
                # Nothing parseable came back; move on to the next range.
                continue
            links = home_html.xpath("//li[@class='tw-group tw-relative tw-cursor-pointer tw-overflow-hidden tw-rounded tw-bg-[#F7FAFE] tw-pb-4 tw-text-center tw-text-[#111E36] hover:tw-shadow-[0_8px_32px_0_rgba(17,30,54,0.1)]']/div[@class='tw-mt-1 tw-px-4']/a/@href")[:max_series]

            for index, link in enumerate(links):
                match = self._SERIES_ID_RE.search(link)
                if match:
                    # group(1) is the numeric series id captured from the URL.
                    self.spider_subdata(match.group(1), 1, index)

    def spider_subdata(self, seriesId, pageIndex, index):
        """Page through the review list of one series and scrape every review.

        Iterates from ``pageIndex`` until the API returns an empty list. A loop
        is used instead of recursion so that series with many pages cannot
        exhaust the interpreter's recursion limit.

        Args:
            seriesId: Series id as extracted from the listing link.
            pageIndex: First page to request (1-based in the original caller).
            index: Zero-based position of the series on the listing page; used
                only for progress output.
        """
        url = "https://koubeiipv6.app.autohome.com.cn/pc/series/list"

        while True:
            params = {
                "pm": "3",
                "seriesId": f"{seriesId}",
                "pageIndex": f"{pageIndex}",
                "pageSize": "20",
                "yearid": "0",
                "ge": "0",
                "seriesSummaryKey": "0",
                "order": "0"
            }
            response = requests.get(url, headers=self.headers, cookies=self.cookies,
                                    params=params, timeout=self.REQUEST_TIMEOUT).json()

            # A missing or null 'result'/'list' is treated as "no more pages"
            # instead of raising KeyError/TypeError mid-crawl.
            result = response.get('result') or {}
            comment_list = result.get('list') or []
            if not comment_list:
                break

            for comments in comment_list:
                self.spider_detail(comments['showId'])

            print(f'车型{index+1} : 第{pageIndex}页爬取完毕')
            pageIndex += 1

    def spider_detail(self, showId):
        """Fetch one review page and save it when all seven ratings are present.

        Args:
            showId: Review identifier taken from the review-list API.
        """
        url = f"https://k.autohome.com.cn/detail/view_{showId}.html"
        response = requests.get(url, headers=self.headers, cookies=self.cookies,
                                timeout=self.REQUEST_TIMEOUT).text
        html = etree.HTML(response)
        if html is None:
            return

        data = html.xpath("//div[@class='space kb-item']/p/text()")
        # Only reviews carrying all seven per-aspect comments are kept.
        if len(data) != 7:
            return

        # "Most satisfied" section.
        satisfied = ",".join(html.xpath("//div[@class='satisfied kb-item']/p/text()")).replace('\n', '').strip()
        # "Least satisfied" section.
        unsatis = ",".join(html.xpath("//div[@class='unsatis kb-item']/p/text()")).replace('\n', '').strip()

        # Positional order: space, driving feel, range, exterior, interior,
        # value for money, intelligence.
        aspects = [self.check_comment(data, position) for position in range(7)]
        data_list = [satisfied, unsatis, *aspects]

        self.count += 1
        print(f"爬取数量{self.count} , 数据 : {data_list}")
        self.save_data_to_csv(data_list)

    def check_comment(self, data, count):
        """Return ``data[count]`` with newlines removed, or ``''`` if unavailable.

        Args:
            data: Sequence of text fragments.
            count: Index of the fragment to read.

        Returns:
            The cleaned fragment, or an empty string when the index is out of
            range or the item is not string-like.
        """
        try:
            return data[count].replace('\n', '')
        except (IndexError, TypeError, AttributeError):
            return ''

    def save_data_to_csv(self, data_list):
        """Append one review row to ``<self.price>.csv``, creating it if needed.

        The header row is written once, when the file is missing or empty, and
        the data row is written on every call. The original version wrote only
        the header on the first call, silently dropping the first review.

        Args:
            data_list: Values for one row, in ``CSV_HEADERS`` order.
        """
        filename = f'{self.price}.csv'

        need_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
        # The BOM lets Excel detect UTF-8; it must only appear at the start of
        # the file, so plain utf-8 is used when appending to existing content.
        encoding = 'utf-8-sig' if need_header else 'utf-8'

        with open(filename, 'a', encoding=encoding, newline='') as f:
            writer = csv.writer(f)
            if need_header:
                writer.writerow(self.CSV_HEADERS)
            writer.writerow(data_list)






if __name__ == '__main__':
    # Script entry point: build a scraper and start crawling the listing pages.
    Spider().spider_list()
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/羊村懒王/article/detail/659762
推荐阅读
相关标签
  

闽ICP备14008679号