1. 爬虫
JD.py
- import requests
- from urllib.parse import quote
- from urllib.parse import urlencode
- from lxml import etree
- import logging
- import json
- import time
-
- class JDSpider:
- # 爬虫实现类:传入商品类别(如手机、电脑)构造实例,然后调用 getData 爬取数据。
- def __init__(self, categlory):
- self.startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8" % (quote(categlory)) # JD 起始搜索页面
- self.commentBaseUrl = "https://club.jd.com/comment/productPageComments.action?"
- self.headers = {
- "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
- }
- self.productsId = self.getId()
- self.comtype = {0: "nagetive", 1: "medium