赞
踩
博主几天前发布的文章,几天后又有了新的解决思路:
完整的代码放在文章最后,本文通过携程手机端(m.ctrip.com)的接口抓取数据。
首先请求到这个页面
我们的目标抓取景点名 详情链接 评分 景区等级 门票价格等数据
如果我们选择在主页面(上面这个页面)直接抓取,景点名和详情链接都可以拿到,但是有的景区没有评分和等级,各字段的数量就不一致,后期用 zip 方法处理数据时没有办法一一对应。因此我先抓到详情链接,再通过每一个详情链接去请求、抓取需要的数据。
1.先请求主页面得到详情链接
2.通过详情链接去请求子页面
兄弟们,大坑来了:通过详情链接请求到的子页面,数据是动态加载的,也就意味着直接请求页面本身抓不到评分、等级这类数据。
我是这样做的:
先随便点开一个子链接 按F12 打开抓包工具
这个包的请求参数 poiId(代码里的变量名写成了 poild)需要随景点改变
这个 poiId 可以从详情页链接里得到,于是我用正则把它匹配下来,带着它去请求这个 url
这个数据包里只有景区评分和景区等级,没有景区价格。但景区价格也是同样的方法:先找到对应的数据包,然后带着 poiId 去请求就可以了
现在需求升级:
我现在需要全国的景区景点。很简单:先拿到全国的城市 id,然后逐个替换请求参数里的 districtId
只要改变这个参数就可以
城市id我选择从景区景点介入
有大坑来了
这个数据包所返回的是字符串,没有办法用
返回结果里 data 字段的值本身就是一段 JSON 文本(字符串),所以用 jsonpath 取出来的是 str 而不是字典;这应该与 PyCharm 无关
解决方法:
用正则匹配 代码
momos = re.findall(r'"districtId":(.*?),.*?"districtName":"(.*?)",',data,re.S)
城市id也有了,看我最后的代码
"""Scrape scenic-spot data for every district from Ctrip's mobile site.

Flow: ``XCspider.cityID`` downloads the district list, ``sendUrl`` pages
through the attraction list of the current district, and ``jieXi`` enriches
every listed attraction with its rating/grade (``ziUrl``) and ticket price
(``priceUrl``) before ``save`` writes one row to MySQL.
"""
import json
import random
import re
import sys
import time

import mysql.connector
import requests
from fake_useragent import UserAgent
from jsonpath import jsonpath

# requests never times out on its own, so every call gets an explicit limit.
REQUEST_TIMEOUT = 15
# A failed request is retried this many times, then given up on.
MAX_RETRIES = 3
# Seconds slept after the n-th failed attempt: RETRY_BACKOFF * n.
RETRY_BACKOFF = 1.0
# Number of list pages fetched per district ("take two pages to see").
PAGES_PER_DISTRICT = 2

_LIST_URL = "https://m.ctrip.com/restapi/soa2/18254/json/getAttractionList"
_OVERVIEW_URL = "https://m.ctrip.com/restapi/soa2/18254/json/GetSightOverview"
_EXTEND_URL = "https://m.ctrip.com/restapi/soa2/18254/json/getSightExtendInfo"
_DISTRICT_URL = (
    "https://m.ctrip.com/restapi/soa2/17916/json/invokeOnDemand"
    "?_fxpcqlniredt=09031137218199825303"
    "&x-traceID=09031137218199825303-1656049564481-6585044"
)

_USER_AGENT = (
    'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/86.0.4240.198 Mobile Safari/537.36'
)

# Pairs "districtId"/"districtName" inside the JSON text of the district list.
_DISTRICT_RE = re.compile(
    r'"districtId":(.*?),.*?"districtName":"(.*?)",', re.S)
# POI id inside a detail URL; also matches when poiId is the last parameter.
_POI_ID_RE = re.compile(r'poiId=([^&#]+)')

_LIST_REFERER = (
    'https://m.ctrip.com/webapp/you/gspoi/sight/2.html'
    '?seo=0&allianceid=4897&sid=130026&isHideNavBar=YES'
    '&from=https%3A%2F%2Fm.ctrip.com%2Fwebapp%2Fyou%2Fgsdestination%2Fplace%2F2.html'
    '%3Fseo%3D0%26ishideheader%3Dtrue%26secondwakeup%3Dtrue%26dpclickjump%3Dtrue'
    '%26allianceid%3D4897%26sid%3D130026'
    '%26from%3Dhttps%253A%252F%252Fm.ctrip.com%252Fhtml5%252F'
)
_DISTRICT_REFERER = (
    'https://m.ctrip.com/webapp/you/gsdestination/citySelector/citySelector.html'
    '?seo=0&districtId=2&allianceid=4897&sid=155952&isHideNavBar=YES'
    '&from=https%3A%2F%2Fm.ctrip.com%2Fwebapp%2Fyou%2Fgsdestination%2Fplace%2F2.html'
    '%3Fseo%3D0%26ishideheader%3Dtrue%26secondwakeup%3Dtrue%26dpclickjump%3Dtrue'
    '%26allianceid%3D4897%26sid%3D155952%26ouid%3Dindex'
    '%26from%3Dhttps%253A%252F%252Fm.ctrip.com%252Fhtml5%252F'
)
_OVERVIEW_REFERER = (
    'https://m.ctrip.com/webapp/you/gspoi/sight/2/0.html'
    '?poiId=75627&seo=0&isHideNavBar=YES'
)
_EXTEND_REFERER = (
    'https://m.ctrip.com/webapp/you/gspoi/sight/2/0.html'
    '?poiId=81728&seo=0&isHideNavBar=YES'
)

# --- Captured browser cookies -------------------------------------------
# The four requests were captured separately, so their cookies differ in a
# few entries.  Each cookie is kept as an ordered tuple of "name=value"
# entries and joined with "; " so the header sent is the captured one.
_COMMON_COOKIES = (
    'GUID=09031137218199825303',
    'MKT_CKID=1655441050631.y3cpg.1v7v',
    '_RSG=.mK7Vo8KIkFkJFbXO64ek8',
    '_RGUID=fb92508c-9f1d-4c3e-8ba8-10662194f4ee',
    '_RDG=2894d2e5596bd8261b1bd4f0d138733ac9',
    '_bfaStatusPVSend=1',
    'nfes_isSupportWebP=1',
    'nfes_isSupportWebP=1',
    '_ga=GA1.2.1595398136.1655441061',
    'ibu_h5_lang=en',
    'ibu_h5_local=en-us',
    'StartCity_Pkg=PkgStartCity=25',
    'ibulanguage=CN',
    'ibulocale=zh_cn',
    'cookiePricesDisplayed=CNY',
    '_gid=GA1.2.2020737530.1655798638',
)
_SESSION_COOKIE = (
    'Session=smartlinkcode=U130026&smartlinklanguage=zh'
    '&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost='
)
_ZPSPC_COOKIE = (
    '__zpspc=9.8.1655959369.1655959369.1%232%7Cwww.baidu.com%7C%7C%7C%7C%23'
)
_JZQCO_COOKIE = (
    '_jzqco=%7C%7C%7C%7C1655959370213%7C1.828646019.1655441050624'
    '.1655606559937.1655959369657.1655606559937.1655959369657'
    '.undefined.0.0.10.10'
)
_BFI_COOKIE = '_bfi=p1%3D102001%26p2%3D0%26v1%3D237%26v2%3D0'
_DETAIL_UNION_COOKIE = (
    'Union=OUID=&AllianceID=4897&SID=130026&SourceID=&AppID=&OpenID='
    '&exmktID=&createtime=1655963140&Expires=1656567939691'
)
_TRAILING_GUID_COOKIE = 'GUID=09031168218403899804'

_ORDER_CLICK_CURL = (
    'https%3A%2F%2Fm.ctrip.com%2Fwebapp%2Fyou%2Fgspoi%2Fsight%2F2.html'
    '%3Fseo%3D0%26allianceid%3D4897%26sid%3D130026%26isHideNavBar%3DYES'
    '%26from%3Dhttps%253A%252F%252Fm.ctrip.com%252Fwebapp%252Fyou'
    '%252Fgsdestination%252Fplace%252F2.html%253Fseo%253D0'
    '%2526ishideheader%253Dtrue%2526secondwakeup%253Dtrue'
    '%2526dpclickjump%253Dtrue%2526allianceid%253D4897%2526sid%253D130026'
    '%2526from%253Dhttps%25253A%25252F%25252Fm.ctrip.com%25252Fhtml5%25252F'
)


def _order_click(click_time):
    """Return the MKT_OrderClick cookie entry for the given CT timestamp."""
    return (
        'MKT_OrderClick=ASID=4897130026&AID=4897&CSID=130026&OUID='
        f'&CT={click_time}&CURL={_ORDER_CLICK_CURL}'
        '&VAL={"h5_vid":"1655441049907.gadsg"}'
    )


def _cookie(*entries):
    """Join the shared cookie entries and *entries* into one Cookie header."""
    return '; '.join(_COMMON_COOKIES + entries)


_LIST_COOKIE = _cookie(
    _order_click('1655883035853'),
    _SESSION_COOKIE,
    'MKT_CKID_LMT=1655959369643',
    _ZPSPC_COOKIE,
    _JZQCO_COOKIE,
    '_bfs=1.1',
    '_RF1=183.253.26.192',
    'appFloatCnt=1',
    _BFI_COOKIE,
    '_bfaStatus=success',
    '_gat=1',
    'MKT_Pagesource=H5',
    'mktDpLinkSource=ullink',
    '_pd=%7B%22r%22%3A20%2C%22d%22%3A253%2C%22_d%22%3A233%2C%22p%22%3A253'
    '%2C%22_p%22%3A0%2C%22o%22%3A254%2C%22_o%22%3A1%2C%22s%22%3A255'
    '%2C%22_s%22%3A1%7D',
    '_bfa=1.1655441049907.gadsg.1.1655886903531.1655959404092.24.240.214059',
    '_ubtstatus=%7B%22vid%22%3A%221655441049907.gadsg%22%2C%22sid%22%3A24'
    '%2C%22pvid%22%3A240%2C%22pid%22%3A%22214059%22%7D',
    'Union=OUID=&AllianceID=4897&SID=130026&SourceID='
    '&createtime=1655959405&Expires=1656564204559',
    _TRAILING_GUID_COOKIE,
)
_DISTRICT_COOKIE = _cookie(
    _SESSION_COOKIE,
    'MKT_Pagesource=H5',
    '_jzqco=%7C%7C%7C%7C1655959370213%7C1.828646019.1655441050624'
    '.1655967880712.1655981364160.1655967880712.1655981364160'
    '.undefined.0.0.12.12',
    '__zpspc=9.10.1655981364.1655981364.1%232%7Cwww.baidu.com%7C%7C%7C'
    '%25E6%2590%25BA%25E7%25A8%258B%7C%23',
    'appFloatCnt=3',
    '_bfi=p1%3D102001%26p2%3D0%26v1%3D272%26v2%3D0',
    '_bfaStatus=success',
    'MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index'
    '&CT=1655985078165&CURL=https%3A%2F%2Fm.ctrip.com%2Fhtml5%2F'
    '%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex'
    '&VAL={"h5_vid":"1655441049907.gadsg"}',
    'librauuid=',
    '_RF1=183.253.27.72',
    'mktDpLinkSource=ullink',
    '_pd=%7B%22r%22%3A5%2C%22d%22%3A145%2C%22_d%22%3A140%2C%22p%22%3A146'
    '%2C%22_p%22%3A1%2C%22o%22%3A149%2C%22_o%22%3A3%2C%22s%22%3A155'
    '%2C%22_s%22%3A6%7D',
    '_bfa=1.1655441049907.gadsg.1.1655977855790.1656049564258.32.291.214059',
    '_ubtstatus=%7B%22vid%22%3A%221655441049907.gadsg%22%2C%22sid%22%3A32'
    '%2C%22pvid%22%3A291%2C%22pid%22%3A%22214059%22%7D',
    'Union=OUID=&AllianceID=4897&SID=155952&SourceID='
    '&createtime=1656049564&Expires=1656654364431',
    _TRAILING_GUID_COOKIE,
)
_OVERVIEW_COOKIE = _cookie(
    _SESSION_COOKIE,
    'MKT_CKID_LMT=1655959369643',
    _ZPSPC_COOKIE,
    _JZQCO_COOKIE,
    '_RF1=183.253.26.192',
    'appFloatCnt=1',
    _BFI_COOKIE,
    '_bfaStatus=success',
    'MKT_Pagesource=H5',
    'librauuid=',
    _DETAIL_UNION_COOKIE,
    _order_click('1655963139698'),
    'hotelhst=1164390341',
    '_pd=%7B%22r%22%3A18%2C%22d%22%3A289%2C%22_d%22%3A271%2C%22p%22%3A290'
    '%2C%22_p%22%3A1%2C%22o%22%3A292%2C%22_o%22%3A2%2C%22s%22%3A293'
    '%2C%22_s%22%3A1%7D',
    '_bfa=1.1655441049907.gadsg.1.1655886903531.1655963157778.24.250.214070',
    '_ubtstatus=%7B%22vid%22%3A%221655441049907.gadsg%22%2C%22sid%22%3A24'
    '%2C%22pvid%22%3A250%2C%22pid%22%3A214070%7D',
    _TRAILING_GUID_COOKIE,
)
_EXTEND_COOKIE = _cookie(
    _SESSION_COOKIE,
    'MKT_CKID_LMT=1655959369643',
    _ZPSPC_COOKIE,
    _JZQCO_COOKIE,
    '_RF1=183.253.26.192',
    'appFloatCnt=1',
    _BFI_COOKIE,
    '_bfaStatus=success',
    'MKT_Pagesource=H5',
    _DETAIL_UNION_COOKIE,
    _order_click('1655963139698'),
    'librauuid=',
    'hotelhst=1164390341',
    '_pd=%7B%22r%22%3A14%2C%22d%22%3A327%2C%22_d%22%3A313%2C%22p%22%3A328'
    '%2C%22_p%22%3A1%2C%22o%22%3A330%2C%22_o%22%3A2%2C%22s%22%3A331'
    '%2C%22_s%22%3A1%7D',
    '_bfa=1.1655441049907.gadsg.1.1655886903531.1655966576567.25.253.214070',
    '_ubtstatus=%7B%22vid%22%3A%221655441049907.gadsg%22%2C%22sid%22%3A25'
    '%2C%22pvid%22%3A253%2C%22pid%22%3A214070%7D',
    _TRAILING_GUID_COOKIE,
)


def _request_head():
    """Return a fresh copy of the "head" object every API payload carries."""
    return {
        "cid": "09031137218199825303",
        "ctok": "",
        "cver": "1.0",
        "lang": "01",
        "sid": "8888",
        "syscode": "09",
        "xsid": "",
        "extension": [],
    }


def _detail_headers(referer, cookie):
    """Build the headers shared by the two per-attraction detail requests."""
    return {
        'authority': 'm.ctrip.com',
        'cookieorigin': 'https://m.ctrip.com',
        'user-agent': _USER_AGENT,
        'content-type': 'application/json',
        'accept': '*/*',
        'origin': 'https://m.ctrip.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': referer,
        'accept-language': 'zh-CN,zh;q=0.9',
        'cookie': cookie,
    }


def _all_matches(data, expr):
    """Return every jsonpath match for *expr* as a list, [] when none.

    ``jsonpath`` answers ``False`` instead of an empty list when nothing
    matches, which breaks ``zip`` and indexing; this normalises the result.
    """
    found = jsonpath(data, expr) if data else False
    return list(found) if found else []


def _first_match(data, expr, default):
    """Return the first truthy jsonpath match for *expr*, else *default*.

    A missing key and a falsy value (``False``, ``0``, ``None``, ``''``)
    both mean "the site has no value for this attraction".
    """
    found = _all_matches(data, expr)
    if found and found[0]:
        return found[0]
    return default


def _as_district_id(raw):
    """Convert a captured district id to int when it is purely numeric."""
    raw = raw.strip()
    return int(raw) if raw.isdigit() else raw


class XCspider():
    """Crawler for Ctrip scenic spots that stores its results in MySQL.

    Attributes set by ``__init__``: ``randomtime`` (pause in seconds before
    parsing a list page), ``request_delay``, ``UA``, ``conn`` /
    ``mysql_cursor`` (MySQL handles), ``ip`` (the ``proxies`` mapping passed
    to requests), ``url`` / ``payload`` / ``headers`` (the attraction-list
    request; ``payload['districtId']`` and ``payload['index']`` are mutated
    while crawling).
    """

    def __init__(self, request_delay=0.0):
        """Open the database connection and prepare the list request.

        Args:
            request_delay: seconds to sleep before every HTTP request.  The
                default of 0 keeps the original pacing; raise it when the
                site starts rejecting requests for being too frequent.
        """
        self.randomtime = random.randint(1, 2)
        self.request_delay = request_delay
        self.UA = UserAgent()
        # NOTE(review): credentials are hard-coded; move them to
        # configuration before sharing or deploying this script.
        self.conn = mysql.connector.connect(
            host='localhost', user='root', password='123456',
            database='sjb', auth_plugin='mysql_native_password')
        self.mysql_cursor = self.conn.cursor()
        # Proxy pool; a failed request switches to another entry.
        self._proxy_pool = [
            '113.235.166.147:4278',
            '112.85.232.108:4278',
            '49.88.149.57:4263',
            '106.110.198.7:4236',
        ]
        self.ip = {'https': random.choice(self._proxy_pool)}
        self.url = _LIST_URL
        self.payload = {
            "index": 1,
            "count": 20,
            "sortType": 1,
            "isShowAggregation": True,
            "districtId": 2,
            "scene": "DISTRICT",
            "pageId": "214062",
            "traceId": "1dcf2c53-e847-7d59-c4a9-637959404701",
            "extension": [
                {"name": "osVersion", "value": "6.0"},
                {"name": "deviceType", "value": "android"},
            ],
            "filter": {"filterItems": []},
            "crnVersion": "2020-09-01 22:00:45",
            "isInitialState": True,
            "head": _request_head(),
        }
        self.headers = {
            'Connection': 'keep-alive',
            'cookieOrigin': 'https://m.ctrip.com',
            'User-Agent': _USER_AGENT,
            'content-type': 'application/json',
            'Accept': '*/*',
            'Origin': 'https://m.ctrip.com',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Dest': 'empty',
            'Referer': _LIST_REFERER,
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cookie': _LIST_COOKIE,
        }

    def close(self):
        """Release the MySQL cursor and connection."""
        self.mysql_cursor.close()
        self.conn.close()

    def _rotate_proxy(self):
        """Pick a (possibly different) proxy from the pool for later calls."""
        self.ip = {'https': random.choice(self._proxy_pool)}

    def _post_json(self, url, headers, payload, use_proxy=True):
        """POST *payload* as JSON and return the decoded response body.

        The request is attempted at most ``MAX_RETRIES`` times.  This
        replaces the former "recurse from a bare except" retry, which never
        terminated cleanly.

        Returns:
            The decoded JSON document, or ``None`` when every attempt failed
            (network error, timeout, or a body that is not JSON).
        """
        body = json.dumps(payload)
        for attempt in range(1, MAX_RETRIES + 1):
            if self.request_delay:
                time.sleep(self.request_delay)
            try:
                response = requests.post(
                    url, headers=headers, data=body,
                    proxies=self.ip if use_proxy else None,
                    timeout=REQUEST_TIMEOUT)
                return response.json()
            except (requests.RequestException, ValueError) as exc:
                print(f'request failed ({attempt}/{MAX_RETRIES}) {url}: {exc!r}',
                      file=sys.stderr)
                if use_proxy:
                    self._rotate_proxy()
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_BACKOFF * attempt)
        return None

    # District list taken from the scenic-spot city selector.
    def cityID(self):
        """Fetch all districts and crawl the attraction list of each one."""
        payload = {
            "data": "{\"source\":\"destH5\"}",
            "serviceName": "DestinationCoreService.getStaticDistrictCategoryList",
            "head": _request_head(),
        }
        headers = {
            'cookieOrigin': 'https://m.ctrip.com',
            'User-Agent': _USER_AGENT,
            'content-type': 'application/json',
            'Origin': 'https://m.ctrip.com',
            'Referer': _DISTRICT_REFERER,
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cookie': _DISTRICT_COOKIE,
        }
        # This request was sent without a proxy originally; keep it that way.
        body = self._post_json(_DISTRICT_URL, headers, payload, use_proxy=False)
        blobs = _all_matches(body, '$..data')
        if not blobs:
            print('district list could not be loaded', file=sys.stderr)
            return
        text = blobs[0]
        if not isinstance(text, str):
            # The pattern expects compact JSON text; serialise accordingly.
            text = json.dumps(text, ensure_ascii=False, separators=(',', ':'))
        for district_id, district_name in _DISTRICT_RE.findall(text):
            print(district_name)
            self.payload['districtId'] = _as_district_id(district_id)
            self.payload['index'] = 1
            time.sleep(1)
            self.sendUrl()

    def sendUrl(self, pages=PAGES_PER_DISTRICT):
        """Fetch *pages* list pages of the current district and parse them.

        Starts at ``self.payload['index']`` and advances it by one per page.
        The index is bumped only after the request, so the page announced on
        screen is the page fetched (page 1 used to be skipped).  A page that
        still fails after the retries is reported and skipped.
        """
        for _ in range(pages):
            page = self.payload['index']
            print(f'第{page}页')
            time.sleep(1)
            data = self._post_json(self.url, self.headers, self.payload)
            self.payload['index'] = page + 1
            if data is None:
                print(f'giving up on page {page} of district '
                      f'{self.payload["districtId"]}', file=sys.stderr)
                continue
            self.jieXi(data)

    def ziUrl(self, poild):
        """Request the overview (rating and grade) of one attraction.

        Args:
            poild: POI id taken from the attraction's detail URL (parameter
                name kept as-is for compatibility).

        Returns:
            The decoded JSON response, or ``None`` if the request failed.
        """
        payload = {
            "useSightExtend": True,
            # Must be the district whose list page produced this attraction.
            "districtId": self.payload['districtId'],
            "scene": "basic",
            "head": _request_head(),
            "poiId": poild,
        }
        headers = _detail_headers(_OVERVIEW_REFERER, _OVERVIEW_COOKIE)
        return self._post_json(_OVERVIEW_URL, headers, payload)

    def priceUrl(self, poild):
        """Request the extended info (ticket price) of one attraction.

        Args:
            poild: POI id taken from the attraction's detail URL.

        Returns:
            The decoded JSON response, or ``None`` if the request failed.
        """
        payload = {
            # Must be the district whose list page produced this attraction.
            "districtId": self.payload['districtId'],
            "scene": "basic",
            "head": _request_head(),
            "poiId": poild,
        }
        headers = _detail_headers(_EXTEND_REFERER, _EXTEND_COOKIE)
        return self._post_json(_EXTEND_URL, headers, payload)

    def jieXi(self, data):
        """Parse one attraction-list page and store a row per attraction.

        Rating and grade default to '无' and price to '免费' when the site
        provides no value.  An attraction whose detail requests fail, or
        whose row cannot be written, is reported and skipped; the page is
        never re-processed, so rows already saved are not inserted twice.

        Args:
            data: decoded JSON of a ``getAttractionList`` response.
        """
        time.sleep(self.randomtime)
        columns = (
            _all_matches(data, '$..poiName'),
            _all_matches(data, '$..detailUrl'),
            _all_matches(data, '$..distanceStr'),   # distance to city centre
            _all_matches(data, '$..displayField'),  # area the spot lies in
        )
        if len({len(column) for column in columns}) > 1:
            # zip() stops at the shortest column and the values may be
            # paired with the wrong attraction; make that visible.
            print('list page fields differ in length: '
                  f'{[len(column) for column in columns]}', file=sys.stderr)
        for poiName, detailUrl, distanceStr, displayField in zip(*columns):
            found = _POI_ID_RE.search(str(detailUrl))
            if found is None:
                print(f'no poiId in detail url, skipped: {detailUrl}',
                      file=sys.stderr)
                continue
            poild = found.group(1)

            overview = self.ziUrl(poild)
            extend = self.priceUrl(poild)
            if overview is None or extend is None:
                # Do not store made-up defaults for data we failed to fetch.
                print(f'detail request failed, skipped: {poiName}',
                      file=sys.stderr)
                continue

            commentScore = _first_match(overview, '$..commentScore', '无')
            level = _first_match(overview, '$..poiLevel', None)
            # Append the 'A' only to a real grade (formerly gave 'FalseA').
            poiLevel = '无' if level is None else f'{level}A'
            price = _first_match(extend, '$..price', '免费')

            print(poiName, commentScore, distanceStr, displayField,
                  poiLevel, price, detailUrl)
            try:
                self.save(poiName, commentScore, distanceStr, displayField,
                          poiLevel, price, detailUrl)
            except mysql.connector.Error as exc:
                print('阿巴阿巴阿巴阿巴阿巴阿巴阿巴')
                print(f'could not save {poiName}: {exc!r}', file=sys.stderr)

    def save(self, a, b, c, d, e, f, g):
        """Insert one attraction row into table ``pl`` and commit.

        Args:
            a: poiName, b: commentScore, c: distanceStr, d: displayField,
            e: sightLevelStr, f: price, g: detailUrl.
        """
        sql = ('insert into pl(poiName,commentScore,distanceStr,displayField,'
               'sightLevelStr,price,detailUrl) values(%s,%s,%s,%s,%s,%s,%s)')
        self.mysql_cursor.execute(sql, (a, b, c, d, e, f, g))
        self.conn.commit()

    def start(self):
        """Run the whole crawl; currently this only calls ``cityID``.

        An earlier variant collected city ids from the hotel city index,
        one letter at a time (letters I, O and V have no entries).
        """
        self.cityID()


if __name__ == '__main__':
    aaa = XCspider()
    try:
        aaa.cityID()
    finally:
        aaa.close()
需求完成
只是请求太过频繁,需要调节请求速度
博主速度不会调
懂行的小伙伴,可以在这里留言,我请你吃棒棒糖,双享棒一块钱的哦(5毛的不吃)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。