赞
踩
使用的是 Python 3
需要先安装 PyMySQL:pip install pymysql
#爬取逻辑
1.根据name查询匹配建筑物
2.通过接口返回的建筑物信息获取建筑物ID
3.用建筑物ID查询边界信息
4.数据返回格式是json字符串,直接转成json处理
5.遍历出边界信息保存
6.存储边界信息
# coding=utf-8
import json
import random
import string
import time
import urllib.request
from urllib.parse import quote

import pymysql

# Shared MySQL connection, opened when the module is loaded and used by the
# query/update helpers in this script.
COON = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='root',
    db='navi_scrapy',
    charset='utf8')

pagestart = 1

# Setup checklist before running this script:
# 1. Add a `border` column to the data table.
# 2. Update the database connection settings above.
# 3. Adjust the name and city columns used for the lookup to match the table.
# 4. Start run_gaode_border.py.

# Place text-search endpoint; "keywords=" and "city=" are appended per row.
# NOTE(review): the API key is hard-coded here; consider moving it to
# configuration.
start_url = 'https://restapi.amap.com/v3/place/text?key=4b86820a7590de60e4f81f53e59ae17f&citylimit=true&output=json&'

# Detail endpoint; the POI id is appended to this prefix.
url = "https://ditu.amap.com/detail/get/detail?id="
def hello():
    """Run the crawl: search every pending row and hand the result to parse().

    Rows come from get_data(). For each row, column 0 is passed on as the
    row id, column 2 is used as the search keyword (name) and column 3 as
    the city filter.
    """
    # get_data() may yield None when its query fails; treat that as "no rows"
    # instead of crashing on iteration.
    citys = get_data() or ()
    print(citys)
    for city in citys:
        # Percent-encode each value on its own so characters such as '&', '#'
        # or spaces inside a name cannot corrupt the query string.
        tempurl = (start_url
                   + "keywords=" + quote(str(city[2]), safe="")
                   + "&city=" + quote(str(city[3]), safe=""))
        request = urllib.request.Request(url=tempurl, headers=get_header(), method='GET')
        time.sleep(0.8)  # throttle between requests
        try:
            # The context manager closes the HTTP response once it is parsed.
            with urllib.request.urlopen(request) as response:
                parse(response, {"id": city[0], "name": city[2]})
        except OSError as exc:
            # URLError/HTTPError and socket errors are OSError subclasses;
            # skip this row and keep crawling the remaining ones.
            print("请求失败: " + tempurl + " error: " + str(exc))
def parse(response, meta):
    """Handle a place-search response and fetch the matching detail page.

    The first POI in the result is taken as the match. When that POI has a
    non-empty ``parent`` entry, the parent id is used instead of the POI's
    own id. The detail response is passed to info() together with the row id.

    :param response: readable object whose read() returns UTF-8 JSON bytes
    :param meta: dict carrying at least the row ``id``
    :return: None; failures are printed, not raised
    """
    try:
        data = json.loads(response.read().decode("utf8"))
        print(data)
        if data["status"] == "1":
            poi = data["pois"][0]  # the first hit is normally the one searched for
            print(poi)
            if poi["parent"] != []:
                print("查询parent ID~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                poi_id = poi["parent"]
            else:
                poi_id = poi["id"]

            gaode_url = url + str(poi_id)
            print(gaode_url)
            request = urllib.request.Request(url=gaode_url, headers=get_header(), method='GET')
            time.sleep(0.8)  # throttle between requests
            # Close the detail response as soon as info() has consumed it.
            with urllib.request.urlopen(request) as detail_response:
                info(detail_response, {"id": meta["id"], "url": gaode_url})
        else:
            print("接口返回异常~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~·")
    except Exception as e:
        # str() is required: concatenating the exception object itself would
        # raise TypeError inside this handler.
        print("查询失败~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~·" + str(e))
def info(response, meta):
    """Extract the border shape from a detail response and store it.

    Reads ``data.spec.mining_shape.shape`` from the JSON payload and passes
    it to update_data() for the row identified by ``meta["id"]``. Nothing is
    stored when the payload's ``status`` is not "1".

    :param response: readable object whose read() returns UTF-8 JSON bytes
    :param meta: dict with the row ``id`` and the requested ``url``
    :return: None; failures are printed, not raised
    """
    try:
        data = json.loads(response.read().decode("utf8"))
        print(data)
        if data["status"] == "1":
            spec = data["data"]["spec"]
            border = spec["mining_shape"]["shape"]
            print("border :~~~~~~~~~~~~~~~~~~~", border)
            update_data(meta["id"], border)
    except Exception as e:
        # str() is required: concatenating the exception object itself would
        # raise TypeError inside this handler.
        print("查询错误~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + meta["url"] + " error: " + str(e))
def get_data():
    """Fetch every row of ``new_shopping`` whose ``border`` is still NULL.

    :return: the rows from ``fetchall()``, or an empty tuple when the query
        fails (the error is printed), so callers can always iterate the result
    """
    cursor = COON.cursor()
    try:
        cursor.execute("SELECT * FROM new_shopping WHERE border is null")
        return cursor.fetchall()
    except Exception as e:
        print(e)
        return ()
    finally:
        # Always release the cursor, on success and on failure.
        cursor.close()
def update_data(id, border):
    """Store ``border`` on the ``new_shopping`` row with the given ``id``.

    :param id: primary-key value of the row to update
    :param border: border shape; stored as ``str(border)``
    :return: None; database errors are printed, not raised
    """
    cursor = COON.cursor()
    try:
        # Let the driver escape the values: the border text comes from an
        # external API, so building the statement by concatenation would
        # break on quotes (and previously stored a stray trailing space).
        cursor.execute(
            "UPDATE new_shopping SET border=%s WHERE id=%s",
            (str(border), id),
        )
        # The connection is not opened with autocommit, so without an explicit
        # commit the update would not be persisted on a transactional table.
        COON.commit()
    except Exception as e:
        print(e)
    finally:
        # Always release the cursor, on success and on failure.
        cursor.close()
def get_header():
    """Build HTTP request headers with a randomly chosen User-Agent.

    ``Connection``, ``Accept`` and ``Accept-Language`` are fixed picks from
    the candidate lists below; only ``User-Agent`` varies between calls.

    :return: dict of header name to header value
    """
    head_connection = ['Keep-Alive', 'close']
    head_accept = ['text/html, application/xhtml+xml, */*',
                   'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8']
    head_accept_language = ['zh-CN,fr-FR;q=0.5', 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3']
    head_user_agent = [
        'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
        'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0)',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET4.0C; .NET4.0E)',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET4.0C; .NET4.0E; QQBrowser/7.3.9825.400)',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; BIDUBrowser 2.x)',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12',
        'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080219 Firefox/2.0.0.12 Navigator/9.0.0.6',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; rv:11.0) like Gecko)',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0 ',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Maxthon/4.0.6.2000 Chrome/26.0.1410.43 Safari/537.1 ',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.92 Safari/537.1 LBBROWSER',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/3.0 Safari/536.11',
        'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.0',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
    ]
    return {
        'Connection': head_connection[0],
        'Accept': head_accept[0],
        'Accept-Language': head_accept_language[1],
        # Rotate the User-Agent so successive requests do not all look alike.
        'User-Agent': random.choice(head_user_agent),
    }
# Start the crawl as soon as the script is loaded.
hello()

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。