当前位置:   article > 正文

python 获取全国所有的省、市、县、镇、村_python 根据城市获取省

python 根据城市获取省
获取全国所有的省、市、县、镇、村
from requests_html import HTMLSession
import requests
import time
import re
import  datetime
import json
session = HTMLSession()
import mysql.connector
mydb = mysql.connector.connect(
	host="localhost",
	user="root",
	passwd="123456",
	database="test"
)
mycursor = mydb.cursor()
sql = "TRUNCATE TABLE area"
mycursor.execute(sql)
mydb.commit()
# 获取省与直辖市
def get_Province_list():
	# 返回一个 response 对象
	response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/index.html')  # 单位秒数
	content = response.html.find('table.provincetable', first=True)
	li_list = content.find('a')
	for li in li_list:
		url = li.attrs['href']
		code = re.findall("\d+",url)[0]
		area_code = str(code)+'0000000000'
		area_name=li.text
		# SQL 插入语句
		sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s, %s, %s, %s)"
		val = (area_name, area_code, 0 ,0)
		mycursor.execute(sql, val)
		pid = mycursor.lastrowid
		mydb.commit()  # 数据表内容有更新,必须使用到该语句
		get_City_list(code,area_code, pid)

#获取市
def get_City_list(code,parent_code, pid):
	# 返回一个 response 对象
	response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'+code+'.html')  # 单位秒数
	content = response.html.find('table.citytable', first=True)
	citys = content.find('tr.citytr')
	for city in citys:
		td_list = city.find('td')
		ycode = td_list[0].find('a')[0]
		code1 = ycode.text[0:4]
		area_name = td_list[1].find('a')[0].text
		area_code = ycode.text[0:12]
		# SQL 插入语句
		sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s, %s, %s, %s)"
		val = (area_name, area_code, parent_code, pid)
		mycursor.execute(sql, val)
		pid1 = mycursor.lastrowid
		mydb.commit()  # 数据表内容有更新,必须使用到该语句
		get_County_list(code,code1,area_code,pid1)
#获取县级市
def get_County_list(code,code1,parent_code,pid1):
	# 返回一个 response 对象
	response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'+code+'/'+code1+'.html')
	content = response.html.find('table.countytable', first=True)
	if content:
		citys = content.find('tr.countytr')
		for city in citys:
			td_list = city.find('td')
			ycode=td_list[0].find('a')
			if len(ycode) == 0:
				area_code =td_list[0].text
				area_name = td_list[1].text
			else:
				area_code = ycode[0].text[0:12]
				area_name = td_list[1].find('a')[0].text
				code2 = area_code[2:4]
				code3 = ycode[0].text[0:6]
				sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s,%s, %s, %s)"
				val = (area_name, area_code, code, pid1)
				mycursor.execute(sql, val)
				pid2 = mycursor.lastrowid
				mydb.commit()  # 数据表内容有更新,必须使用到该语句
				get_jd_data(code,code2,code3,area_code,pid2)

def get_jd_data(code,code2,code3,parent_code,pid2):
	response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/' + code + '/' + code2 + '/' + code3 + '.html')
	content = response.html.find('table.towntable', first=True)
	if content:
		citys = content.find('tr.towntr')
		for city in citys:
			td_list = city.find('td')
			ycode = td_list[0].find('a')
			if len(ycode) == 0:
				area_code = td_list[0].text
				area_name = td_list[1].text
			else:
				area_code = ycode[0].text[0:12]
				area_name = td_list[1].find('a')[0].text
				sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s,%s, %s, %s)"
				val = (area_name, area_code, parent_code, pid2)
				mycursor.execute(sql, val)
				mydb.commit()  # 数据表内容有更新,必须使用到该语句

if __name__ == '__main__':
	get_Province_list()

表结构:
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for area
-- ----------------------------
DROP TABLE IF EXISTS `area`;
CREATE TABLE `area`  (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `area_code` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区编码',
  `area_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区名称',
  `parent_code` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '上级编码',
  `parent_id` bigint(20) NULL DEFAULT 0 COMMENT '上级ID',
  `status` tinyint(1) NULL DEFAULT 1 COMMENT '状态 0 禁用 1 启用',
  `create_time` timestamp(0) NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3184 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '地区' ROW_FORMAT = Dynamic;

SET FOREIGN_KEY_CHECKS = 1;
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Cpp五条/article/detail/171890
推荐阅读
相关标签
  

闽ICP备14008679号