赞
踩
from requests_html import HTMLSession import requests import time import re import datetime import json session = HTMLSession() import mysql.connector mydb = mysql.connector.connect( host="localhost", user="root", passwd="123456", database="test" ) mycursor = mydb.cursor() sql = "TRUNCATE TABLE area" mycursor.execute(sql) mydb.commit() # 获取省与直辖市 def get_Province_list(): # 返回一个 response 对象 response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/index.html') # 单位秒数 content = response.html.find('table.provincetable', first=True) li_list = content.find('a') for li in li_list: url = li.attrs['href'] code = re.findall("\d+",url)[0] area_code = str(code)+'0000000000' area_name=li.text # SQL 插入语句 sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s, %s, %s, %s)" val = (area_name, area_code, 0 ,0) mycursor.execute(sql, val) pid = mycursor.lastrowid mydb.commit() # 数据表内容有更新,必须使用到该语句 get_City_list(code,area_code, pid) #获取市 def get_City_list(code,parent_code, pid): # 返回一个 response 对象 response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'+code+'.html') # 单位秒数 content = response.html.find('table.citytable', first=True) citys = content.find('tr.citytr') for city in citys: td_list = city.find('td') ycode = td_list[0].find('a')[0] code1 = ycode.text[0:4] area_name = td_list[1].find('a')[0].text area_code = ycode.text[0:12] # SQL 插入语句 sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s, %s, %s, %s)" val = (area_name, area_code, parent_code, pid) mycursor.execute(sql, val) pid1 = mycursor.lastrowid mydb.commit() # 数据表内容有更新,必须使用到该语句 get_County_list(code,code1,area_code,pid1) #获取县级市 def get_County_list(code,code1,parent_code,pid1): # 返回一个 response 对象 response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'+code+'/'+code1+'.html') content = response.html.find('table.countytable', first=True) if content: citys = content.find('tr.countytr') for city in citys: td_list = city.find('td') ycode=td_list[0].find('a') if len(ycode) == 0: area_code =td_list[0].text area_name = td_list[1].text else: area_code = ycode[0].text[0:12] area_name = td_list[1].find('a')[0].text code2 = area_code[2:4] code3 = ycode[0].text[0:6] sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s,%s, %s, %s)" val = (area_name, area_code, code, pid1) mycursor.execute(sql, val) pid2 = mycursor.lastrowid mydb.commit() # 数据表内容有更新,必须使用到该语句 get_jd_data(code,code2,code3,area_code,pid2) def get_jd_data(code,code2,code3,parent_code,pid2): response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/' + code + '/' + code2 + '/' + code3 + '.html') content = response.html.find('table.towntable', first=True) if content: citys = content.find('tr.towntr') for city in citys: td_list = city.find('td') ycode = td_list[0].find('a') if len(ycode) == 0: area_code = td_list[0].text area_name = td_list[1].text else: area_code = ycode[0].text[0:12] area_name = td_list[1].find('a')[0].text sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s,%s, %s, %s)" val = (area_name, area_code, parent_code, pid2) mycursor.execute(sql, val) mydb.commit() # 数据表内容有更新,必须使用到该语句 if __name__ == '__main__': get_Province_list() 表结构: SET NAMES utf8mb4; SET FOREIGN_KEY_CHECKS = 0; -- ---------------------------- -- Table structure for area -- ---------------------------- DROP TABLE IF EXISTS `area`; CREATE TABLE `area` ( `id` bigint(20) NOT NULL AUTO_INCREMENT, `area_code` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区编码', `area_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区名称', `parent_code` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '上级编码', `parent_id` bigint(20) NULL DEFAULT 0 COMMENT '上级ID', `status` tinyint(1) NULL DEFAULT 1 COMMENT '状态 0 禁用 1 启用', `create_time` timestamp(0) NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', PRIMARY KEY (`id`) USING BTREE ) ENGINE = InnoDB AUTO_INCREMENT = 3184 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '地区' ROW_FORMAT = Dynamic; SET FOREIGN_KEY_CHECKS = 1;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。