赞
踩
本文示例程序的功能主要是获取湖北省各个地市每日最高温、最低温、风速、风向等数据。包括历史气温数据以及未来40天预报气温数据。
历史气温数据地址:http://www.tianqihoubao.com/lishi/wuhan/month/202309.html
天气预报数据地址:http://www.weather.com.cn/weather40d/101200101.shtml
代码比较简单,就不详细注释了。
- import requests
- from bs4 import BeautifulSoup
- import pandas as pd
- import numpy as np
- from sqlalchemy import create_engine
-
- cities = {'huangshi':'黄石市',
- 'yichang':'宜昌市',
- 'ezhou':'鄂州市',
- 'jingmen':'荆门市',
- 'suizhou':'随州市',
- 'enshi':'恩施州',
- 'jingzhou':'荆州市',
- 'shennongjia':'神农架',
- 'tianmen':'天门市',
- 'qianjiang':'潜江市',
- 'xiantao':'仙桃市',
- 'wuhan':'武汉市',
- 'xianning':'咸宁市',
- 'huanggang':'黄冈市',
- 'shiyan':'十堰市',
- 'xiangyang':'襄阳市',
- 'xiaogan':'孝感市'}
-
- #爬取年月,需要手动更改传参,按季度手动追加增量数据至存量表
- years = ['2023']
- months = ['09']
-
-
- df = pd.DataFrame(columns=['city','date','tempture_low','tempture_high','weather_day','weather_night','winddirect_day','windpwr_day','winddirect_night','windpwr_night'])
-
- for k,v in cities.items():
- for year in years:
- for month in months:
- url = 'http://www.tianqihoubao.com/lishi/'+k+'/month/'+year+month+'.html'
- print('正在加载'+url)
- headers={'User-Agent':'User-Agent:Mozilla/5.0'}
- response = requests.get(url,headers=headers)
- html=response.content
-
- soup = BeautifulSoup(html, features="html.parser")
- headData = soup.select('table.b td')
-
- r1=[]
- for index, value in enumerate(headData):
- l1 = headData[index].text.split()
- s1 = ",".join(l1)
- s2 = s1.replace('\n','').replace('/','').replace(',,',',').replace('℃','')
- l2=s2.split(',')
- r1.append(l2)
-
- r2=[val for sublist in r1[4:] for val in sublist]
- r3=np.reshape(r2, (int(len(r2)/9), 9))
-
- d = pd.DataFrame(r3,columns=['date','weather_day','weather_night','tempture_low','tempture_high','winddirect_day','windpwr_day','winddirect_night','windpwr_night'])
- d['city']=v
-
- df=pd.concat([df,d],ignore_index=True)
-
-
- df['tempture_low'] = pd.to_numeric(df['tempture_low'])
- df['tempture_high'] = pd.to_numeric(df['tempture_high'])
- df=df.drop_duplicates()
-
- #保存为表格
- df.to_excel('D:\python代码\气温数据爬虫\湖北省各地区气温数据-增量.xlsx',index=False)
-
- #存储至本地postgres数据库
- engine1 =create_engine('postgresql+psycopg2://postgres:root@localhost:5432/postgres')
-
- try:
- df.to_sql('hb_qy_qw_zl',engine1,schema='sjsj',index=False,if_exists='replace')
- except Exception as e:
- print(e)

- import requests
- import json
- import pandas as pd
- from sqlalchemy import create_engine
-
-
- headers = {"Referer":"http://www.weather.com.cn/",
- "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36"}
-
- cities={'101200101':'武汉市',
- '101200201':'襄阳市',
- '101200301':'鄂州市',
- '101200401':'孝感市',
- '101200501':'黄冈市',
- '101200601':'黄石市',
- '101200701':'咸宁市',
- '101200801':'荆州市',
- '101200901':'宜昌市',
- '101201001':'恩施州',
- '101201101':'十堰市',
- '101201201':'神农架',
- '101201301':'随州市',
- '101201401':'荆门市',
- '101201501':'天门市',
- '101201601':'仙桃市',
- '101201701':'潜江市'}
-
- #预测月份,需要手动更改传参
- yms = ['202309','202310']
-
- sheet1 = pd.DataFrame(columns=['city','date','tempture_low','tempture_high'])
-
- for k,v in cities.items():
- for ym in yms:
- url = 'http://d1.weather.com.cn/calendar_new/2023/'+k+'_'+ym+'.html'
- print(v,ym,url)
- rows=[]
- response = requests.get(url,headers=headers)
- if response.status_code ==200:
- content=response.content.decode('utf-8')
- weathers = json.loads(content[11:])
- for i in range(len(weathers)):
- date=weathers[i]['date']
- temperature_high=weathers[i]['max']
- temperature_low=weathers[i]['min']
- rows.append([date,temperature_high,temperature_low])
-
- content_df = pd.DataFrame(rows,columns=['date','tempture_high','tempture_low'])
- content_df['city']=v
- sheet1=pd.concat([sheet1,content_df],ignore_index=True)
-
-
- writer1 = pd.ExcelWriter(r'D:\python代码\气温数据爬虫\湖北省各地区天气预报数据.xlsx', engine='xlsxwriter')
-
- sheet2=sheet1.copy()
- sheet2['date'] = sheet2['date'].str.slice(0, 4)+'年'+sheet2['date'].str.slice(4, 6)+'月'+sheet2['date'].str.slice(6, 8)+'日'
- sheet2['tempture_high']=pd.to_numeric(sheet2['tempture_high'])
- sheet2['tempture_low']=pd.to_numeric(sheet2['tempture_low'])
- sheet2.dropna(inplace=True)
- sheet2=sheet2.drop_duplicates()
- sheet2.to_excel(writer1,sheet_name='未来40天预报',index=False)
-
- sheet2['row_num']=sheet2.groupby('city')['date'].rank(ascending=True)
- sheet2['row_num']=sheet2['row_num'].values.astype(int)
-
- sheet3=sheet2[['city','date','tempture_low','tempture_high']][sheet2['row_num']<17]
- sheet3.to_excel(writer1,sheet_name='未来半个月预报',index=False)
-
- sheet4=sheet2[['city','date','tempture_low','tempture_high']][sheet2['row_num']<9]
- sheet4.to_excel(writer1,sheet_name='未来一周预报',index=False)
-
- writer1.save()
-
-
- #存储至本地postgres数据库
- engine1 =create_engine('postgresql+psycopg2://postgres:root@localhost:5432/postgres')
-
- try:
- sheet2.to_sql('hb_qy_qw_yb_1m',engine1,schema='sjsj',index=False,if_exists='replace')
- sheet3.to_sql('hb_qy_qw_yb_hm',engine1,schema='sjsj',index=False,if_exists='replace')
- sheet4.to_sql('hb_qy_qw_yb_1w',engine1,schema='sjsj',index=False,if_exists='replace')
- except Exception as e:
- print(e)

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。