当前位置:   article > 正文

气温数据爬虫(历史+预报)_python爬取历史24小时气象数据

python爬取历史24小时气象数据

本文示例程序的功能主要是获取湖北省各个地市每日最高温、最低温、风速、风向等数据。包括历史气温数据以及未来40天预报气温数据。

历史气温数据地址:http://www.tianqihoubao.com/lishi/wuhan/month/202309.html

天气预报数据地址:http://www.weather.com.cn/weather40d/101200101.shtml

代码比较简单,就不详细注释了。

一、全省各地市历史气温数据爬取处理

  1. import requests
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4. import numpy as np
  5. from sqlalchemy import create_engine
  6. cities = {'huangshi':'黄石市',
  7. 'yichang':'宜昌市',
  8. 'ezhou':'鄂州市',
  9. 'jingmen':'荆门市',
  10. 'suizhou':'随州市',
  11. 'enshi':'恩施州',
  12. 'jingzhou':'荆州市',
  13. 'shennongjia':'神农架',
  14. 'tianmen':'天门市',
  15. 'qianjiang':'潜江市',
  16. 'xiantao':'仙桃市',
  17. 'wuhan':'武汉市',
  18. 'xianning':'咸宁市',
  19. 'huanggang':'黄冈市',
  20. 'shiyan':'十堰市',
  21. 'xiangyang':'襄阳市',
  22. 'xiaogan':'孝感市'}
  23. #爬取年月,需要手动更改传参,按季度手动追加增量数据至存量表
  24. years = ['2023']
  25. months = ['09']
  26. df = pd.DataFrame(columns=['city','date','tempture_low','tempture_high','weather_day','weather_night','winddirect_day','windpwr_day','winddirect_night','windpwr_night'])
  27. for k,v in cities.items():
  28. for year in years:
  29. for month in months:
  30. url = 'http://www.tianqihoubao.com/lishi/'+k+'/month/'+year+month+'.html'
  31. print('正在加载'+url)
  32. headers={'User-Agent':'User-Agent:Mozilla/5.0'}
  33. response = requests.get(url,headers=headers)
  34. html=response.content
  35. soup = BeautifulSoup(html, features="html.parser")
  36. headData = soup.select('table.b td')
  37. r1=[]
  38. for index, value in enumerate(headData):
  39. l1 = headData[index].text.split()
  40. s1 = ",".join(l1)
  41. s2 = s1.replace('\n','').replace('/','').replace(',,',',').replace('℃','')
  42. l2=s2.split(',')
  43. r1.append(l2)
  44. r2=[val for sublist in r1[4:] for val in sublist]
  45. r3=np.reshape(r2, (int(len(r2)/9), 9))
  46. d = pd.DataFrame(r3,columns=['date','weather_day','weather_night','tempture_low','tempture_high','winddirect_day','windpwr_day','winddirect_night','windpwr_night'])
  47. d['city']=v
  48. df=pd.concat([df,d],ignore_index=True)
  49. df['tempture_low'] = pd.to_numeric(df['tempture_low'])
  50. df['tempture_high'] = pd.to_numeric(df['tempture_high'])
  51. df=df.drop_duplicates()
  52. #保存为表格
  53. df.to_excel('D:\python代码\气温数据爬虫\湖北省各地区气温数据-增量.xlsx',index=False)
  54. #存储至本地postgres数据库
  55. engine1 =create_engine('postgresql+psycopg2://postgres:root@localhost:5432/postgres')
  56. try:
  57. df.to_sql('hb_qy_qw_zl',engine1,schema='sjsj',index=False,if_exists='replace')
  58. except Exception as e:
  59. print(e)

二、全省各地市预报气温数据爬取处理

  1. import requests
  2. import json
  3. import pandas as pd
  4. from sqlalchemy import create_engine
  5. headers = {"Referer":"http://www.weather.com.cn/",
  6. "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36"}
  7. cities={'101200101':'武汉市',
  8. '101200201':'襄阳市',
  9. '101200301':'鄂州市',
  10. '101200401':'孝感市',
  11. '101200501':'黄冈市',
  12. '101200601':'黄石市',
  13. '101200701':'咸宁市',
  14. '101200801':'荆州市',
  15. '101200901':'宜昌市',
  16. '101201001':'恩施州',
  17. '101201101':'十堰市',
  18. '101201201':'神农架',
  19. '101201301':'随州市',
  20. '101201401':'荆门市',
  21. '101201501':'天门市',
  22. '101201601':'仙桃市',
  23. '101201701':'潜江市'}
  24. #预测月份,需要手动更改传参
  25. yms = ['202309','202310']
  26. sheet1 = pd.DataFrame(columns=['city','date','tempture_low','tempture_high'])
  27. for k,v in cities.items():
  28. for ym in yms:
  29. url = 'http://d1.weather.com.cn/calendar_new/2023/'+k+'_'+ym+'.html'
  30. print(v,ym,url)
  31. rows=[]
  32. response = requests.get(url,headers=headers)
  33. if response.status_code ==200:
  34. content=response.content.decode('utf-8')
  35. weathers = json.loads(content[11:])
  36. for i in range(len(weathers)):
  37. date=weathers[i]['date']
  38. temperature_high=weathers[i]['max']
  39. temperature_low=weathers[i]['min']
  40. rows.append([date,temperature_high,temperature_low])
  41. content_df = pd.DataFrame(rows,columns=['date','tempture_high','tempture_low'])
  42. content_df['city']=v
  43. sheet1=pd.concat([sheet1,content_df],ignore_index=True)
  44. writer1 = pd.ExcelWriter(r'D:\python代码\气温数据爬虫\湖北省各地区天气预报数据.xlsx', engine='xlsxwriter')
  45. sheet2=sheet1.copy()
  46. sheet2['date'] = sheet2['date'].str.slice(0, 4)+'年'+sheet2['date'].str.slice(4, 6)+'月'+sheet2['date'].str.slice(6, 8)+'日'
  47. sheet2['tempture_high']=pd.to_numeric(sheet2['tempture_high'])
  48. sheet2['tempture_low']=pd.to_numeric(sheet2['tempture_low'])
  49. sheet2.dropna(inplace=True)
  50. sheet2=sheet2.drop_duplicates()
  51. sheet2.to_excel(writer1,sheet_name='未来40天预报',index=False)
  52. sheet2['row_num']=sheet2.groupby('city')['date'].rank(ascending=True)
  53. sheet2['row_num']=sheet2['row_num'].values.astype(int)
  54. sheet3=sheet2[['city','date','tempture_low','tempture_high']][sheet2['row_num']<17]
  55. sheet3.to_excel(writer1,sheet_name='未来半个月预报',index=False)
  56. sheet4=sheet2[['city','date','tempture_low','tempture_high']][sheet2['row_num']<9]
  57. sheet4.to_excel(writer1,sheet_name='未来一周预报',index=False)
  58. writer1.save()
  59. #存储至本地postgres数据库
  60. engine1 =create_engine('postgresql+psycopg2://postgres:root@localhost:5432/postgres')
  61. try:
  62. sheet2.to_sql('hb_qy_qw_yb_1m',engine1,schema='sjsj',index=False,if_exists='replace')
  63. sheet3.to_sql('hb_qy_qw_yb_hm',engine1,schema='sjsj',index=False,if_exists='replace')
  64. sheet4.to_sql('hb_qy_qw_yb_1w',engine1,schema='sjsj',index=False,if_exists='replace')
  65. except Exception as e:
  66. print(e)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/753290
推荐阅读
相关标签
  

闽ICP备14008679号