当前位置:   article > 正文

python爬取高德地图道路交通状态数据代码_高德路网数据爬取(注:标题写的是高德地图,但下方代码实际请求的是百度地图交通态势接口 api.map.baidu.com/traffic/v1/polygon,高德的URL已被注释掉)

高德路网数据爬取
  1. """
  2. author: 17839606517
  3. """
  4. import datetime
  5. import datetime
  6. import os
  7. import csv
  8. from codecs import StreamReaderWriter
  9. import numpy as np
  10. import requests
  11. import pandas as pd
  12. import json
  13. import codecs
  14. import time
  15. def aaa():
  16. # 初始API的URL
  17. # url="https://restapi.amap.com/v3/traffic/status/rectangle?key=4aa83cdb4436daa9df422838b94b3ea3&extensions=all&rectangle="
  18. # url = "https://api.map.baidu.com/traffic/v1/polygon?ak="+ak+"&vertexes="
  19. # url1 = "&coord_type_input=gcj02&coord_type_output=gcj02"
  20. # 设定整个网格左下角坐标的经纬度值
  21. # 起点坐标:114.107347,30.475042(汪家嘴立交桥)114.192358,30.528554
  22. # 起终点坐标改:114.143639,30.477906(东方大道四环线)--114.442451,30.714406
  23. # 终点坐标:114.432749,30.624948(友谊大道立交)
  24. # lon:20;lat:8
  25. ak = ['8oa21GO9kDBUiFCDH9VmEEOS3F7G2UfW', 'GUDOwvKZM8teprBTm1DWBiaq9kqWkmTw',
  26. 'VNpap0WhfCzcQmrmvtjkdrSRUybgGNvL', 'YZdlNgbCUEzItueZ6xZI28LBkik0ZBtN',
  27. 'XLF1DWwGLdN3agDHNFW0GTiKiOTnlCu3', 'FkXcN4asOWY9DWwp1ERA5XZrjIGaLbCG',
  28. 'I69ZxDAtW0hz9N2xc1Cnqi74vPPqzmbZ', 'E8v35ryXDhNgSDiulnNcapRj4EGCNaIV',
  29. '8oa21GO9kDBUiFCDH9VmEEOS3F7G2UfW', 'YZdlNgbCUEzItueZ6xZI28LBkik0ZBtN',
  30. 'vpVlSRw8f7aHYpbprFk6QBqlomfAMr2A']
  31. akk=0
  32. baselng = 114.143639
  33. baselat = 30.477906
  34. # 设定每个网格单元的经纬度宽
  35. widthlng = 0.013
  36. # 同一维度,lng=0.01≈1000米
  37. widthlat = 0.012
  38. # 同一经度,lat=0.01≈1113米
  39. # 用于储存数据
  40. x=[]
  41. #用于标识交通态势线段
  42. num=0
  43. #爬取过程可能会出错中断,因此增加异常处理
  44. try:
  45. #循环每个网格进行数据爬取,在这里构建了23X20网格
  46. for i in range(0,23):
  47. # print(i)
  48. xx=i
  49. #设定网格单元的左下与右上坐标的纬度值
  50. #在这里对数据进行处理,使之保留6位小数(不保留可能会莫名其妙出错)
  51. startlat=round(baselat+i*widthlat,6)
  52. endlat=round(startlat+widthlat,6)
  53. for j in range(0,20):
  54. # print(j)
  55. #设定网格单元的左下与右上坐标的经度值
  56. startlng=round(baselng+j*widthlng,6)
  57. endlng=round(startlng+widthlng,6)
  58. #设置API的URL并进行输出测试
  59. #a=23*i+20*j
  60. # int(a)
  61. #b=int(a)
  62. akkk=akk%11
  63. ak1=ak[akkk]
  64. #print(akkk)
  65. url = "https://api.map.baidu.com/traffic/v1/polygon?ak=" + ak1 + "&vertexes="
  66. url1 = "&coord_type_input=gcj02&coord_type_output=gcj02"
  67. locStr=str(startlat)+","+str(endlng)+";"+str(endlat)+","+str(endlng)\
  68. +";"+str(endlat)+","+str(startlng)+";"+str(startlat)+","+str(startlng)
  69. thisUrl=url+locStr+url1
  70. #print(thisUrl)
  71. #爬取数据
  72. data=requests.get(thisUrl)
  73. s=data.json()
  74. akk=akk+1
  75. #print(s)
  76. #print(s)
  77. #a1=['a','b']
  78. a1=s.get('road_traffic')
  79. a0=s.get('evaluation')
  80. #print(a0.get('status'))
  81. #print(type(a1))
  82. #print(type(a1) is list)
  83. if (type(a1) is list)==False:
  84. #print('截至吧')
  85. continue
  86. #print(a1)
  87. #print(i)
  88. #print(j)
  89. for i1 in range (0,len(a1)):
  90. #print(len(s.get('description')))
  91. if len(s.get('description'))>8:
  92. a3 = []
  93. a4 = a1[i1].get('congestion_sections')
  94. #print(a4==None)
  95. if (a4==None)==False:
  96. for i2 in range (0,len(a4)):
  97. #print (len(a4))
  98. if len(a4)==1:
  99. a3 = []
  100. curr_time = datetime.datetime.now()
  101. time_str = datetime.datetime.strftime(curr_time, '%Y-%m-%d %H:%M:%S')
  102. a3.append(a1[i1].get('road_name'))
  103. #a3.append(pd.DataFrame([time_str]))
  104. a3.append(time_str)
  105. a3.append(a0.get('status'))
  106. a3.append(a0.get('status_desc'))
  107. a3.append(a4[i2].get('section_desc'))
  108. a3.append(a4[i2].get('status'))
  109. a3.append(a4[i2].get('speed'))
  110. a3.append(a4[i2].get('congestion_distance'))
  111. a3.append(a4[i2].get('congestion_trend'))
  112. f1: StreamReaderWriter = codecs.open("main4.txt", 'a', 'gbk')
  113. #print(a3)
  114. f1.write(str(xx) + ',' + str(j) + ',')
  115. for i in range(0, len(a3)):
  116. f1.write(str(a3[i]) + ",")
  117. f1.write('\n')
  118. f1.close()
  119. a3=[]
  120. else:
  121. a3 = []
  122. curr_time = datetime.datetime.now()
  123. time_str = datetime.datetime.strftime(curr_time, '%Y-%m-%d %H:%M:%S')
  124. a3.append(a1[i1].get('road_name'))
  125. a3.append(time_str)
  126. a3.append(a0.get('status'))
  127. a3.append(a0.get('status_desc'))
  128. a3.append(a4[i2].get('section_desc'))
  129. a3.append(a4[i2].get('status'))
  130. a3.append(a4[i2].get('speed'))
  131. a3.append(a4[i2].get('congestion_distance'))
  132. a3.append(a4[i2].get('congestion_trend'))
  133. f: StreamReaderWriter = codecs.open("main4.txt", 'a','gbk')
  134. #print(a3)
  135. f.write(str(xx) + ',' + str(j) + ',')
  136. for i in range(0, len(a3)):
  137. f.write(str(a3[i]) + ",")
  138. f.write('\n')
  139. f.close()
  140. a3=[]
  141. #print (a4[i2])
  142. else:
  143. a3 = []
  144. #print(len(a1[i1]))
  145. curr_time = datetime.datetime.now()
  146. time_str = datetime.datetime.strftime(curr_time, '%Y-%m-%d %H:%M:%S')
  147. #print(time_str)
  148. a3.append(a1[i1].get('road_name'))
  149. #a3.append(pd.DataFrame([time_str]))
  150. a3.append(time_str)
  151. a3.append("1")
  152. a3.append("畅通")
  153. f2: StreamReaderWriter = codecs.open("main4.txt", 'a','gbk')
  154. f2.write(str(xx) + ',' + str(j) + ',')
  155. for i in range(0, len(a3)):
  156. f2.write(str(a3[i]) + ",")
  157. f2.write('\n')
  158. f2.close()
  159. a3=[]
  160. elif len(s.get('description'))==8:
  161. a3 = []
  162. #print (len(a1[i1]))
  163. curr_time = datetime.datetime.now()
  164. #print(curr_time)
  165. time_str = datetime.datetime.strftime(curr_time, '%Y-%m-%d %H:%M:%S')
  166. #print(time_str)
  167. a3.append(a1[i1].get('road_name'))
  168. a3.append(time_str)
  169. a3.append("1")
  170. a3.append("畅通")
  171. f3: StreamReaderWriter = codecs.open("main4.txt", 'a','gbk')
  172. f3.write(str(xx) + ',' + str(j) + ',')
  173. for i in range(0, len(a3)):
  174. f3.write(str(a3[i]) + ",")
  175. f3.write('\n')
  176. f3.close()
  177. a3=[]
  178. else:
  179. continue
  180. #
  181. #print(len(a1[i1]))
  182. #print(a3)
  183. '''a3= np.asarray(a3)
  184. np.savetxt("temp.csv",a3,delimiter=",")'''
  185. '''if not os.path.exists('result_s1.csv'):
  186. a3.to_csv('result_s1.csv', encoding='gbk', mode='a', index=False, index_label=False) # 保存数据
  187. else:
  188. a3.to_csv('result_s1.csv', encoding='gbk', mode='a', index=False, index_label=False,header=False)'''
  189. '''with open("test.txt","w")as csvfile:
  190. for i in range(0,len(a3)):
  191. writer=csv.writer(csvfile)
  192. #writer.writerow(['1','2','3','4','5','6','7','8','9'])
  193. writer.writerows(a3)'''
  194. #a=s.get('road_traffic').get('road_name')
  195. #print(a)
  196. '''
  197. print(a.get('description'))
  198. #注意,提取数值需要使用XXX.get()的方式来实现,如a[k].get('speed')
  199. #若使用a[k]['speed']来提取,或会导致KeyError错误
  200. for k in range(0,len(a)):
  201. #if s['description']!='该区域整体畅通。'
  202. s2=a[k]['congestion_distance']
  203. #s3=s2.split(";")
  204. for l in range(0,len(s2)):
  205. s4=x.append(s2)
  206. x.append([a[k].get('section_desc'),a[k].get('status'),a[k].get('speed'),num,float(s4)])
  207. print(x)
  208. num=num+1
  209. '''
  210. #time.sleep(0.1)
  211. #若爬取网格较多,可使用time.sleep(秒数)来避免高德的单秒API调用次数的限制
  212. except Exception as e:
  213. pass
  214. var=1
  215. while var==1:
  216. ts=time.time()
  217. start_time = datetime.datetime.strptime(str(datetime.datetime.now().date()) + '04:59', '%Y-%m-%d%H:%M')
  218. end_time = datetime.datetime.strptime(str(datetime.datetime.now().date()) + '23:59', '%Y-%m-%d%H:%M')
  219. now_time = datetime.datetime.now()
  220. if now_time > start_time and now_time < end_time:
  221. aaa()
  222. else:
  223. continue
  224. ts1=time.time()
  225. time.sleep(3600-ts1+ts)
  226. #将数据结构化存储至规定目录的CSV文件中
  227. '''
  228. c = pd.DataFrame(x,columns=['section_desc','status','speed','congestion_distance','x','y'])
  229. c.to_csv('E:/新建文件夹/600/爬虫.csv',encoding='utf-8-sig')
  230. '''

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/418689
推荐阅读
相关标签
  

闽ICP备14008679号