赞
踩
-
- """
- author: 17839606517
- """
-
-
- import datetime
- import datetime
- import os
- import csv
- from codecs import StreamReaderWriter
-
- import numpy as np
- import requests
- import pandas as pd
- import json
- import codecs
- import time
-
-
-
def _append_row(grid_i, grid_j, fields):
    """Append one GBK-encoded record to main4.txt.

    Record layout (matches the original hand-rolled writer, including the
    trailing comma): grid row index, grid column index, then each entry of
    *fields*, all comma-separated, terminated by a newline.
    """
    # `with` guarantees the handle is closed even if a write fails
    # (the original opened/closed manually and leaked on exceptions).
    with codecs.open("main4.txt", 'a', 'gbk') as fh:
        fh.write(str(grid_i) + ',' + str(grid_j) + ',')
        for field in fields:
            fh.write(str(field) + ",")
        fh.write('\n')


def aaa():
    """Crawl Baidu Map real-time traffic data over a 23x20 lat/lng grid.

    For every grid cell one rectangle (polygon) query is sent to the Baidu
    traffic API. Congested road sections get one detailed record each;
    roads reported without congestion get a short "free-flowing" record.
    All records are appended to main4.txt (GBK). API keys are rotated
    round-robin to spread the per-key quota. Any failure aborts the
    current sweep (logged, not raised) so the outer scheduler can retry.
    """
    # Pool of Baidu Map API keys, one picked per request round-robin to
    # avoid hitting a single key's rate/quota limit.
    ak = ['8oa21GO9kDBUiFCDH9VmEEOS3F7G2UfW', 'GUDOwvKZM8teprBTm1DWBiaq9kqWkmTw',
          'VNpap0WhfCzcQmrmvtjkdrSRUybgGNvL', 'YZdlNgbCUEzItueZ6xZI28LBkik0ZBtN',
          'XLF1DWwGLdN3agDHNFW0GTiKiOTnlCu3', 'FkXcN4asOWY9DWwp1ERA5XZrjIGaLbCG',
          'I69ZxDAtW0hz9N2xc1Cnqi74vPPqzmbZ', 'E8v35ryXDhNgSDiulnNcapRj4EGCNaIV',
          '8oa21GO9kDBUiFCDH9VmEEOS3F7G2UfW', 'YZdlNgbCUEzItueZ6xZI28LBkik0ZBtN',
          'vpVlSRw8f7aHYpbprFk6QBqlomfAMr2A']

    # Total request counter; akk % len(ak) selects the key for a request.
    akk = 0
    # Bottom-left corner of the whole grid.
    # Route per original notes: 114.143639,30.477906 (East Ave / 4th Ring)
    # -- 114.442451,30.714406.
    baselng = 114.143639
    baselat = 30.477906
    # Width of one grid cell, in degrees.
    widthlng = 0.013  # at this latitude, 0.01 deg of lng is ~1000 m
    widthlat = 0.012  # 0.01 deg of lat is ~1113 m

    url_suffix = "&coord_type_input=gcj02&coord_type_output=gcj02"

    # Crawling can fail mid-way (network, quota, malformed JSON); abort
    # the sweep but keep the process alive for the outer scheduler.
    # Fix: the original `except Exception: pass` hid every error; log it.
    try:
        # Sweep the 23 x 20 grid, one rectangle query per cell.
        for gi in range(23):
            # Latitude span of this row of cells. Round to 6 decimals —
            # unrounded floats can produce over-long coordinate strings
            # the API rejects.
            startlat = round(baselat + gi * widthlat, 6)
            endlat = round(startlat + widthlat, 6)
            for gj in range(20):
                startlng = round(baselng + gj * widthlng, 6)
                endlng = round(startlng + widthlng, 6)
                # Round-robin key selection.
                url = ("https://api.map.baidu.com/traffic/v1/polygon?ak="
                       + ak[akk % len(ak)] + "&vertexes=")
                # Rectangle as four "lat,lng" vertices:
                # bottom-right; top-right; top-left; bottom-left.
                loc = (str(startlat) + "," + str(endlng) + ";"
                       + str(endlat) + "," + str(endlng) + ";"
                       + str(endlat) + "," + str(startlng) + ";"
                       + str(startlat) + "," + str(startlng))
                s = requests.get(url + loc + url_suffix).json()
                akk = akk + 1

                roads = s.get('road_traffic')
                evaluation = s.get('evaluation')
                # Error payloads (bad key, quota exceeded, ...) carry no
                # road list: skip this cell.
                if not isinstance(roads, list):
                    continue

                desc = s.get('description')
                for road in roads:
                    time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    if len(desc) > 8:
                        sections = road.get('congestion_sections')
                        if sections is not None:
                            # One detailed record per congested section.
                            # (The original duplicated this writer three
                            # times; the len(sections)==1 branch and its
                            # else were byte-identical.)
                            for sec in sections:
                                _append_row(gi, gj, [
                                    road.get('road_name'),
                                    time_str,
                                    evaluation.get('status'),
                                    evaluation.get('status_desc'),
                                    sec.get('section_desc'),
                                    sec.get('status'),
                                    sec.get('speed'),
                                    sec.get('congestion_distance'),
                                    sec.get('congestion_trend'),
                                ])
                        else:
                            # Road listed but no congested sections:
                            # record it as free-flowing (status "1").
                            _append_row(gi, gj, [road.get('road_name'),
                                                 time_str, "1", "畅通"])
                    elif len(desc) == 8:
                        # An 8-character description ("该区域整体畅通。")
                        # means the whole cell is free-flowing.
                        _append_row(gi, gj, [road.get('road_name'),
                                             time_str, "1", "畅通"])
                    # Shorter descriptions: nothing to record, skip road.
                # NOTE(review): if quota limits bite, a short
                # time.sleep() here throttles per-second API calls.
    except Exception as e:
        print('crawl sweep aborted:', e)
# Scheduler: run forever; between 04:59 and 23:59 run one crawl sweep
# per hour, otherwise wait for the window to open.
var = 1
while var == 1:
    ts = time.time()
    today = str(datetime.datetime.now().date())
    start_time = datetime.datetime.strptime(today + '04:59', '%Y-%m-%d%H:%M')
    end_time = datetime.datetime.strptime(today + '23:59', '%Y-%m-%d%H:%M')
    now_time = datetime.datetime.now()
    if start_time < now_time < end_time:
        aaa()
    else:
        # Fix: the original `continue` here busy-spun at 100% CPU all
        # night; re-check the window once a minute instead.
        time.sleep(60)
        continue
    ts1 = time.time()
    # Sleep out the remainder of the hour. Fix: if a sweep took longer
    # than an hour the original passed a negative value to time.sleep
    # (ValueError); clamp at zero.
    time.sleep(max(0, 3600 - (ts1 - ts)))
- #将数据结构化存储至规定目录的CSV文件中
- '''
- c = pd.DataFrame(x,columns=['section_desc','status','speed','congestion_distance','x','y'])
- c.to_csv('E:/新建文件夹/600/爬虫.csv',encoding='utf-8-sig')
- '''
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。