import urllib.request
import urllib.parse
import ast
import pickle

import requests
from bs4 import BeautifulSoup as bs


def urlopen(url):
    # Login request: headers captured from the browser session
    head = {}
    head['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['Cache-Control'] = 'no-cache'
    head['Connection'] = 'keep-alive'
    head['Content-Length'] = '97'  # hard-coded from the captured request; must match the encoded form length
    head['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    head['Cookie'] = 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540774055'
    head['Host'] = 'business.hcp66.com'
    head['Pragma'] = 'no-cache'
    head['Referer'] = 'http://business.hcp66.com/member/index/login.html'
    head['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    head['X-Requested-With'] = 'XMLHttpRequest'
    req = urllib.request.Request(url, headers=head)
    data = {}
    data['gotourl'] = ''
    data['member[username]'] = 'hcp.com'
    data['member[password]'] = '1456'
    data['member[code]'] = ''
    data = urllib.parse.urlencode(data).encode('utf-8')
    html = urllib.request.urlopen(req, data)
    html = html.read()
    return html


def chaurlopen(Length, city1, city2, city3):
    # Query-page request
    url = 'http://business.hcp66.com/member/index/shop.html'
    head = {}
    head['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['Cache-Control'] = 'no-cache'
    head['Connection'] = 'keep-alive'
    head['Content-Length'] = Length
    head['Content-Type'] = 'application/x-www-form-urlencoded'
    head['Cookie'] = 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540768648'
    head['Host'] = 'business.hcp66.com'
    head['Pragma'] = 'no-cache'
    head['Referer'] = 'http://business.hcp66.com/member/index/shop.html'
    head['Upgrade-Insecure-Requests'] = '1'
    head['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    req = urllib.request.Request(url, headers=head)
    data = {}
    data['search[city1]'] = city1
    data['search[city2]'] = city2
    data['search[city3]'] = city3
    data['b1'] = '查询'
    data = urllib.parse.urlencode(data).encode('utf-8')
    html = urllib.request.urlopen(req, data)
    html = html.read()
    return html


def length_(city1, city2, city3):
    # Work out the Content-Length of the query form: post the same form to
    # httpbin.org and read back the Content-Length header it echoes.
    data = {}
    data['search[city1]'] = city1
    data['search[city2]'] = city2
    data['search[city3]'] = city3
    data['b1'] = '查询'
    req = requests.post('http://httpbin.org/post', data)
    length = req.json()['headers']['Content-Length']
    return length


def xia():
    url = 'http://business.hcp66.com/member/index/login.html'
    html = urlopen(url)  # log in first
    html = html.decode('utf-8')
    htmldic = ast.literal_eval(html)  # on a successful login, turn the response into a dict
    print(htmldic)
    # Province name -> code submitted in the query form (陕西省 was listed as '28' in the
    # original, duplicating 甘肃省; '27' is assumed here).
    dic = {'北京市': '1', '天津市': '2', '河北省': '3', '山西省': '4', '内蒙古自治区': '5',
           '辽宁省': '6', '吉林省': '7', '黑龙江省': '8', '上海市': '9', '江苏省': '10',
           '浙江省': '11', '安徽省': '12', '福建省': '13', '江西省': '14', '山东省': '15',
           '河南省': '16', '湖北省': '17', '湖南省': '18', '广东省': '19', '广西壮族自治区': '20',
           '海南省': '21', '重庆市': '22', '四川省': '23', '贵州省': '24', '云南省': '25',
           '西藏自治区': '26', '陕西省': '27', '甘肃省': '28', '青海省': '29', '宁夏回族自治区': '30',
           '新疆维吾尔自治区': '31', '台湾省': '32', '香港特别行政区': '33', '澳门特别行政区': '34'}
    file = open('name.pkl', 'rb')  # pickle holding the nationwide city/district name -> code mapping
    dict_name = pickle.load(file)  # load that mapping into dict_name
    dict_qu = input('请输入省 市 区(县)空格隔开:')  # prompt: enter province, city, district (county), space-separated
    dict_qu = dict_qu.split()  # turn the input into a list
    print(dict_qu)
    city1 = dic[dict_qu[0]]        # province code
    city2 = dict_name[dict_qu[1]]  # city code
    if len(dict_qu) == 2:  # no district/county given: city3 defaults to '0'
        city3 = '0'
    else:                  # otherwise look it up as well
        city3 = dict_name[dict_qu[2]]
    Length = length_(city1, city2, city3)  # Content-Length for this form
    print(city1)
    print(city2)
    print(city3)
    print(Length)
    cont = chaurlopen(Length, city1, city2, city3)  # all form data ready, run the query
    cont = cont.decode('utf-8')
    cont = bs(cont, 'lxml')  # parse the result page
    list1 = cont.find_all('div', style="padding-top:50px;padding-left:15px;")
    list1 = list1[0]
    content = list1.find_all('td', height="30")
    if len(content) == 0:
        print("这个地区暂时无安装网点")  # no installation points in this area yet
    c = 0
    for i in content:
        i = i.text
        i = i.strip()
        if len(i) > 5:
            cha = i.find('通用记录仪')
            if cha == -1:  # skip entries containing '通用记录仪'
                print(i)
                c = c + 1
                if c == 2:  # blank line after every two printed entries
                    print('\n')
                    c = 0


x = 0
while x == 0:  # keep querying until the process is stopped
    xia()
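The script loads a name.pkl file that maps city and district names to the numeric codes the shop.html query form expects, but that file is never produced in the code above. Below is a minimal sketch of how such a pickle could be written, assuming you already have the name-to-code mapping; the names and codes shown are placeholders, not the site's real values.

import pickle

# Hypothetical mapping: the real keys and values must be the city/district
# names and codes actually used by business.hcp66.com's query form.
dict_name = {
    '郑州市': '172',   # placeholder code
    '金水区': '2399',  # placeholder code
}

with open('name.pkl', 'wb') as f:
    pickle.dump(dict_name, f)

Once written this way, pickle.load(open('name.pkl', 'rb')) in xia() returns the same dict, so the lookups dict_name[dict_qu[1]] and dict_name[dict_qu[2]] resolve to the codes submitted in the form.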