赞
踩
8 self.headers = {
9 “User-Agent”: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
10 “Chrome/45.0.2454.101 Safari/537.36”,
11 ‘Accept-Encoding’: ‘gzip, deflate, sdch’,
12 }
13
# Scrape the domestic high-anonymity proxies listed on Xici Daili (xicidaili.com)
def get_proxy_nn(self):
    """Scrape the Xici Daili "nn" (domestic high-anonymity) proxy listing.

    Fetches http://www.xicidaili.com/nn using ``self.headers`` and extracts
    every IP/port pair found in the returned HTML.

    Returns:
        list: Proxies as ``"ip:port"`` strings, in the order they appear
        on the page. Empty when the page contains no matching rows.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            when the request exceeds the timeout).
    """
    # A timeout keeps an unresponsive server from blocking the caller forever.
    res = requests.get(
        "http://www.xicidaili.com/nn",
        headers=self.headers,
        timeout=10,
    )
    # Capture the IP and its port in ONE match so they cannot drift apart.
    # Matching them with two independent patterns (and an unescaped "." that
    # accepts any character) pairs IPs with unrelated numbers from the page.
    # NOTE(review): assumes the IP and the port sit in adjacent <td> cells of
    # the listing table -- confirm against the live page markup.
    row_pattern = re.compile(
        r"<td>\s*(\d{1,3}(?:\.\d{1,3}){3})\s*</td>\s*<td>\s*(\d+)\s*</td>"
    )
    return ["%s:%s" % pair for pair in row_pattern.findall(res.text)]
23
# Check whether each proxy is actually usable
def verify_proxy(self, proxy_list):
    """Probe each candidate proxy and remember the ones that work.

    Every proxy is used for a GET of http://www.baidu.com with a 2-second
    timeout. A proxy that yields HTTP 200 is printed as ``success`` and
    appended to ``self.proxy_list`` unless it is already there; any other
    outcome is printed as ``fail``.

    Args:
        proxy_list: Iterable of proxy address strings (e.g. ``"ip:port"``)
            to test.
    """
    for proxy in proxy_list:
        proxies = {"http": proxy}
        try:
            # Only the network call is guarded, so programming errors
            # elsewhere in the loop are not mistaken for a dead proxy.
            response = requests.get(
                "http://www.baidu.com", proxies=proxies, timeout=2
            )
        except requests.RequestException:
            # Catching only request failures lets Ctrl-C / SystemExit
            # propagate instead of being reported as a failed proxy.
            print("fail %s" % proxy)
            continue

        if response.status_code != 200:
            # A reachable proxy that returns an error page is unusable too;
            # report it rather than dropping it silently.
            print("fail %s" % proxy)
            continue

        print("success %s" % proxy)
        if proxy not in self.proxy_list:
            self.proxy_list.append(proxy)
37
# Save the verified proxies to proxies.txt
39 def save_proxy(self):
4
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。