from selenium import webdriver

# Use a proxy server (address taken from the original post)
options = webdriver.ChromeOptions()
options.add_argument("--proxy-server=http://101.37.79.125:3128")
driver = webdriver.Chrome(options=options)  # chrome_options= is deprecated; use options=
driver.maximize_window()

driver.get('url')  # replace 'url' with the page you want to visit
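
To confirm that Chrome is actually routing traffic through the proxy, you can load an IP-echo service and inspect the result. A minimal sketch, continuing from the driver above; the echoed address should be the proxy's (101.37.79.125), not your own:

driver.get('http://httpbin.org/ip')  # httpbin echoes the client's apparent IP as JSON
print(driver.page_source)            # expect the proxy's address if the proxy is in effect
driver.quit()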
Python: scraping proxy servers and saving them locally (personally tested, works)
import requests
from bs4 import BeautifulSoup
import random

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36'}

def xici_ip(page):
    """Scrape proxy IPs from xicidaili and append them to a local file."""
    for num_page in range(1, page + 1):
        url_part = "http://www.xicidaili.com/wn/"  # xicidaili's domestic-HTTPS proxy list
        url = url_part + str(num_page)  # build the URL of the page to scrape
        r = requests.get(url, headers=headers)
        if r.status_code == 200:
            soup = BeautifulSoup(r.text, 'lxml')
            trs = soup.find_all('tr')
            for i in range(1, len(trs)):  # skip the table header row
                tds = trs[i].find_all('td')
                ip_item = tds[1].text + ':' + tds[2].text  # IP:port
                # print('page ' + str(num_page) + ', entry ' + str(i) + ': ' + ip_item)
                with open(r'D:\ip.txt', 'a', encoding='utf-8') as f:
                    f.write(ip_item + '\n')
                # time.sleep(1)
    return 'saved successfully'

def get_ip():
    """Pick one proxy at random from the saved file."""
    with open(r'D:\ip.txt', 'r', encoding='utf-8') as f:
        lines = f.readlines()
    return random.choice(lines)

def check_ip():
    """Return a working proxies dict for requests, or None if the candidate fails."""
    ip = get_ip().strip()
    # requests expects lowercase scheme keys; cover both http and https traffic
    proxies = {'http': 'http://' + ip, 'https': 'https://' + ip}
    try:
        r = requests.get('http://httpbin.org/ip', headers=headers, proxies=proxies, timeout=10)
        if r.status_code == 200:
            return proxies
    except Exception as e:
        print(e)

def main():
    xici_ip(1)
    proxies = check_ip()
    if proxies is None:  # first candidate failed; try one more random proxy
        proxies = check_ip()
    return proxies

if __name__ == '__main__':
    main()
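
The two snippets can be tied together: the proxies dict returned by main() can feed the Selenium setup from the first snippet. A minimal sketch, assuming the code above is in the same file and that ip.txt yielded at least one working proxy:

from selenium import webdriver

proxies = main()  # e.g. {'http': 'http://1.2.3.4:3128', 'https': 'https://1.2.3.4:3128'}
if proxies:
    options = webdriver.ChromeOptions()
    options.add_argument('--proxy-server=' + proxies['http'])  # Chrome accepts scheme://host:port
    driver = webdriver.Chrome(options=options)
    driver.get('http://httpbin.org/ip')  # page should report the proxy's IP
    print(driver.page_source)
    driver.quit()
else:
    print('no working proxy found')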
