赞
踩
for循环和多线程 + selenium
实例一
for循环
# -*- coding: utf-8 -*-
"""
Datetime: 2019/6/22
Author: Zhang Yafei
Description:
"""
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from concurrent.futures import ThreadPoolExecutor
import functools
# Chrome options shared by every headless downloader instance.
chrome_options = Options()
for _argument in ("--headless", "--disable-gpu"):
    chrome_options.add_argument(_argument)
def timeit(func):
    """
    Decorator that prints how long each call to the wrapped function takes.

    Durations under one minute are printed in seconds; longer ones are split
    into minutes and seconds. Nothing is printed when the wrapped call raises,
    because the exception propagates before the report is reached.

    :param func: callable to time
    :return: wrapper that forwards all arguments to ``func`` and returns its result
    """
    @functools.wraps(func)
    def inner(*args, **kwargs):
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        started = time.perf_counter()
        ret = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        if elapsed < 60:
            print(f'花费时间:\t{round(elapsed, 2)}秒')
        else:
            # Descriptive names here also avoid shadowing the builtin min().
            minutes, seconds = divmod(elapsed, 60)
            print(f'花费时间\t{round(minutes)}分\t{round(seconds, 2)}秒')
        return ret
    return inner
class PolicyUrlDownload:
    """
    Collect the link targets found on a series of paginated listing pages.

    Each page URL is produced by formatting ``url`` with a 1-based page number.
    For every element matched by ``a_xpath`` the ``href`` attribute is appended
    to ``output_file`` as a tab-separated "page number, href" line.
    """

    def __init__(self, url, pages_num, output_file, a_xpath, headless: bool = True):
        """
        :param url: URL template with a ``str.format`` placeholder for the page number
        :param pages_num: number of pages to visit, starting from page 1
        :param output_file: path of the UTF-8 text file that results are appended to
        :param a_xpath: XPath selecting the elements whose ``href`` is recorded
        :param headless: when True start Chrome with the module-level
            ``chrome_options``; otherwise start it with default options
        """
        self.url_list = [url.format(page) for page in range(1, pages_num + 1)]
        self.output_file = output_file
        self.a_xpath = a_xpath
        if headless:
            self.driver = webdriver.Chrome(options=chrome_options)
        else:
            self.driver = webdriver.Chrome()

    def start(self, page, url):
        """
        Load one listing page and append every link it contains to the output file.

        :param page: page number written in front of each link
        :param url: address of the page to load
        """
        with open(self.output_file, mode='a', encoding='utf-8') as file:
            print(f"make request to {url}")
            self.driver.get(url)
            # find_elements_by_xpath was removed in Selenium 4.3. The generic
            # find_elements(by, value) form works on Selenium 3 and 4; "xpath"
            # is the locator strategy string that selenium's By.XPATH holds.
            titles = self.driver.find_elements("xpath", self.a_xpath)
            for title in titles:
                href = title.get_attribute('href')
                if not href:
                    # Skip matches without a link target instead of writing
                    # the text "None" into the output file.
                    continue
                file.write(f'{page}\t{href}\n')
            print(f'{url} download completed')

    def run(self):
        """Visit every page in order, then shut the browser down."""
        try:
            for page, url in enumerate(self.url_list, start=1):
                self.start(page, url)
        finally:
            # quit() ends the whole browser session, whereas close() only
            # closes the current window. Running it in ``finally`` releases
            # the browser even when a page fails to download.
            self.driver.quit()
@timeit
def main(setting):
    """
    Run one download job described by ``setting``.

    :param setting: keyword arguments forwarded to ``PolicyUrlDownload``
    """
    downloader = PolicyUrlDownload(**setting)
    downloader.run()
if __name__ == '__main__':
start_time = time.time()
print('######################## 开始下载 #########################')
# 多配置页面地址下载
settings = [
{
'output_file': '药品供应保障综合的管理.txt',
&
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。