赞
踩
今天在工作中遇到爬虫效率问题,在此处记录多进程、多线程测试脚本
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- __author__ = 'Seven'
- from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
- import time
-
-
def gcd(pair):
    """Return the greatest common divisor of the two integers in *pair*.

    The divisor is found by scanning downward from the smaller magnitude.
    The linear scan is kept on purpose rather than replaced by a faster
    algorithm: the benchmark functions in this file time this function
    (presumably as a CPU-bound workload).

    Args:
        pair: Two-item iterable ``(a, b)`` of integers.

    Returns:
        The largest non-negative integer dividing both values. When one
        value is zero the magnitude of the other is returned, so
        ``gcd((0, 0))`` is ``0``.
    """
    a, b = pair
    # Signs do not affect divisibility; normalising keeps the scan range valid.
    a, b = abs(a), abs(b)
    low = min(a, b)
    if low == 0:
        # range(0, 0, -1) is empty, so without this guard the function would
        # fall off the end and return None.
        return max(a, b)
    for i in range(low, 0, -1):
        if a % i == 0 and b % i == 0:
            return i
-
-
# Integer pairs that the benchmark functions pass to gcd(), one task per pair.
numbers = [
    (1963309, 2265973),
    (1879675, 2493670),
    (2030677, 3814172),
    (1551645, 2229620),
    (1988912, 4736670),
    (2198964, 7876293),
]
-
-
def thread_map_test():
    """Time ``gcd`` over ``numbers`` using ``ThreadPoolExecutor.map``.

    Prints the list of results, then the elapsed seconds. The measured
    interval includes creating the 4-worker pool and shutting it down.
    """
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would distort the measurement.
    start_time = time.perf_counter()
    with ThreadPoolExecutor(max_workers=4) as pool:
        # map() yields results in the same order as the inputs.
        results = list(pool.map(gcd, numbers))
    elapsed = time.perf_counter() - start_time
    print(f'运行结果:{results}')
    print(f'多线程map运行时长:{elapsed}')
-
-
def thread_submit_test():
    """Time ``gcd`` over ``numbers`` using ``ThreadPoolExecutor.submit``.

    Prints the list of results, then the elapsed seconds. The measured
    interval includes creating the 4-worker pool and shutting it down.
    """
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would distort the measurement.
    start_time = time.perf_counter()
    with ThreadPoolExecutor(max_workers=4) as pool:
        # Submit every task first so they can run concurrently.
        futures = [pool.submit(gcd, pair) for pair in numbers]
    # Leaving the with-block waits for all tasks, so result() returns at once;
    # collecting in submission order keeps results aligned with ``numbers``.
    results = [future.result() for future in futures]
    elapsed = time.perf_counter() - start_time
    print(f'运行结果:{results}')
    print(f'多线程submit运行时长:{elapsed}')
-
-
def process_map_test():
    """Time ``gcd`` over ``numbers`` using ``ProcessPoolExecutor.map``.

    Prints the list of results, then the elapsed seconds. The measured
    interval includes starting the 4 worker processes and shutting them down.
    """
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would distort the measurement.
    start_time = time.perf_counter()
    with ProcessPoolExecutor(max_workers=4) as pool:
        # map() yields results in the same order as the inputs.
        results = list(pool.map(gcd, numbers))
    elapsed = time.perf_counter() - start_time
    print(f'运行结果:{results}')
    print(f'多进程map运行时长:{elapsed}')
-
-
def process_submit_test():
    """Time ``gcd`` over ``numbers`` using ``ProcessPoolExecutor.submit``.

    Prints the list of results, then the elapsed seconds. The measured
    interval includes starting the 4 worker processes and shutting them down.
    """
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would distort the measurement.
    start_time = time.perf_counter()
    with ProcessPoolExecutor(max_workers=4) as pool:
        # Submit every task first so they can run concurrently.
        futures = [pool.submit(gcd, pair) for pair in numbers]
    # Leaving the with-block waits for all tasks, so result() returns at once;
    # collecting in submission order keeps results aligned with ``numbers``.
    results = [future.result() for future in futures]
    elapsed = time.perf_counter() - start_time
    print(f'运行结果:{results}')
    print(f'多进程submit运行时长:{elapsed}')
-
-
# Run the four benchmarks in order: thread map, thread submit, process map,
# process submit. The __main__ guard stops the benchmarks from re-running if
# worker processes import this module.
if __name__ == '__main__':
    for benchmark in (
        thread_map_test,
        thread_submit_test,
        process_map_test,
        process_submit_test,
    ):
        benchmark()

当多进程/多线程的任务函数需要两个参数,其中一个参数随任务变化(作为可迭代对象传给 map),另一个参数在所有任务中保持不变时,可以先用 functools.partial 固定不变的参数,再按如下代码进行传参:
from functools import partial

# Freeze the argument shared by every task; map() then supplies only the
# per-task value taken from variable_changed.
crawl_one = partial(data_crawl, variable_constant=variable_constant)
with ProcessPoolExecutor(max_workers=4) as pool:
    # An exception raised inside a worker only surfaces when its result is
    # retrieved from the iterator returned by map(), so consume it here
    # instead of discarding it.
    results = list(pool.map(crawl_one, variable_changed))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。