赞
踩
map的基本使用:
map函数一手包办了序列操作,参数传递和结果保存等一系列的操作。
from multiprocessing.dummy import Pool

# Create a thread pool with 4 workers. The argument is the number of worker
# threads; matching it to the CPU core count is a common choice, not a rule.
pool = Pool(4)
# 爬取函数 (the crawl function) and 网址列表 (the list of URLs) are placeholders
# to be replaced with real names. map() calls the function once per URL and
# returns the results as a list in the same order as the input.
results = pool.map(爬取函数, 网址列表)
from multiprocessing.dummy import Pool as ThreadPool
import time

import requests

# Request headers sent with every page fetch.
kv = {'user-agent': 'Mozilla/5.0'}


def getsource(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded body of the HTTP response.

    Raises:
        requests.RequestException: If the request fails or exceeds the timeout.
    """
    # Without a timeout a single stalled connection would block its worker
    # (or the whole single-threaded loop) indefinitely.
    html = requests.get(url, headers=kv, timeout=10)
    # Return the page so that pool.map() collects real results, not None.
    return html.text


# 41 listing pages; the pn query parameter advances in steps of 50.
urls = [
    'https://tieba.baidu.com/f?kw=读书&ie=utf-8&pn=' + str(page * 50)
    for page in range(41)
]

# Single-threaded crawl: fetch the pages one after another.
time1 = time.perf_counter()  # monotonic clock, suited to measuring intervals
for each in urls:
    print(each)
    getsource(each)
time2 = time.perf_counter()
print(f'单线程耗时: {time2 - time1}')

# Multi-threaded crawl: the same pages fetched through a pool of 8 threads.
pool = ThreadPool(8)
time3 = time.perf_counter()
try:
    results = pool.map(getsource, urls)
finally:
    # Always shut the pool down, even if one of the fetches raised.
    pool.close()
    pool.join()
time4 = time.perf_counter()
print(f'多线程所消耗时间:{time4 - time3}')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。