赞
踩
之前用 Python 写多进程时使用 multiprocessing 库,写多线程时使用 threading 库。Python 3 中 multiprocessing.dummy 提供的线程池写法比直接使用 threading 简单得多,这里记录一下用法。
前提:多线程、多进程不是用来炫技的技术,而是工作中提升效率的一种选择。多数计算密集型任务适合用多进程,IO 密集型任务适合用多线程。
from multiprocessing.dummy import Pool


def test(data):
    """Print one work item; stands in for the real per-item task."""
    print(data)


# ---------- start the thread pool ----------
domainlist = [1, 2, 3, 4]
pool = Pool(50)  # number of worker threads
# map(func, iterable): call func once per item; blocks until all calls finish
# and returns the results in input order.
result = pool.map(test, domainlist)
# Shut the pool down explicitly so the worker threads exit, mirroring the
# close()/join() pair used in the process-pool example.
pool.close()
pool.join()
# ---------- thread pool finished ----------
from multiprocessing import Pool


def test(data):
    """Print one work item; stands in for the real per-item task."""
    print(data)


# The guard is required for process pools: under the spawn/forkserver start
# methods every worker re-imports this module, and an unguarded Pool(...) at
# module level would try to create a new pool inside each worker.
if __name__ == "__main__":
    # ---------- start the process pool ----------
    domainlist = [1, 2, 3, 4]
    pool = Pool(processes=10)  # number of worker processes
    # map takes two arguments: the function to call, and the list of
    # arguments to call it with (one call per element).
    pool.map(test, domainlist)
    pool.close()  # no more tasks will be submitted
    pool.join()   # wait for the workers to exit
    # ---------- process pool finished ----------
例如:有大量的 domain 需要判断是否存活,并且要尽快判断完成。下面分别用多线程和多进程两种写法实现,供参考。
import json
import time
from multiprocessing.dummy import Pool

import requests


def req(domain):
    """Probe ``https://<domain>`` and report the outcome.

    Returns the HTTP status code of the response, or the string
    ``'noserver'`` if the request raised (the request uses a 2-second
    timeout).
    """
    url = 'https://' + domain
    try:
        r = requests.get(url, timeout=2)
    except Exception:
        # Deliberately broad: one unreachable or malformed domain must not
        # abort the whole pool.map() run.
        return 'noserver'
    return r.status_code


def writefile(filename, data):
    """Append ``data`` as one line to ``filename``."""
    # The context manager closes the file even if the write raises.
    with open(filename, 'a+') as wf:
        print(data, file=wf)


def checkhttp(domain):
    """Probe one domain and record the result.

    200 is printed and appended to ``alive.log``; 301/302 is only printed;
    a failed request is appended to ``notsever.log``. Any other status is
    ignored.
    """
    status = req(domain)
    if status == 200:
        print(domain + "-----is alive ")
        writefile('alive.log', domain)
    elif status in (301, 302):
        # NOTE(review): requests.get follows redirects by default, so this
        # branch is presumably only hit when a redirect could not be
        # followed; pass allow_redirects=False in req() if every 30x answer
        # should land here instead -- confirm the intended behaviour.
        print('30x跳转:' + domain)
    elif status == 'noserver':
        writefile('notsever.log', domain)


if __name__ == "__main__":
    print('start_time:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    f1 = "/root/test/alldomain.txt"
    domainlist = []
    with open(f1, 'r') as f:
        for line in f:
            fields = line.split()
            # Skip blank lines; the first whitespace-separated field is the
            # domain name.
            if fields:
                domainlist.append(fields[0])
    # ---------- start the thread pool ----------
    pool = Pool(10)  # number of worker threads
    # map(func, iterable): call func once per list item; blocks until done.
    result = pool.map(checkhttp, domainlist)
    pool.close()
    pool.join()
    # ---------- thread pool finished ----------
    print('end_time:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
启动后可以用 top 或 pstree 命令验证是否启动成功(多线程版本配置了 10 个线程,实际可以看到 13 个,多出的 3 个是主进程中用于调度的线程)。
import json
import time
from multiprocessing import Pool

import requests


def req(domain, timeout=2):
    """Probe ``https://<domain>`` and report the outcome.

    ``timeout`` is the number of seconds passed to ``requests.get``; without
    it a host that never answers would block a worker indefinitely.

    Returns the HTTP status code of the response, or the string
    ``'nohttpserver'`` if the request raised.
    """
    url = 'https://' + domain
    try:
        r = requests.get(url, timeout=timeout)
    except Exception:
        # Deliberately broad: one unreachable or malformed domain must not
        # abort the whole pool.map() run.
        return 'nohttpserver'
    print("请求域名" + domain + "返回码:" + str(r.status_code))
    return r.status_code


def writefile(filename, data):
    """Append ``data`` as one line to ``filename``."""
    # The context manager closes the file even if the write raises.
    with open(filename, 'a+') as wf:
        print(data, file=wf)


def checkhttp(domain):
    """Probe one domain and record the result.

    200 is appended to ``Ishttpserver.log``; 301/302 is only printed; a
    failed request is appended to ``Nserver.log``. Any other status is
    ignored.
    """
    status = req(domain)
    if status == 200:
        writefile('Ishttpserver.log', domain)
    elif status in (301, 302):
        # Redirects are only reported here, pending a second-pass check.
        # NOTE(review): requests.get follows redirects by default, so this
        # branch is presumably rarely hit -- confirm the intended behaviour.
        print('30x跳转:' + domain)
    elif status == 'nohttpserver':
        writefile('Nserver.log', domain)


if __name__ == "__main__":
    f1 = "/root/test/alldomain.txt"
    domainlist = []
    with open(f1, 'r') as f:
        for line in f:
            fields = line.split()
            # Skip blank lines; the first whitespace-separated field is the
            # domain name.
            if fields:
                domainlist.append(fields[0])
    # ---------- start the process pool ----------
    pool = Pool(processes=10)  # number of worker processes
    # map takes two arguments: the function to call, and the list of
    # arguments to call it with (one call per element).
    pool.map(checkhttp, domainlist)
    pool.close()  # no more tasks will be submitted
    pool.join()   # wait for the workers to exit
    # ---------- process pool finished ----------
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。