赞
踩
功能:实现数据抓取,并定时发送邮件。
新建 work.py 文件:
#!/usr/bin/python3
# pip3 install requests pandas lxml xlsxwriter openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple
"""Scrape the CEB bank points-mall (xyk.cebbank.com/jfmall), write each item to a
daily CSV, convert the CSV to XLSX, then mail the spreadsheet out.

Two daemon threads cooperate: one fetches pages and pushes CSV rows into
``gQueue``; the other drains the queue into the CSV file.
"""
import re
import requests
import random, time
from lxml import html
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
import threading
from queue import Queue
#from func_timeout import FunctionTimedOut, func_timeout
import pandas as pd

# Fetch thread -> writer thread hand-off of finished CSV rows.
gQueue = Queue()

# FIX: the original dict carried "Host: blog.csdn.net", which is the wrong host
# for xyk.cebbank.com and can make the server reject every request.  The Host
# header is omitted so `requests` supplies the correct one automatically.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.5",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
}


class MyLog:
    """Thin wrapper around ``logging`` appending to /var/log/my_log/message.log."""

    def __init__(self):
        import logging, os
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.DEBUG)  # master switch for log levels
        logdir = "/var/log/my_log"
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        logfile = logdir + '/message' + '.log'
        fh = logging.FileHandler(logfile, mode='a')
        fh.setLevel(logging.INFO)
        formatter = logging.Formatter(
            "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def info(self, mes):
        self.logger.info(mes)

    def debug(self, mes):
        self.logger.debug(mes)

    def warning(self, mes):
        self.logger.warning(mes)

    def error(self, mes):
        self.logger.error(mes)


log = MyLog()
log.info('started')


def get_page_count():
    """Fetch the first search page and log the "next page" pager text (debug helper)."""
    url = "https://xyk.cebbank.com/jfmall/search?keywords="
    # FIX: headers must be a keyword argument -- the second positional argument
    # of requests.get() is `params`, so the headers were silently never sent.
    res = requests.get(url, headers=headers, timeout=10)
    # FIX: the attribute is `encoding`; `res.decoding = ...` was a no-op typo.
    res.encoding = 'gbk'
    log.info(res.text)
    # FIX: '(' and ')' in "void(0)" are regex metacharacters and must be
    # escaped, otherwise this pattern can never match the pager link.
    result = re.compile(r'<a href="javascript:void\(0\)" class="next" rel="next">(.*?)</a>', re.S)
    page = re.findall(result, res.text)
    log.info(page)


def get_urls(page):
    """Return the product-detail URLs found on search-result page ``page``.

    Side effect: rebuilds the global ``dict_score`` mapping URL -> points
    price, for later lookup by get_data().
    """
    global dict_score
    requests.DEFAULT_RETRIES = 15  # raise connection retry count
    url0 = "https://xyk.cebbank.com/jfmall/search?keywords=&pageNo=" + str(page)
    # FIX: headers passed by keyword (was positional -> interpreted as params).
    req0 = requests.get(url0, headers=headers, timeout=60)
    req0.encoding = 'gbk'  # FIX: was `decoding`, a no-op typo
    result1 = re.compile('<div class="main-item-list-title">.*?<a href="(.*?)" class="text-hover-black js-filter-title" rel="noopener noreferrer"', re.S)
    result2 = re.compile('<span class="text-color-red text-font-size-18 text-font-weight-bold">(.*?)</span>', re.S)
    url = re.findall(result1, req0.text)
    jifen = re.findall(result2, req0.text)
    urls = ["https://xyk.cebbank.com" + u1 for u1 in url]
    # FIX: zip() stops at the shorter list, so a mismatch between the two regex
    # result lists no longer raises IndexError as the manual index loop did.
    dict_score = dict(zip(urls, jifen))
    return urls


def deal_size_color(data):
    """Split the scraped spec list into [color, size].

    ``data`` holds zero, one or two strings; a string containing '色' is taken
    to be the color.  '无' marks a missing field.  Commas are replaced with
    semicolons (CSV safety) and double quotes stripped from the size.
    NOTE(review): lists longer than 2 fall through and return ['', ''] --
    behavior kept as-is, confirm against real pages.
    """
    color = ''
    size = ''
    if len(data) == 0:
        color, size = '无', '无'
    if len(data) == 1:
        if '色' in data[0]:
            color = data[0]
            size = '无'
        else:
            size = data[0]
            color = '无'
    if len(data) == 2:
        if '色' in data[0]:
            color = data[0]
            size = data[1]
        else:
            size = data[0]
            if '色' in data[1]:
                color = data[1]
            else:
                color = '无'
    if ',' in color:
        color = color.replace(',', ';')
    if ',' in size:
        size = size.replace(',', ';')
    if '"' in size:
        size = size.replace('"', '')
    return [color, size]


def get_data(url):
    """Fetch one product page and return its CSV row (or None on any failure)."""
    try:
        global dict_score
        requests.DEFAULT_RETRIES = 15  # raise connection retry count
        # FIX: headers passed by keyword (was positional -> interpreted as params).
        res = requests.get(url, headers=headers, timeout=60)
        res.encoding = 'utf-8'
        t1 = html.fromstring(res.text)
        name = t1.xpath('//div[@class="product-detail-content-title js-itemId"]/text()')[0].strip()
        duihuan = t1.xpath('//div[@class="text-color-red text-font-weight-bold"]/text()')[0].strip()
        score = dict_score[url]
        size_col = t1.xpath('//span[contains(@class,"meta-title")]/text()')
        sc = deal_size_color(data=size_col)
        size = sc[1].strip()
        color = sc[0].strip()
        get_style = t1.xpath('//span[@class="exchangeWay"]/text()')[0].strip()
        categorys = t1.xpath('//a[@class="js-category-select"]/text()')
        tt = [i.strip() for i in categorys if i.strip()]
        category = tt[3]
        gongying = t1.xpath('//div[@class="real-information"]/span/text()')
        shop = gongying[1]
        shop_call = gongying[3]
        shop_time = gongying[5]
        content = str(name) + ',' + str(score) + ',' + str(color) + ',' + str(size) + ',' \
            + str(get_style) + ',' + str(category) + ',' + str(duihuan) + ',' \
            + str(shop) + ',' + str(shop_call) + ',' + str(shop_time) + ',' + str(url) + '\n'
        return content
    except Exception as e:
        log.info(e)
        log.info("##################this url is a no response: %s" % url)


def get_data_all(pages):
    """Walk pages 1..pages, queue one CSV row per product (None rows are skipped downstream)."""
    pages = int(pages)
    for page in range(1, pages + 1):
        log.info('正在获取第%s页商品...' % page)
        urls_one = get_urls(page)
        log.info("该页所有商品URL: %s" % urls_one)
        log.info("正在全力工作中......")
        if not urls_one:
            continue
        try:
            for i in urls_one:
                content = get_data(i)
                gQueue.put(content)
                time.sleep(3.4)  # polite delay between product requests
        except Exception as e:
            log.info(e)
            continue
    log.info('成功取到所有数据.')


def save_data():
    """Writer thread: drain ``gQueue`` forever, appending rows to today's CSV."""
    count = 0
    title = '商品名,兑换积分,商品规格,商品颜色,购买方式,分类,兑换,供货商名,供货商电话,供货商工作时间,商品链接\n'
    times = time.time()
    local_time = time.localtime(times)
    today = time.strftime("%Y-%m-%d", local_time)
    file_name = '/root/py/work-' + today + '.csv'
    with open(file_name, 'w', encoding='gbk') as file:
        file.write(title)
    while True:
        time.sleep(0.1)
        if not gQueue.empty():
            msg = gQueue.get()
            if msg is None:  # get_data() returns None for failed URLs
                continue
            count = count + 1
            today_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
            log.info('%s###第%s条###%s' % (today_time, count, msg))
            with open(file_name, 'a', encoding='gbk') as file:
                file.write(msg)


def send_mail():
    """E-mail today's XLSX as an attachment via QQ-mail SMTP over SSL.

    Credentials: `password` is a QQ-mail authorization code (mailbox settings ->
    account -> enable POP3/SMTP service).
    """
    from_addr = '......@qq.com'
    password = '......'
    to_addr_qq = '......@qq.com'
    to_addr = '......@nooce.cn'
    smtp_server = 'smtp.qq.com'
    html_msg = """ <p>csv文件</p> """
    msg = MIMEMultipart()
    msg['From'] = Header('Q')
    msg['To'] = Header('珍')
    times = time.time()
    local_time = time.localtime(times)
    today = time.strftime("%Y-%m-%d", local_time)
    today_title = time.strftime("%Y年%m月%d日,", local_time)
    subject = today_title + '来自Q sir的邮件'
    msg['Subject'] = Header(subject, 'utf-8')
    msg.attach(MIMEText(html_msg, 'html', 'utf-8'))
    # Attach today's spreadsheet; the filename shown in the mail is arbitrary.
    context = '/root/py/work-' + today + '.xlsx'
    att1 = MIMEText(open(context, 'rb').read(), 'base64', 'utf-8')
    att1["Content-Type"] = 'application/octet-stream'
    att1["Content-Disposition"] = 'attachment; filename="zhuzhu-2022.xlsx"'
    msg.attach(att1)
    # FIX: initialize before try -- if SMTP_SSL() itself raised, the finally
    # clause referenced an unbound name and masked the real error.
    smtpobj = None
    try:
        smtpobj = smtplib.SMTP_SSL(smtp_server)
        smtpobj.connect(smtp_server, 465)
        smtpobj.login(from_addr, password)
        smtpobj.sendmail(from_addr, to_addr, msg.as_string())
        smtpobj.sendmail(from_addr, to_addr_qq, msg.as_string())
        log.info("给小可爱的邮件已经成功发送!")
    except smtplib.SMTPException:
        log.info("无法发送邮件哦")
    finally:
        if smtpobj is not None:
            smtpobj.quit()


def csv_excel():
    """Convert today's CSV to XLSX with auto-sized columns. Return 0 on success, -1 on failure."""
    try:
        times = time.time()
        local_time = time.localtime(times)
        today = time.strftime("%Y-%m-%d", local_time)
        xlsFilepath = '/root/py/work-' + today + '.xlsx'
        csv_path = '/root/py/work-' + today + '.csv'
        my_dataframe = pd.read_csv(csv_path, low_memory=False, encoding='gbk')
        writer = pd.ExcelWriter(xlsFilepath, engine='xlsxwriter')
        my_dataframe.to_excel(writer, startrow=1, sheet_name='Sheet1', index=False)
        worksheet = writer.sheets['Sheet1']
        # Width of each column = max(len of longest value, len of header) + 2.
        for i, col in enumerate(my_dataframe.columns):
            column_len = my_dataframe[col].astype(str).str.len().max()
            column_len = max(column_len, len(col)) + 2
            worksheet.set_column(i, i, column_len)
        writer.save()  # NOTE(review): deprecated in pandas>=1.5; use writer.close() there
        log.info("csv to excel success")
        return 0
    except Exception as e:
        log.info('csv to excel failed,reason is%s' % e)
        return -1


def main():
    """Start fetch + writer threads, then convert and mail once fetching ends."""
    pages = 15
    t1 = threading.Thread(target=get_data_all, args=(pages,))
    t2 = threading.Thread(target=save_data)
    # FIX: setDaemon() is deprecated; assign the attribute instead.
    t1.daemon = True
    t2.daemon = True
    t1.start()
    t2.start()
    while True:
        time.sleep(10)
        # FIX: isAlive() was removed in Python 3.9; the method is is_alive().
        if t1.is_alive():
            continue
        exc = csv_excel()
        if exc != 0:
            return
        log.info("####准备发送邮件啦####")
        time.sleep(25)  # give the writer thread time to drain the queue
        send_mail()
        log.info("####任务结束####")
        break


if __name__ == '__main__':
    main()
# 编辑定时任务:
crontab -e
# 每天 09:05 运行;每周一 13:30 再运行一次。
# (crontab 条目末尾的 & 是多余的,cron 本身就在后台执行任务。)
05 09 * * * /usr/bin/python3 /root/py/work.py
30 13 * * 1 /usr/bin/python3 /root/py/work.py
#!/usr/bin/python3
"""Async rewrite of the points-mall scraper.

Collects all product URLs synchronously (httpx), fetches the detail pages
concurrently with asyncio + httpx.AsyncClient, parses the responses into a
daily CSV, converts it to a sorted XLSX, and optionally mails it.
"""
import re
import httpx
import asyncio, aiohttp
import random, time
from lxml import html
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
from queue import Queue
import time, os
import pandas as pd
from func_timeout import FunctionTimedOut, func_timeout

# Raw responses are queued here by get_data() and drained by save_csv().
gQueue = Queue()
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.5",
    "Connection": "keep-alive",
    "Host": "xyk.cebbank.com",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
}
dict_score = {}    # product URL -> points price (filled by get_urls)
url_res_dict = {}  # response object -> product URL (filled by get_data)
# FIX: the original created asyncio.Semaphore(500) *inside each coroutine*,
# which limits nothing.  One shared semaphore (created lazily, inside the
# running loop) actually caps the number of concurrent requests.
_sem = None


def send_mail():
    """E-mail today's XLSX via QQ-mail SMTP over SSL.

    `password` is a QQ-mail authorization code, not the account password.
    """
    from_addr = '@qq.com'
    password = ''
    to_addr_qq = '@qq.com'
    to_addr = '@nooce.cn'
    smtp_server = 'smtp.qq.com'
    html_msg = """ <p>csv文件</p> """
    msg = MIMEMultipart()
    msg['From'] = Header('Q')
    msg['To'] = Header('珍')
    times = time.time()
    local_time = time.localtime(times)
    today = time.strftime("%Y-%m-%d", local_time)
    today_title = time.strftime("%Y-%m-%d:", local_time)
    subject = today_title + '来自Q sir的邮件'
    msg['Subject'] = Header(subject, 'utf-8')
    msg.attach(MIMEText(html_msg, 'html', 'utf-8'))
    # Attach today's spreadsheet; the filename shown in the mail is arbitrary.
    context = 'work-' + today + '.xlsx'
    att1 = MIMEText(open(context, 'rb').read(), 'base64', 'utf-8')
    att1["Content-Type"] = 'application/octet-stream'
    att1["Content-Disposition"] = 'attachment; filename="zhuzhu-2022.xlsx"'
    msg.attach(att1)
    # FIX: initialize before try -- if SMTP_SSL() itself raised, the finally
    # clause referenced an unbound name and masked the real error.
    smtpobj = None
    try:
        smtpobj = smtplib.SMTP_SSL(smtp_server)
        smtpobj.connect(smtp_server, 465)
        smtpobj.login(from_addr, password)
        smtpobj.sendmail(from_addr, to_addr, msg.as_string())
        smtpobj.sendmail(from_addr, to_addr_qq, msg.as_string())
        print("给小可爱的邮件已经成功发送!")
    except smtplib.SMTPException:
        print("无法发送邮件哦")
    finally:
        if smtpobj is not None:
            smtpobj.quit()


def get_urls(page):
    """Return product URLs on search page ``page`` (empty list on a 403 page).

    Side effect: extends the global ``dict_score`` with URL -> points price.
    """
    global dict_score
    url0 = "https://xyk.cebbank.com/jfmall/search?keywords=&pageNo=" + str(page)
    with httpx.Client() as client:
        req0 = client.get(url0, headers=headers, timeout=30)
    req0.encoding = 'gbk'  # FIX: was `decoding`, a no-op typo
    if 'This is 403 error page' in req0.text:
        print('have a 403 error,function not use')
        # FIX: was `return` (None) -- the caller does urls_all.extend(...),
        # which crashed with TypeError on None.
        return []
    result1 = re.compile('<div class="main-item-list-title">.*?<a href="(.*?)" class="text-hover-black js-filter-title" rel="noopener noreferrer"', re.S)
    result2 = re.compile('<span class="text-color-red text-font-size-18 text-font-weight-bold">(.*?)</span>', re.S)
    url = re.findall(result1, req0.text)
    jifen = re.findall(result2, req0.text)
    urls = ["https://xyk.cebbank.com" + u1 for u1 in url]
    # FIX: zip() stops at the shorter list, so mismatched regex result lists
    # can no longer raise IndexError as the manual index loop did.
    dict_score.update(zip(urls, jifen))
    return urls


def deal_size_color(data):
    """Split the scraped spec list into [color, size].

    ``data`` holds zero, one or two strings; a string containing '色' is taken
    to be the color.  '无' marks a missing field.  Commas become semicolons
    (CSV safety) and double quotes are stripped from the size.
    NOTE(review): lists longer than 2 fall through and return ['', ''] --
    behavior kept as-is, confirm against real pages.
    """
    color = ''
    size = ''
    if len(data) == 0:
        color, size = '无', '无'
    if len(data) == 1:
        if '色' in data[0]:
            color = data[0]
            size = '无'
        else:
            size = data[0]
            color = '无'
    if len(data) == 2:
        if '色' in data[0]:
            color = data[0]
            size = data[1]
        else:
            size = data[0]
            if '色' in data[1]:
                color = data[1]
            else:
                color = '无'
    if ',' in color:
        color = color.replace(',', ';')
    if ',' in size:
        size = size.replace(',', ';')
    if '"' in size:
        size = size.replace('"', '')
    return [color, size]


async def get_data(url):
    """Fetch one product page concurrently; queue the response for save_csv().

    Timeouts/errors are skipped with a message (the row is simply dropped).
    """
    global gQueue
    global dict_score
    global count
    global url_res_dict
    global _sem
    count += 1
    if _sem is None:
        # Created inside the running loop; caps concurrent in-flight requests.
        _sem = asyncio.Semaphore(500)
    try:
        async with _sem:
            async with httpx.AsyncClient() as client:
                res = await client.get(url, headers=headers, timeout=20)
                url_res_dict[res] = url
                gQueue.put(res)
    except Exception:
        print('超时数据自动跳过.')


def save_csv():
    """Drain the response queue, parse each page, append rows to today's CSV."""
    global url_res_dict, dict_score
    while not gQueue.empty():
        try:
            res = gQueue.get()
            url = url_res_dict[res]
            res.encoding = 'utf-8'
            t1 = html.fromstring(res.text)
            name = t1.xpath('//div[@class="product-detail-content-title js-itemId"]/text()')[0].strip()
            duihuan = t1.xpath('//div[@class="text-color-red text-font-weight-bold"]/text()')[0].strip()
            score = dict_score[url]
            size_col = t1.xpath('//span[contains(@class,"meta-title")]/text()')
            sc = deal_size_color(data=size_col)
            size = sc[1].strip()
            color = sc[0].strip()
            get_style = t1.xpath('//span[@class="exchangeWay"]/text()')[0].strip()
            categorys = t1.xpath('//a[@class="js-category-select"]/text()')
            tt = [i.strip() for i in categorys if i.strip()]
            category = tt[3]
            gongying = t1.xpath('//div[@class="real-information"]/span/text()')
            shop = gongying[1]
            shop_call = gongying[3]
            shop_time = gongying[5]
            content = str(name) + ',' + str(score) + ',' + str(color) + ',' + str(size) + ',' \
                + str(get_style) + ',' + str(category) + ',' + str(duihuan) + ',' \
                + str(shop) + ',' + str(shop_call) + ',' + str(shop_time) + ',' + str(url) + '\n'
            local_time = time.localtime(time.time())
            today = time.strftime("%Y-%m-%d", local_time)
            file_name = 'work-' + today + '.csv'
            with open(file_name, 'a', encoding='gbk') as file:
                file.write(content)
        except Exception as e:
            print(e)


def csv_excel():
    """Convert today's CSV to a points-sorted XLSX with auto-sized columns.

    Returns 0 on success (and removes the CSV), -1 on failure.
    """
    print("开始转换成excel数据")
    try:
        times = time.time()
        local_time = time.localtime(times)
        today = time.strftime("%Y-%m-%d", local_time)
        xlsFilepath = 'work-' + today + '.xlsx'
        csv_path = 'work-' + today + '.csv'
        my_dataframe = pd.read_csv(csv_path, low_memory=False, encoding='gbk')
        if not len(my_dataframe):
            print("not data")
            return -1
        my_dataframe = my_dataframe.sort_values(by='兑换积分')
        writer = pd.ExcelWriter(xlsFilepath, engine='xlsxwriter')
        my_dataframe.to_excel(writer, startrow=1, sheet_name='Sheet1', index=False)
        worksheet = writer.sheets['Sheet1']
        # Width of each column = max(len of longest value, len of header) + 2.
        for i, col in enumerate(my_dataframe.columns):
            column_len = my_dataframe[col].astype(str).str.len().max()
            column_len = max(column_len, len(col)) + 2
            worksheet.set_column(i, i, column_len)
        writer.save()  # NOTE(review): deprecated in pandas>=1.5; use writer.close() there
        print("转换成excel表格成功。")
    except Exception as e:
        print('转换成excel表格失败,原因 is%s' % e)
        return -1
    else:
        # Success: the CSV is an intermediate artifact, remove it.
        if os.path.exists(csv_path):
            os.remove(csv_path)
        return 0


def get_tasks():
    """Prompt for page count / mail choice, collect all product URLs.

    Returns (urls_all, mail).  Input falls back to 10 pages / no mail after a
    15 s timeout or a non-integer page count.
    """
    try:
        pages = func_timeout(15, lambda: input('请输入需要的数据总页数,默认每页20条数据(要输入整数哦):'))
        mail = func_timeout(15, lambda: input('是否发送邮件,请输入"yes" or "no":'))
    except FunctionTimedOut:
        pages = 10
        mail = 'no'
        print('输入超时,默认获取10页数据,不发邮件哦')
    # FIX: a non-integer answer used to crash with an unhandled ValueError.
    try:
        pages = int(pages)
    except ValueError:
        pages = 10
        print('输入不是整数,默认获取10页数据')
    print("开始获取数据了哦,默认按照兑换积分升序排序。")
    global count
    count = 0
    title = '商品名,兑换积分,商品规格,商品颜色,购买方式,分类,兑换,供货商名,供货商电话,供货商工作时间,商品链接\n'
    times = time.time()
    local_time = time.localtime(times)
    today1 = time.strftime("%Y-%m-%d", local_time)
    file_name = 'work-' + today1 + '.csv'
    with open(file_name, 'w', encoding='gbk') as file:
        file.write(title)
    urls_all = []
    for page in range(1, pages + 1):
        print('正在获取第%s页商品...' % page)
        time.sleep(0.5)  # polite delay between listing pages
        urls_all.extend(get_urls(page))
    print("所有商品URL: %s" % len(urls_all))
    return (urls_all, mail)


if __name__ == '__main__':
    result = get_tasks()
    urls_all, mail = result
    # Windows select() caps concurrency around 509 sockets, so fetch in
    # batches of 500.  FIX: the old (start, end)-pair construction appended an
    # empty slice when len(urls) was an exact multiple of 500, and
    # asyncio.wait([]) raises ValueError.
    BATCH = 500
    batches = [urls_all[off:off + BATCH] for off in range(0, len(urls_all), BATCH)]
    if len(batches) > 1:
        print(f"由于数据条数为:{len(urls_all)}条,需要分{len(batches)}次运行")
    start = time.time()
    loop = asyncio.get_event_loop()
    print("开始进行并发请求中...")
    for batch in batches:
        tasks = [loop.create_task(get_data(u)) for u in batch]
        loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    save_csv()
    end = time.time()
    print(f"执行完成,共耗时: {end - start}秒")
    csv_excel()
    if mail == 'yes' or mail == 'y':
        send_mail()
        print("邮件发送成功。")
    else:
        print("用户取消发送邮件邮件。")
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。