赞
踩
**前言:**昨天接到这个任务后,我查阅了很多资料和博客,相关资料的链接会放在本文末尾。
我以网页邮箱(163邮箱)为例:
# -*- encoding: utf-8 -*-
"""Export every message in a 163.com INBOX to one text file per message.

For each message the sender, subject, envelope date and the plain text of
the body (HTML tags stripped) are written to ``D:\\IMAP\\<uid>.txt``.
"""
import email
import email.header  # imported explicitly; ``import email`` alone does not guarantee it
import sys

from imapclient import IMAPClient
from bs4 import BeautifulSoup


def _decode_header_value(raw):
    """Return an RFC 2047 encoded header as readable text ('' if absent)."""
    if raw is None:
        return ''
    return str(email.header.make_header(email.header.decode_header(raw)))


def _decode_payload(payload, declared_charset):
    """Decode body bytes, preferring the charset declared by the MIME part.

    Falls back to UTF-8 and then GB18030 (a superset of GBK). UTF-8 is tried
    before the GB family because it is strict, whereas GBK can "successfully"
    decode UTF-8 bytes into mojibake. As a last resort undecodable bytes are
    replaced instead of aborting the whole export.
    """
    for charset in (declared_charset, 'utf-8', 'gb18030'):
        if not charset:
            continue
        try:
            return payload.decode(charset)
        except (UnicodeDecodeError, LookupError):
            # Wrong or unknown charset: try the next candidate.
            continue
    return payload.decode('utf-8', errors='replace')


def _extract_text_body(message):
    """Return the decoded text of the last non-attachment text/* part.

    ``walk()`` also visits nested multipart containers, so a
    multipart/alternative wrapped in multipart/mixed is handled. The last
    text part wins (typically the HTML alternative). Returns '' when the
    message has no text part.
    """
    body = ''
    for part in message.walk():
        if part.get_content_maintype() != 'text':
            continue
        if part.get_content_disposition() == 'attachment':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # Drop leading/trailing newlines and spaces before decoding.
        body = _decode_payload(payload.strip(), part.get_content_charset())
    return body


# IMAP server address
hostname = 'imap.163.com'
# User name, i.e. the mailbox account
username = '邮箱账号'
# Client authorization code, NOT the normal mailbox password
passwd = '你的授权码'

# Connect to the server
server = IMAPClient(hostname, ssl=True)
try:
    # Log in
    server.login(username, passwd)
    # Send the client identity (IMAP ID command)
    server.id_({"name": "IMAPClient", "version": "2.1.0"})
    # Folder listing: 'INBOX', drafts, sent, ...
    dictList = server.list_folders()
    # print(dictList)
    # Open the inbox read-only
    info = server.select_folder('INBOX', readonly=True)
except server.Error:
    print('Could not login')
    sys.exit(1)

try:
    # Get the list of message UIDs
    for uid in server.search():
        # One round-trip fetches both the raw message and its envelope.
        fetched = server.fetch(uid, ['BODY[]', 'ENVELOPE'])[uid]

        # The server returns the raw message as bytes, so parse it as bytes.
        email_content = email.message_from_bytes(fetched[b'BODY[]'])
        # To inspect the raw headers (e.g. the declared charset), print here:
        # print(email_content)

        # Subject
        subject = _decode_header_value(email_content['SUBJECT'])
        # Sender
        mail_from = _decode_header_value(email_content['From'])
        # Date taken from the IMAP envelope
        dates = fetched[b'ENVELOPE'].date
        # Body text (may still be HTML at this point)
        mail_content = _extract_text_body(email_content)

        # Write to a text file. gb18030 can encode every character, so the
        # write does not fail on characters outside GBK.
        # Raw f-string: backslashes in the Windows path are literal.
        with open(rf'D:\IMAP\{uid}.txt', 'w', encoding="gb18030") as f:
            f.write(f'From:{mail_from}\n')
            f.write(f'Subject:{subject}\n')
            f.write(f'Date:{dates}\n')
            f.write('正文内容:\n')
            # Strip HTML markup with BeautifulSoup and remove blank-line gaps.
            text = BeautifulSoup(mail_content, 'html.parser').get_text().strip()
            f.write(text.replace('\n\n', '') + '\n')
finally:
    # Always close the IMAP session, even if a message fails to export.
    server.logout()
# -*- encoding: utf-8 -*-
"""Complete script: dump every INBOX message of a 163.com account to text.

Each message is written to ``D:\\IMAP\\<uid>.txt`` with its sender, subject,
envelope date and the body text with HTML markup removed.
"""
import email
import email.header  # imported explicitly; ``import email`` alone does not guarantee it
import sys

from imapclient import IMAPClient
from bs4 import BeautifulSoup


def _header_text(value):
    """Decode an RFC 2047 header value to text; '' when the header is missing."""
    if value is None:
        return ''
    return str(email.header.make_header(email.header.decode_header(value)))


def _bytes_to_text(data, charset):
    """Decode *data* using the part's declared charset, then UTF-8, then GB18030.

    If every candidate fails, undecodable bytes are replaced so that a single
    odd message does not stop the export.
    """
    for candidate in (charset, 'utf-8', 'gb18030'):
        if not candidate:
            continue
        try:
            return data.decode(candidate)
        except (UnicodeDecodeError, LookupError):
            continue
    return data.decode('utf-8', errors='replace')


def _body_text(message):
    """Return the last non-attachment text/* part of *message*, decoded.

    Nested multipart containers are traversed. Returns '' if there is no
    text part at all.
    """
    text = ''
    for part in message.walk():
        if part.get_content_maintype() != 'text':
            continue
        if part.get_content_disposition() == 'attachment':
            continue
        raw = part.get_payload(decode=True)
        if raw is not None:
            text = _bytes_to_text(raw.strip(), part.get_content_charset())
    return text


# IMAP server address
hostname = 'imap.163.com'
# User name, i.e. the mailbox account
username = '邮箱账号'
# Client authorization code, NOT the normal mailbox password
passwd = '你的授权码'

# Connect to the server
server = IMAPClient(hostname, ssl=True)
try:
    server.login(username, passwd)
    # Send the client identity (IMAP ID command)
    server.id_({"name": "IMAPClient", "version": "2.1.0"})
    # Folder listing: 'INBOX', drafts, sent, ...
    dictList = server.list_folders()
    # Open the inbox read-only
    info = server.select_folder('INBOX', readonly=True)
except server.Error:
    print('Could not login')
    sys.exit(1)

try:
    for uid in server.search():
        # Fetch the raw message and the envelope in a single request.
        item = server.fetch(uid, ['BODY[]', 'ENVELOPE'])[uid]

        # Parse the raw bytes directly; no str round-trip is needed.
        email_content = email.message_from_bytes(item[b'BODY[]'])

        subject = _header_text(email_content['SUBJECT'])
        mail_from = _header_text(email_content['From'])
        dates = item[b'ENVELOPE'].date
        mail_content = _body_text(email_content)

        # Raw f-string keeps the Windows path backslashes literal.
        with open(rf'D:\IMAP\{uid}.txt', 'w', encoding="gb18030") as f:
            f.write(f'From:{mail_from}\n')
            f.write(f'Subject:{subject}\n')
            f.write(f'Date:{dates}\n')
            f.write('正文内容:\n')
            plain = BeautifulSoup(mail_content, 'html.parser').get_text().strip()
            f.write(plain.replace('\n\n', '') + '\n')

        # To preview on the console instead, print mail_from, subject, dates
        # and mail_content here.
finally:
    # Log out even when exporting a message raised an exception.
    server.logout()
article/details/114489568)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。