当前位置:   article > 正文

python解析提取.eml邮件内容及附件_python读取邮件eml里面的附件

python读取邮件eml里面的附件
import re
import os
import email
from email.header import decode_header
from email.utils import parsedate_to_datetime


def parse_eml(eml_fp, attr_dir):
    """
    eml文件解析
    :params eml_fp: eml文件路径
    :params attr_dir: 附件保存目录
    """
    if not os.path.exists(attr_dir):
        os.makedirs(attr_dir)

    # 读取eml文件
    with open(eml_fp, "r") as file:
        eml_content = file.read()
    # 转为email对象
    msg = email.message_from_string(eml_content)

    # 邮件主题
    subject_bytes, subject_encode = decode_header(msg["Subject"])[0]
    if subject_encode:
        subject = subject_bytes.decode(subject_encode)
    else:
        subject = subject_bytes
    print("主题:", subject)

    # 邮件发件人
    from_ip = re.search("<(.*)>", msg["from"]).group(1)
    print("发件人邮箱:", from_ip)
    from_name = decode_header(msg["from"].split("<")[0].strip())
    if from_name:
        if from_name[0] and from_name[0][1]:
            from_n = from_name[0][0].decode(from_name[0][1])
        else:
            from_n = from_name[0][0]
    print("发件人名称:", from_n)

    # 邮件时间
    received_date = parsedate_to_datetime(msg["date"])
    print("接收时间:", received_date)

    # 邮件正文及附件
    for par in msg.walk():
        if not par.is_multipart():  # 判断是否为multipart,里面的数据不需要
            name = par.get_param("name")  # 获取附件的文件名
            if name:  
                # 附件
                fname = decode_header(name)[0]
                if fname[1]:
                    attr_name = fname[0].decode(fname[1])
                else:
                    attr_name = fname[0]
                print("附件名:", attr_name)
                # 解码附件内容
                attr_data = par.get_payload(decode=True)
                attr_fp = os.path.join(attr_dir, attr_name)
                with open(attr_fp, 'wb') as f_write:
                    f_write.write(attr_data)
            else:  
                # 正文
                text_char = par.get_content_charset()
                if "text/plain" in par["content-type"]:  # 文本正文
                    body = par.get_payload(decode=True).decode(text_char)
                    print("邮件正文:", body)
                else:  # html格式正文
                    html_body = par.get_payload(decode=True).decode(text_char)
                    print("HTML正文:", html_body)
            print("-" * 60)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号