main
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3import email
4import email.policy
5import re
6from email.header import decode_header
7from email.message import EmailMessage
8from io import BytesIO
9
10from glom import glom
11from loguru import logger
12from pyrogram.client import Client
13from pyrogram.types import InputMediaDocument, Message, ReplyParameters
14
15from config import CAPTION_LENGTH, DB, TEXT_LENGTH, TZ
16from database.r2 import set_cf_r2
17from messages.utils import count_without_entities
18from utils import convert_html, convert_md, myself, nowstr, readable_size
19
20
async def eml2md(client: Client, message: Message) -> None:
    """Convert a received EML document to Markdown and reply with the result.

    Only documents whose MIME type is ``message/rfc822`` and that were sent by
    the ``smtpbot`` user or by the current account are processed; any other
    message is ignored. The HTML body is stored through ``set_cf_r2`` and the
    HTML and Markdown renderings are sent back as documents replying to the
    original message. Finally the chat is marked as unread.

    Args:
        client: Pyrogram client used to download the file and send replies.
        message: Incoming message that may carry the EML document.
    """
    if glom(message, "document.mime_type", default="") != "message/rfc822":
        return
    # Only handle mail sent by the SMTP bot or by ourselves.
    from_smtp_bot = glom(message, "from_user.username", default="") == "smtpbot"
    if not (from_smtp_bot or glom(message, "from_user.id", default=0) == (await myself(client)).id):
        return
    data: BytesIO = await client.download_media(message, in_memory=True)  # type: ignore
    logger.debug(f"Downloading Email file: {data.name}")
    msg = email.message_from_binary_file(data, policy=email.policy.default)

    # Extract the email metadata.
    subject = decode(msg.get("Subject"))
    from_ = decode(msg.get("From"))
    to = decode(msg.get("To"))
    reply_to = decode(msg.get("Reply-To"))
    date = decode(msg.get("Date")) or nowstr(TZ)
    cc = decode(msg.get("Cc"))
    bcc = decode(msg.get("Bcc"))
    html, md = extract_body(msg)
    attachments = extract_attachments(msg)

    # Store the HTML rendering; the public URL is derived from the same key so
    # the two can never drift apart.
    r2_key = f"TTL/90d/{message.chat.id}-{message.id}.html"
    await set_cf_r2(key=r2_key, data=html, mime_type="text/html")
    url = f"{DB.CF_R2_PUBLIC_URL}/{r2_key}"

    header_lines = []
    if subject:
        header_lines.append(f"**[{subject}]({url})**")
    if from_:
        header_lines.append(f"**From**: {refine_email(from_)}")
    if to:
        header_lines.append(f"**To**: {refine_email(to)}")
    if reply_to:
        header_lines.append(f"**Reply-To**: {refine_email(reply_to)}")
    if date:
        header_lines.append(f"**Date**: {date}")
    if cc:
        header_lines.append(f"**CC**: {refine_email(cc)}")
    if bcc:
        header_lines.append(f"**BCC**: {refine_email(bcc)}")
    if attachments:
        header_lines.append(attachments)
    header = "".join(f"{line}\n" for line in header_lines)

    # The subject comes from untrusted mail and is used as a file name: replace
    # path separators and control characters, and fall back to a fixed stem so
    # an empty subject does not produce files named ".html" / ".md".
    file_stem = re.sub(r"[\\/\x00-\x1f]", "_", subject).strip() or "email"

    async def send_files(caption: str = ""):
        """Send the HTML and Markdown renderings as one media group."""
        with BytesIO(html.encode("utf-8")) as fhtml, BytesIO(md.encode("utf-8")) as fmd:
            await client.send_media_group(
                message.chat.id,
                message_thread_id=message.message_thread_id,
                media=[
                    InputMediaDocument(fhtml, file_name=f"{file_stem}.html"),
                    InputMediaDocument(fmd, caption=caption, file_name=f"{file_stem}.md"),
                ],
                reply_parameters=ReplyParameters(message_id=message.id),
            )

    full_text = header + md
    length = await count_without_entities(full_text)
    if length <= CAPTION_LENGTH:
        # Everything fits in the caption of the files.
        await send_files(full_text)
    elif length <= TEXT_LENGTH:
        # Too long for a caption but fits in a text message.
        await message.reply_text(full_text, quote=True)
        await send_files(header)
    else:
        # Too long for a text message: only the header accompanies the files.
        await send_files(header)
    await client.mark_chat_unread(message.chat.id)
83
84
85def decode(texts: str | None) -> str:
86 # ruff: noqa: PLW2901
87 if not texts:
88 return ""
89 try:
90 decoded_parts = decode_header(texts)
91 result_parts = []
92 for part, encoding in decoded_parts:
93 if isinstance(part, bytes):
94 if encoding:
95 try:
96 part = part.decode(encoding)
97 except (UnicodeDecodeError, LookupError):
98 part = part.decode("utf-8", errors="replace")
99 else:
100 part = part.decode("utf-8", errors="replace")
101 result_parts.append(str(part))
102 return "".join(result_parts)
103 except Exception:
104 return str(texts)
105
106
def refine_email(email: str) -> str:
    """Return *email* with angle brackets replaced by parentheses."""
    bracket_map = str.maketrans("<>", "()")
    return email.translate(bracket_map)
109
110
def extract_body(msg: EmailMessage) -> tuple[str, str]:
    """Extract the email body as an ``(html, markdown)`` pair.

    For multipart messages the first ``text/html`` and the first
    ``text/plain`` part that are not attachments are considered. HTML is
    preferred and converted to Markdown with ``convert_md``; otherwise the
    plain text (with blank-line runs collapsed) is used as Markdown and
    converted to HTML with ``convert_html``.

    Returns:
        ``(html, markdown)``, or ``("", "")`` when no usable body is found.
    """
    html_part: str | None = None
    text_part: str | None = None

    if msg.is_multipart():
        for part in msg.walk():
            # Compare the parsed disposition type rather than searching the raw
            # header text: a substring test misses "ATTACHMENT" and wrongly
            # skips inline parts whose filename contains the word "attachment".
            if part.get_content_disposition() == "attachment":
                continue

            content_type = part.get_content_type()
            if content_type == "text/html" and html_part is None:
                html_part = part.get_content()
            elif content_type == "text/plain" and text_part is None:
                text_part = part.get_content()
    else:
        content_type = msg.get_content_type()
        if content_type == "text/html":
            html_part = msg.get_content()
        elif content_type == "text/plain":
            text_part = msg.get_content()

    if html_part:
        return html_part, convert_md(html=html_part)
    if text_part:
        # Collapse whitespace-only gaps between paragraphs to one blank line.
        text_part = re.sub(r"\n\s*\n", "\n\n", text_part).strip()
        return convert_html(texts=text_part), text_part

    return "", ""
144
145
def extract_attachments(msg: EmailMessage) -> str:
    """List the attachments of *msg* with their filenames and sizes.

    Returns:
        A Markdown block starting with ``**Attachments**`` followed by one
        bullet per attachment, or an empty string when the message is not
        multipart or has no attachments.
    """
    if not msg.is_multipart():
        return ""

    entries = []
    for part in msg.walk():
        # Use the parsed disposition type: a substring test on the raw header
        # misses "ATTACHMENT" and wrongly counts inline parts whose filename
        # contains the word "attachment".
        if part.get_content_disposition() != "attachment":
            continue

        filename = part.get_filename() or "unnamed"
        payload = part.get_payload(decode=True)
        # get_payload(decode=True) yields None for multipart parts.
        size = len(payload) if payload else 0
        entries.append(f"- `{filename}` ({readable_size(size)})\n")

    if not entries:
        return ""
    listing = "".join(entries)
    return f"**Attachments**\n{listing}"
163 return f"**Attachments**\n{attachments}" if attachments else ""