Commit c7886e5
Changed files (2)
src
messages
src/messages/database.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-import contextlib
import json
import re
@@ -11,6 +10,7 @@ from pyrogram.types import Message, ReplyParameters
from config import DB
from database import del_db, get_db, set_db
+from messages.parser import parse_msg
from messages.progress import modify_progress
from messages.utils import sender_markdown_to_html
from utils import to_int
@@ -46,16 +46,10 @@ async def save_messages(messages: list[Message | None], key: str, metadata: dict
data = []
media_group_ids = set() # save once
for msg in valid_messages:
- text = ""
- if msg.text:
- text = msg.text
- if msg.caption:
- text = msg.caption
- if hasattr(text, "html"): # DO NOT use markdown, because this format has some bugs
- text = text.html # type: ignore
+ info = parse_msg(msg, silent=True)
# Caution: this format should be consistent with `handle_social_media` function in `handler.py`
# text = re.sub(r"^👤\[@.*?\]\(tg://user\?id=\d+\)//", "", text) # remove markdown send_from_user
- text = re.sub(r"^๐ค\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", text) # remove markdown send_from_user
+ text = re.sub(r"^๐ค\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", info["html"]) # remove markdown send_from_user
msg_extra = {"text": text} if text else {}
if msg.media_group_id:
if msg.media_group_id not in media_group_ids:
@@ -63,19 +57,19 @@ async def save_messages(messages: list[Message | None], key: str, metadata: dict
media_group_ids.add(msg.media_group_id)
data.append({"type": "media_group", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
continue
- if msg.video:
+ if info["mtype"] == "video":
logger.trace(f"Saving video message {msg.id}")
data.append({"type": "video", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
continue
- if msg.photo:
+ if info["mtype"] == "photo":
logger.trace(f"Saving photo message {msg.id}")
data.append({"type": "photo", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
continue
- if msg.audio:
+ if info["mtype"] == "audio":
logger.trace(f"Saving audio message {msg.id}")
data.append({"type": "audio", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
continue
- if msg.text:
+ if info["mtype"] == "text":
logger.trace(f"Saving text message {msg.id}")
data.append({"type": "text", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
continue
@@ -133,8 +127,7 @@ async def copy_messages_from_db(client: Client, message: Message, key: str, kv:
results = []
try:
for idx, item in enumerate(sorted(data, key=lambda x: x["mid"])):
- with contextlib.suppress(ValueError):
- cid = int(item["cid"])
+ cid = to_int(item["cid"])
if idx != 0:
reply_parameters = ReplyParameters() # only send as reply of the first message
logger.debug(f"Copying {item['type']} message: ({cid}, {item['mid']}) -> target_chat={target_chat}")
src/messages/parser.py
@@ -31,6 +31,7 @@ def parse_msg(message: Message, *, silent: bool = False, verbose: bool = False)
media_group_id = message.media_group_id if message.media_group_id else 0
is_bot = bool(message.from_user and message.from_user.is_bot)
text = message.text or message.caption or ""
+ html = text.html if hasattr(text, "html") else "" # type: ignore
dt = message.date.replace(tzinfo=ZoneInfo(TZ)) if isinstance(message.date, datetime) else nowdt(TZ)
time = f"{dt:%Y-%m-%d %H:%M:%S}"
@@ -96,6 +97,7 @@ def parse_msg(message: Message, *, silent: bool = False, verbose: bool = False)
"media_group_id": int(media_group_id),
"is_bot": bool(is_bot),
"text": str(text),
+ "html": str(html),
"first_name": str(first_name),
"last_name": str(last_name),
"full_name": str(full_name),