Commit 74c0eb9

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-01-25 10:28:38
feat: add `/combine` function to combine chat history
1 parent 75158b7
src/messages/chat_history.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from config import TZ
+from messages.parser import parse_msg
+
+
+async def get_chat_history(client: Client, message: Message, offset_id: int, num_history: int = 0) -> list[str]:
+    """Get given number of chat history in text format.
+
+    Args:
+        client: Client used to fetch the history.
+        message: Message whose chat (``message.chat.id``) is read.
+        offset_id: Forwarded to ``client.get_chat_history`` as ``offset_id``.
+        num_history: Maximum number of messages to fetch. Values <= 0 return ``[]``.
+
+    Returns:
+        One formatted entry per non-empty message, in the reverse of the order
+        the client yields them (presumably oldest first, since Pyrogram
+        documents the history as newest first). Each entry looks like
+        ``"@Full Name (YYYY-mm-dd HH:MM:SS)\\n[MEDIA]text"``; the name and the
+        timestamp parts are omitted when unavailable.
+    """
+    if num_history <= 0:
+        return []
+    tz = ZoneInfo(TZ)  # loop-invariant, build it once
+    history = []
+    async for msg in client.get_chat_history(chat_id=message.chat.id, offset_id=offset_id, limit=num_history):  # type: ignore
+        if msg.empty:
+            continue
+        texts = msg.text or msg.caption or ""
+        info = parse_msg(msg, silent=True)  # silent: do not log every history message
+        media = f"[{msg.media.name}]" if msg.media else ""
+        res = ""
+        if info["full_name"]:
+            res += f"@{info['full_name']} "
+        if isinstance(msg.date, datetime):
+            # astimezone() converts the timestamp into TZ (a naive datetime is
+            # treated as system local time). replace(tzinfo=...) would only
+            # relabel the value without shifting it, so TZ had no effect.
+            res += f"({msg.date.astimezone(tz):%Y-%m-%d %H:%M:%S})\n"
+        res += f"{media}{texts}"
+        if res:
+            history.append(res)
+    return history[::-1]
src/messages/parser.py
@@ -9,10 +9,10 @@ from pyrogram.types import Message
 from config import cache
 
 
-def parse_msg(message: Message, *, verbose: bool = False) -> dict:
+def parse_msg(message: Message, *, silent: bool = False, verbose: bool = False) -> dict:
     if cached := cache.get(f"parse_msg-{message.chat.id}-{message.id}"):
         return cached
-    if verbose:
+    if not silent and verbose:
         logger.trace(f"{message!r}")
     chat_type = message.chat.type.name if message.chat and message.chat.type else ""
     chat_title = message.chat.title if message.chat and message.chat.title else ""
@@ -67,7 +67,8 @@ def parse_msg(message: Message, *, verbose: bool = False) -> dict:
         summary += f" 📝{text}"
     if caption:
         summary += f" 📝{caption}"
-    logger.info(f"{summary!r}")
+    if not silent:
+        logger.info(f"{summary!r}")
 
     info = {  # ensure the type of each field
         "chat_type": str(chat_type),
src/others/combine_history.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import io
+import re
+
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from config import COMBINATION_MAX_HISTORY, ENABLE, PREFIX, READING_SPEED
+from messages.chat_history import get_chat_history
+from messages.sender import send2tg
+from messages.utils import equal_prefix, get_reply_to, startswith_prefix
+from utils import to_int
+
+# Usage text sent back to the chat when the command is used without a valid "#Number" argument.
+HELP = (
+    "\n"
+    "💬**合并对话历史**\n"
+    "使用说明:\n"
+    f"`{PREFIX.COMBINATION} + #Number` (最多{COMBINATION_MAX_HISTORY}条)\n"
+    "将最近的N条消息文本合并为txt文件\n"
+    "\n"
+    f"如果以 `{PREFIX.COMBINATION} + #Number` 回复消息M\n"
+    "则合并消息M之前的N条消息文本 (包含M)\n"
+    "\n"
+    "示例:\n"
+    f"`{PREFIX.COMBINATION} #10`: 合并最近10条消息文本\n"
+)
+
+
+async def combine_history(client: Client, message: Message, **kwargs):
+    """Combine recent chat history into a txt document and send it.
+
+    Triggered by ``PREFIX.COMBINATION`` followed by ``#N``. When the command
+    replies to a message M, the N messages up to and including M are combined;
+    otherwise the N messages preceding the command are used. N is capped at
+    ``COMBINATION_MAX_HISTORY``. The usage text (``HELP``) is sent instead when
+    the command carries no valid positive ``#N``.
+
+    Args:
+        client: Client used to read the history and send the document.
+        message: Incoming command message.
+        **kwargs: ``target_chat`` (destination chat, defaults to the chat of
+            the message) and ``reply_msg_id`` (passed to ``get_reply_to``) are
+            read here; all kwargs are forwarded to ``send2tg`` for help output.
+    """
+    if not ENABLE.COMBINATION:
+        return
+    if not startswith_prefix(message.text, prefix=[PREFIX.COMBINATION]):
+        return
+    # send docs if message == "/combine", without reply
+    if equal_prefix(message.text, prefix=[PREFIX.COMBINATION]) and not message.reply_to_message:
+        await send2tg(client, message, texts=HELP, **kwargs)
+        return
+
+    # get the number of messages to combine
+    # (the prefix is user-configurable, so escape it before building the pattern)
+    matched = re.match(re.escape(PREFIX.COMBINATION) + r"\s+#(\d+)", message.text)
+    # enforce the documented upper bound instead of fetching an arbitrary amount
+    num_history = min(int(matched.group(1)), COMBINATION_MAX_HISTORY) if matched else 0
+    if num_history <= 0:  # no "#N" given, or "#0": nothing sensible to combine
+        await send2tg(client, message, texts=HELP, **kwargs)
+        return
+
+    offset_id = message.id
+    # reply a message with /combine
+    if message.reply_to_message:
+        message = message.reply_to_message
+        offset_id = message.id + 1  # include the reply message
+    history = await get_chat_history(client, message, offset_id, num_history)
+    if not history:
+        # an empty document cannot be uploaded; skip sending instead of failing
+        return
+    combination = "\n\n".join(history)
+    length = len(combination)
+    reading_minutes = length / READING_SPEED  # minutes
+
+    target_chat = kwargs.get("target_chat") or message.chat.id
+    reply_msg_id = kwargs.get("reply_msg_id", 0)
+    reply_parameters = get_reply_to(message.id, reply_msg_id)
+    with io.BytesIO(combination.encode("utf-8")) as f:
+        await client.send_document(
+            to_int(target_chat),
+            f,
+            file_name=f"最近{num_history}条消息历史.txt",
+            reply_parameters=reply_parameters,
+            caption=f"总字符: {length}\n 预计时长: {reading_minutes:.1f}分钟",
+        )
src/config.py
@@ -18,6 +18,8 @@ TEXT_LENGTH = int(os.getenv("TEXT_LENGTH", "4096"))  # Maximum length of text me
 CAPTION_LENGTH = int(os.getenv("CAPTION_LENGTH", "1024"))  # 4096 for Premium user
 MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", "2000")) * 1024 * 1024  # 4000 MB for Premium user
 ASR_MAX_DURATION = int(os.getenv("ASR_MAX_DURATION", "600"))
+COMBINATION_MAX_HISTORY = int(os.getenv("COMBINATION_MAX_HISTORY", "500"))  # Maximum number of messages to combine
+READING_SPEED = int(os.getenv("READING_SPEED", "300"))  # words per minute
 DAILY_MESSAGES = os.getenv("DAILY_MESSAGES", "{}")  # Useful for daily checkin for some services. Should be a json string: '{"chat-1": "msg-1", "chat-2": "msg-2"}'
 
 
@@ -25,6 +27,7 @@ class ENABLE:
     ASR = os.getenv("ENABLE_ASR", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     AUDIO = os.getenv("ENABLE_AUDIO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     BILIBILI = os.getenv("ENABLE_BILIBILI", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    COMBINATION = os.getenv("ENABLE_COMBINATION", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     CRONTAB = os.getenv("ENABLE_CRONTAB", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     DOUYIN = os.getenv("ENABLE_DOUYIN", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     GPT = os.getenv("ENABLE_GPT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -53,6 +56,7 @@ class PREFIX:
     SUBTITLE = os.getenv("PREFIX_SUBTITLE", "/subtitle").lower()
     WGET = os.getenv("PREFIX_WGET", "/wget").lower()
     OCR = os.getenv("PREFIX_OCR", "/ocr").lower()
+    COMBINATION = os.getenv("PREFIX_COMBINATION", "/combine").lower()
 
 
 class UA:
src/handler.py
@@ -15,6 +15,7 @@ from messages.parser import parse_msg
 from messages.sender import send2tg
 from messages.utils import equal_prefix, startswith_prefix
 from networking import flatten_rediercts, match_social_media_link
+from others.combine_history import combine_history
 from others.download_external import download_url_in_message
 from others.extract_audio import extract_audio_file
 from others.gpt import gpt_response
@@ -39,6 +40,7 @@ async def handle_utilities(
     ai: bool = True,
     asr: bool = True,
     audio: bool = True,
+    combine: bool = True,
     subtitle: bool = True,
     wget: bool = True,
     ocr: bool = True,
@@ -59,6 +61,7 @@ async def handle_utilities(
         ai (bool, optional): Enable GPT. Defaults to True.
         asr (bool, optional): Enable ASR. Defaults to True.
         audio (bool, optional): Enable Video -> Audio. Defaults to True.
+        combine (bool, optional): Enable History Combination. Defaults to True.
         subtitle (bool, optional): Enable YouTube subtitle. Defaults to True.
         wget (bool, optional): Enable WGET. Defaults to True.
         ocr (bool, optional): Enable OCR. Defaults to True.
@@ -73,6 +76,8 @@ async def handle_utilities(
         await voice_to_text(client, message, **kwargs)  # /asr
     if audio:
         await extract_audio_file(client, message, **kwargs)  # /audio
+    if combine:
+        await combine_history(client, message, **kwargs)  # /combine
     if subtitle:
         await get_subtitle(client, message, **kwargs)  # /subtitle
     if wget:
@@ -238,7 +243,7 @@ def get_social_media_help(cmd_prefix: list[str] | None = None, ignore_prefix: li
     prefixes = set(cmd_prefix) - set(ignore_prefix)
     msg = "🔗**链接解析**"
     if prefixes:
-        msg += f"\n🔗命令前缀: {', '.join(prefixes)}"
+        msg += f" 前缀: {', '.join(prefixes)}"
         msg += "\n🔄使用 `/retry` 回复消息强制重试"
     if ENABLE.YOUTUBE:
         msg += "\n🔴油管"
@@ -268,6 +273,9 @@ def get_social_media_help(cmd_prefix: list[str] | None = None, ignore_prefix: li
         msg += f"\n⏬**下载文件**: `{PREFIX.WGET}` + URL"
     if ENABLE.OCR:
         msg += f"\n🔤**图片转文字**: `{PREFIX.OCR}` 回复图片消息"
+    if ENABLE.COMBINATION:
+        msg += f"\n💬**合并历史**: `{PREFIX.COMBINATION} #N` 合并最近N条对话历史"
+    msg += "\n\n单独发送每个命令前缀本身可查看该命令详细使用说明"
     return msg