Commit 74c0eb9
Changed files (5)
src
src/messages/chat_history.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from config import TZ
+from messages.parser import parse_msg
+
+
+async def get_chat_history(client: Client, message: Message, offset_id: int, num_history: int = 0) -> list[str]:
+    """Collect up to ``num_history`` messages of a chat as formatted text entries.
+
+    Messages are read with ``client.get_chat_history`` from the chat that
+    ``message`` belongs to, starting at ``offset_id``. Each non-empty message is
+    rendered as ``@<full_name> (<timestamp>)\\n[<MEDIA>]<text or caption>``;
+    parts that are unavailable are omitted.
+
+    Args:
+        client: Client used to query the chat history.
+        message: Message whose ``chat.id`` selects the chat to read.
+        offset_id: Message id passed to ``get_chat_history`` as ``offset_id``.
+        num_history: Maximum number of messages to fetch. Values ``<= 0``
+            return an empty list without querying.
+
+    Returns:
+        The formatted entries in the reverse of the order they were yielded
+        (presumably oldest first - confirm against the client's ordering).
+    """
+    if num_history <= 0:
+        return []
+    tz = ZoneInfo(TZ)  # loop-invariant, build it once
+    history = []
+    async for msg in client.get_chat_history(chat_id=message.chat.id, offset_id=offset_id, limit=num_history):  # type: ignore
+        if msg.empty:
+            continue
+        texts = msg.text or msg.caption or ""
+        # silent=True keeps parse_msg from logging every history message
+        info = parse_msg(msg, silent=True)
+        time = ""
+        dt = msg.date
+        if isinstance(dt, datetime):
+            # astimezone() converts the clock time into the configured zone
+            # (a naive value is interpreted as system local time), whereas
+            # replace(tzinfo=...) would only relabel it and leave the printed
+            # time unchanged.
+            time = f"({dt.astimezone(tz):%Y-%m-%d %H:%M:%S})"
+        media = f"[{msg.media.name}]" if msg.media else ""
+        res = ""
+        if info["full_name"]:
+            res += f"@{info['full_name']} "
+        if time:
+            res += f"{time}\n"
+        res += f"{media}{texts}"
+        if res:
+            history.append(res)
+    return history[::-1]
src/messages/parser.py
@@ -9,10 +9,10 @@ from pyrogram.types import Message
from config import cache
-def parse_msg(message: Message, *, verbose: bool = False) -> dict:
+def parse_msg(message: Message, *, silent: bool = False, verbose: bool = False) -> dict:
if cached := cache.get(f"parse_msg-{message.chat.id}-{message.id}"):
return cached
- if verbose:
+ if not silent and verbose:
logger.trace(f"{message!r}")
chat_type = message.chat.type.name if message.chat and message.chat.type else ""
chat_title = message.chat.title if message.chat and message.chat.title else ""
@@ -67,7 +67,8 @@ def parse_msg(message: Message, *, verbose: bool = False) -> dict:
summary += f" 📝{text}"
if caption:
summary += f" 📝{caption}"
- logger.info(f"{summary!r}")
+ if not silent:
+ logger.info(f"{summary!r}")
info = { # ensure the type of each field
"chat_type": str(chat_type),
src/others/combine_history.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import io
+import re
+
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from config import COMBINATION_MAX_HISTORY, ENABLE, PREFIX, READING_SPEED
+from messages.chat_history import get_chat_history
+from messages.sender import send2tg
+from messages.utils import equal_prefix, get_reply_to, startswith_prefix
+from utils import to_int
+
+# User-facing usage text for the history-combination command. It is sent back
+# by combine_history when the command is used without a valid "#N" argument.
+# The command prefix and the advertised maximum are interpolated from config.
+HELP = f"""
+💬**合并对话历史**
+使用说明:
+`{PREFIX.COMBINATION} + #Number` (最多{COMBINATION_MAX_HISTORY}条)
+将最近的N条消息文本合并为txt文件
+
+如果以 `{PREFIX.COMBINATION} + #Number` 回复消息M
+则合并消息M之前的N条消息文本 (包含M)
+
+示例:
+`{PREFIX.COMBINATION} #10`: 合并最近10条消息文本
+"""
+
+
+async def combine_history(client: Client, message: Message, **kwargs):
+    """Combine the latest N chat messages into a single txt document and send it.
+
+    Triggered by ``<PREFIX.COMBINATION> #N``. When the command replies to a
+    message M, the N messages up to and including M are combined; otherwise the
+    N messages preceding the command itself are used. N is capped at
+    ``COMBINATION_MAX_HISTORY``. If the command carries no valid ``#N``
+    argument, the usage text ``HELP`` is sent instead.
+
+    Args:
+        client: Client used to read the history and send the document.
+        message: The incoming command message.
+        **kwargs: Forwarded to ``send2tg``. ``target_chat`` (destination chat,
+            defaults to the chat of the message) and ``reply_msg_id`` are also
+            read here.
+    """
+    if not ENABLE.COMBINATION:
+        return
+    if not startswith_prefix(message.text, prefix=[PREFIX.COMBINATION]):
+        return
+    # send docs if message == "/combine", without reply
+    if equal_prefix(message.text, prefix=[PREFIX.COMBINATION]) and not message.reply_to_message:
+        await send2tg(client, message, texts=HELP, **kwargs)
+        return
+
+    # get the number of messages to combine; the prefix is user-configurable,
+    # so escape it before embedding it into the pattern
+    matched = re.match(r"^" + re.escape(PREFIX.COMBINATION) + r"\s+#(\d+)", message.text)
+    # enforce the limit that HELP advertises
+    num_history = min(int(matched.group(1)), COMBINATION_MAX_HISTORY) if matched else 0
+    if num_history <= 0:
+        # missing/invalid "#N" (including "#0"): show usage instead of sending an empty file
+        await send2tg(client, message, texts=HELP, **kwargs)
+        return
+
+    offset_id = message.id
+    # reply a message with /combine
+    if message.reply_to_message:
+        message = message.reply_to_message
+        offset_id = message.id + 1  # include the reply message
+    history = await get_chat_history(client, message, offset_id, num_history)
+    combination = "\n\n".join(history)
+    length = len(combination)
+    # READING_SPEED is applied to a character count; guard against a zero/negative setting
+    reading_minutes = length / READING_SPEED if READING_SPEED > 0 else 0.0  # minutes
+
+    target_chat = kwargs.get("target_chat") or message.chat.id
+    reply_msg_id = kwargs.get("reply_msg_id", 0)
+    reply_parameters = get_reply_to(message.id, reply_msg_id)
+    with io.BytesIO(combination.encode("utf-8")) as f:
+        await client.send_document(
+            to_int(target_chat),
+            f,
+            file_name=f"最近{num_history}条消息历史.txt",
+            reply_parameters=reply_parameters,
+            caption=f"总字符: {length}\n 预计时长: {reading_minutes:.1f}分钟",
+        )
src/config.py
@@ -18,6 +18,8 @@ TEXT_LENGTH = int(os.getenv("TEXT_LENGTH", "4096")) # Maximum length of text me
CAPTION_LENGTH = int(os.getenv("CAPTION_LENGTH", "1024")) # 4096 for Premium user
MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", "2000")) * 1024 * 1024 # 4000 MB for Premium user
ASR_MAX_DURATION = int(os.getenv("ASR_MAX_DURATION", "600"))
+COMBINATION_MAX_HISTORY = int(os.getenv("COMBINATION_MAX_HISTORY", "500")) # Maximum number of messages to combine
+READING_SPEED = int(os.getenv("READING_SPEED", "300")) # characters per minute; divides the combined text length to estimate reading time
DAILY_MESSAGES = os.getenv("DAILY_MESSAGES", "{}") # Useful for daily checkin for some services. Should be a json string: '{"chat-1": "msg-1", "chat-2": "msg-2"}'
@@ -25,6 +27,7 @@ class ENABLE:
ASR = os.getenv("ENABLE_ASR", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
AUDIO = os.getenv("ENABLE_AUDIO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
BILIBILI = os.getenv("ENABLE_BILIBILI", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+ COMBINATION = os.getenv("ENABLE_COMBINATION", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
CRONTAB = os.getenv("ENABLE_CRONTAB", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
DOUYIN = os.getenv("ENABLE_DOUYIN", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
GPT = os.getenv("ENABLE_GPT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -53,6 +56,7 @@ class PREFIX:
SUBTITLE = os.getenv("PREFIX_SUBTITLE", "/subtitle").lower()
WGET = os.getenv("PREFIX_WGET", "/wget").lower()
OCR = os.getenv("PREFIX_OCR", "/ocr").lower()
+ COMBINATION = os.getenv("PREFIX_COMBINATION", "/combine").lower()
class UA:
src/handler.py
@@ -15,6 +15,7 @@ from messages.parser import parse_msg
from messages.sender import send2tg
from messages.utils import equal_prefix, startswith_prefix
from networking import flatten_rediercts, match_social_media_link
+from others.combine_history import combine_history
from others.download_external import download_url_in_message
from others.extract_audio import extract_audio_file
from others.gpt import gpt_response
@@ -39,6 +40,7 @@ async def handle_utilities(
ai: bool = True,
asr: bool = True,
audio: bool = True,
+ combine: bool = True,
subtitle: bool = True,
wget: bool = True,
ocr: bool = True,
@@ -59,6 +61,7 @@ async def handle_utilities(
ai (bool, optional): Enable GPT. Defaults to True.
asr (bool, optional): Enable ASR. Defaults to True.
audio (bool, optional): Enable Video -> Audio. Defaults to True.
+ combine (bool, optional): Enable History Combination. Defaults to True.
subtitle (bool, optional): Enable YouTube subtitle. Defaults to True.
wget (bool, optional): Enable WGET. Defaults to True.
ocr (bool, optional): Enable OCR. Defaults to True.
@@ -73,6 +76,8 @@ async def handle_utilities(
await voice_to_text(client, message, **kwargs) # /asr
if audio:
await extract_audio_file(client, message, **kwargs) # /audio
+ if combine:
+ await combine_history(client, message, **kwargs) # /combine
if subtitle:
await get_subtitle(client, message, **kwargs) # /subtitle
if wget:
@@ -238,7 +243,7 @@ def get_social_media_help(cmd_prefix: list[str] | None = None, ignore_prefix: li
prefixes = set(cmd_prefix) - set(ignore_prefix)
msg = "🔗**链接解析**"
if prefixes:
- msg += f"\n🔗命令前缀: {', '.join(prefixes)}"
+ msg += f" 前缀: {', '.join(prefixes)}"
msg += "\n🔄使用 `/retry` 回复消息强制重试"
if ENABLE.YOUTUBE:
msg += "\n🔴油管"
@@ -268,6 +273,9 @@ def get_social_media_help(cmd_prefix: list[str] | None = None, ignore_prefix: li
msg += f"\n⏬**下载文件**: `{PREFIX.WGET}` + URL"
if ENABLE.OCR:
msg += f"\n🔤**图片转文字**: `{PREFIX.OCR}` 回复图片消息"
+ if ENABLE.COMBINATION:
+ msg += f"\n💬**合并历史**: `{PREFIX.COMBINATION} #N` 合并最近N条对话历史"
+ msg += "\n\n单独发送每个命令前缀本身可查看该命令详细使用说明"
return msg