Commit 053bad0
Changed files (5)
src/llm/summary.py
@@ -19,15 +19,15 @@ from messages.chat_history import get_parsed_chat_history
from messages.parser import parse_msg
from messages.progress import modify_progress
from messages.sender import send2tg
-from messages.utils import equal_prefix, to_int
+from messages.utils import equal_prefix, startswith_prefix, to_int
from utils import nowdt
-HELP = f"""🤖**GPT总结历史消息** (最多{MAX_MESSAGE_SUMMARY}条)
+HELP = f"""🤖**AI总结历史消息** (最多{MAX_MESSAGE_SUMMARY}条)
+⚠️使用`{PREFIX.COMBINATION}`命令只生成聊天记录文件, 不进行AI总结
使用说明:
# 后跟消息数量或时间范围
@ 后跟用户名 (可多次使用@)
-
**1️⃣指定条目数**
- `{PREFIX.AI_SUMMARY} #N`: 总结最近的N条历史消息
- `{PREFIX.AI_SUMMARY} #N @User`: 总结最近只属于User的N条消息
@@ -53,11 +53,11 @@ HELP = f"""🤖**GPT总结历史消息** (最多{MAX_MESSAGE_SUMMARY}条)
- `{PREFIX.AI_SUMMARY} #20240101123000-20240101124000`: 总结从2024-01-01 12:30:00开始到2024-01-01 12:40:00的消息
- `{PREFIX.AI_SUMMARY} #20240101123000 @User`: 总结从2024-01-01 12:30:00开始到现在的消息, 且只属于User的消息
- `{PREFIX.AI_SUMMARY} #20240101123000-20240101124000 @User`: 总结从2024-01-01 12:30:00开始到2024-01-01 12:40:00的消息, 且只属于User的消息
+- 时间格式中没有任何分隔符, 必须为YYYYMMDDHHMMSS (14位纯数字)
注意:
- 用上述各种`{PREFIX.AI_SUMMARY}`命令回复消息M, 视为将截止时间设为消息M的发送时间
- 如果用户名中有空格, 请去除空格。例如: 想指定用户为John Doe请使用 `@JohnDoe`
-- 3️⃣的时间格式中没有任何分隔符, 必须为YYYYMMDDHHMMSS (14位纯数字)
"""
DAILY_SUMMARY_PREFIX = "🏪**#爬楼助手**\n"
CONTEXT_FILENAME = "聊天记录.txt"
@@ -72,12 +72,14 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
summary_prefix (str | None): Prefix string of the response message.
"""
# send docs if message is exactly "/summary" or "/combine"
- if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY]):
+ if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]):
await send2tg(client, message, texts=HELP, **kwargs)
return
- # get the number of messages to combine
info = parse_msg(message)
+ need_summay = startswith_prefix(info["text"], prefix=[PREFIX.AI_SUMMARY])
+ # replace /combine with /summary, because we need to use `/summary` to match different patterns
+ info["text"] = re.sub(r"^" + PREFIX.COMBINATION, PREFIX.AI_SUMMARY, info["text"], flags=re.IGNORECASE)
num_history = MAX_MESSAGE_SUMMARY
filter_users = []
begin_time = datetime.fromtimestamp(0, tz=ZoneInfo(TZ))
@@ -146,6 +148,9 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
# send contexts as txt file
with io.BytesIO(parsed["txt_format"].encode("utf-8")) as f:
await client.send_document(to_int(message.chat.id), f, file_name=CONTEXT_FILENAME, caption=msg)
+ if not need_summay:
+ await modify_progress(del_status=True, **kwargs)
+ return
await modify_progress(text=f"🤖**{summary_model_name}**总结中...\n{msg}", force_update=True, **kwargs)
config = get_gpt_config(model_type="text", contexts=contexts, force_model=summary_model)
src/others/combine_history.py
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import io
-import re
-
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import MAX_MESSAGE_RETRIEVED, PREFIX, READING_SPEED
-from llm.utils import count_tokens
-from messages.chat_history import get_parsed_chat_history
-from messages.parser import parse_msg
-from messages.sender import send2tg
-from messages.utils import equal_prefix, get_reply_to, startswith_prefix
-from utils import to_int
-
-HELP = f"""
-💬**合并对话历史** (最多{MAX_MESSAGE_RETRIEVED}条)
-使用说明:
-1. `{PREFIX.COMBINATION} + #N`
-将最近的N条消息文本合并为txt文件
-
-2. `{PREFIX.COMBINATION} + #N + @User`
-将最近只属于User的N条消息合并为txt文件
-
-如果以 `{PREFIX.COMBINATION} + #N` (或附加User) 回复消息M
-则合并消息M之前的N条消息文本 (包含M)
-
-示例:
-1. `{PREFIX.COMBINATION} #10`: 合并最近10条消息为txt文本
-2. `{PREFIX.COMBINATION} #20 @123456`: 合并最近UID为123456的20条消息为txt文本
-3. `{PREFIX.COMBINATION} #20 @John`: 合并最近用户John(大小写均可)的20条消息为txt文本
-如果用户名中有空格, 请去除空格。例如: 想指定用户为John Doe请使用 `@JohnDoe`
-"""
-
-
-async def combine_history(client: Client, message: Message, **kwargs):
- """Download the url from the message."""
- if not startswith_prefix(message.text, prefix=[PREFIX.COMBINATION]):
- return
- # send docs if message == "/combine", without reply
- if equal_prefix(message.text, prefix=[PREFIX.WGET]) and not message.reply_to_message:
- await send2tg(client, message, texts=HELP, **kwargs)
- return
-
- # get the number of messages to combine
- info = parse_msg(message)
- num_history = MAX_MESSAGE_RETRIEVED
- if matched := re.match(r"^" + PREFIX.COMBINATION + r"\s+#(\d+)\s+@(\w+)", info["text"]):
- num_history = min(int(matched.group(1)), MAX_MESSAGE_RETRIEVED)
- filter_user = str(matched.group(2))
- file_name = f"最近{num_history}条{filter_user}的消息.txt"
- elif matched := re.match(r"^" + PREFIX.COMBINATION + r"\s+#(\d+)", info["text"]):
- num_history = min(int(matched.group(1)), MAX_MESSAGE_RETRIEVED)
- filter_user = ""
- file_name = f"最近{num_history}条消息记录.txt"
- else:
- await send2tg(client, message, texts=HELP, **kwargs)
- return
-
- offset_id = info["mid"]
- # reply a message with /combine
- if message.reply_to_message:
- message = message.reply_to_message
- info = parse_msg(message, silent=True)
- offset_id = info["mid"] + 1 # include the reply message
- # set custom chat_id and message_id (useful for debug)
- if matched := re.search(r"cid=(-?\w+)", info["text"], re.IGNORECASE):
- info["cid"] = to_int(matched.group(1))
- if matched := re.search(r"mid=(\d+)", info["text"], re.IGNORECASE):
- info["mid"] = int(matched.group(1))
- offset_id = info["mid"] + 1 # include this message
-
- history = await get_parsed_chat_history(client, info["cid"], offset_id, num_history, users=filter_user)
- if not history:
- await send2tg(client, message, texts=f"最近{num_history}条消息中未找到符合条件的消息", **kwargs)
- return
- combination = ""
- num_chars = 0
- for info in history:
- if info["full_name"]:
- combination += f"@{info['full_name']} "
- combination += f"{info['time']}\n"
- media = f"[{info['mtype']}]" if info["mtype"] != "text" else ""
- combination += f"{media}{info['text']}"
- num_chars += len(f"{media}{info['text']}")
- combination += "\n\n"
- num_tokens = count_tokens(combination)
- reading_minutes = num_chars / READING_SPEED # minutes
- target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id
- reply_msg_id = kwargs.get("reply_msg_id", 0)
- reply_parameters = get_reply_to(message.id, reply_msg_id)
- with io.BytesIO(combination.encode("utf-8")) as f:
- await client.send_document(
- to_int(target_chat),
- f,
- file_name=file_name,
- reply_parameters=reply_parameters,
- caption=f"字符数: {num_chars}\nToken: {num_tokens}\n阅读时长: {reading_minutes:.1f}分钟",
- )
src/config.py
@@ -19,7 +19,7 @@ TEXT_LENGTH = int(os.getenv("TEXT_LENGTH", "4096")) # Maximum length of text me
CAPTION_LENGTH = int(os.getenv("CAPTION_LENGTH", "1024")) # 4096 for Premium user
MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", "2000")) * 1024 * 1024 # 4000 MB for Premium user
ASR_MAX_DURATION = int(os.getenv("ASR_MAX_DURATION", "600"))
-MAX_MESSAGE_RETRIEVED = int(os.getenv("MAX_MESSAGE_RETRIEVED", "1000000")) # Maximum number of messages to combine
+MAX_MESSAGE_RETRIEVED = int(os.getenv("MAX_MESSAGE_RETRIEVED", "1000000")) # Maximum number of messages to retrieve
MAX_MESSAGE_SUMMARY = int(os.getenv("MAX_MESSAGE_SUMMARY", "9999")) # Maximum number of messages to summarize
READING_SPEED = int(os.getenv("READING_SPEED", "300")) # words per minute
DAILY_MESSAGES = os.getenv("DAILY_MESSAGES", "{}") # Useful for daily checkin for some services. Should be a json string: '{"chat-1": "msg-1", "chat-2": "msg-2"}'
@@ -34,7 +34,6 @@ class ENABLE: # see fine-grained permission in `src/permission.py`
AI_SUMMARY = os.getenv("ENABLE_AI_SUMMARY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
ASR = os.getenv("ENABLE_ASR", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
AUDIO = os.getenv("ENABLE_AUDIO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
- COMBINATION = os.getenv("ENABLE_COMBINATION", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
CRONTAB = os.getenv("ENABLE_CRONTAB", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
DOUYIN = os.getenv("ENABLE_DOUYIN", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
GPT = os.getenv("ENABLE_GPT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
src/handler.py
@@ -17,7 +17,6 @@ from messages.parser import parse_msg
from messages.sender import send2tg
from messages.utils import equal_prefix, startswith_prefix
from networking import match_social_media_link
-from others.combine_history import combine_history
from others.download_external import download_url_in_message
from others.extract_audio import extract_audio_file
from others.raw_img_file import convert_raw_img_file
@@ -43,7 +42,6 @@ async def handle_utilities(
ai: bool = True,
asr: bool = True,
audio: bool = True,
- combine: bool = True,
subtitle: bool = True,
wget: bool = True,
ocr: bool = True,
@@ -66,7 +64,6 @@ async def handle_utilities(
ai (bool, optional): Enable GPT. Defaults to True.
asr (bool, optional): Enable ASR. Defaults to True.
audio (bool, optional): Enable Video -> Audio. Defaults to True.
- combine (bool, optional): Enable History Combination. Defaults to True.
subtitle (bool, optional): Enable YouTube subtitle. Defaults to True.
wget (bool, optional): Enable WGET. Defaults to True.
ocr (bool, optional): Enable OCR. Defaults to True.
@@ -83,8 +80,6 @@ async def handle_utilities(
await voice_to_text(client, message, **kwargs) # /asr
if audio:
await extract_audio_file(client, message, **kwargs) # /audio
- if combine:
- await combine_history(client, message, **kwargs) # /combine
if subtitle:
await get_subtitle(client, message, **kwargs) # /subtitle
if wget:
@@ -279,8 +274,6 @@ def get_social_media_help(chat_id: int | str, ctype: str, prefixes: list[str] |
msg += f"\n🗣**语音转文字**: `{PREFIX.ASR}` 回复语音消息"
if permission["audio"]:
msg += f"\n🎧**提取音频或语音**: `{PREFIX.AUDIO}` `{PREFIX.VOICE}` 回复消息"
- if permission["combine"]:
- msg += f"\n💬**合并历史**: `{PREFIX.COMBINATION} #N` 合并最近N条对话历史"
if permission["ocr"]:
msg += f"\n🔤**图片转文字**: `{PREFIX.OCR}` 回复图片消息"
if permission["price"]:
@@ -288,7 +281,7 @@ def get_social_media_help(chat_id: int | str, ctype: str, prefixes: list[str] |
if permission["subtitle"]:
msg += f"\n📃**提取字幕**: `{PREFIX.SUBTITLE}` + 油管链接 (或回复油管链接)"
if permission["summary"] and permission["ai"]: # summary depends on ai
- msg += f"\n🤖**总结历史**: `{PREFIX.AI_SUMMARY} #N` AI总结最近N条对话历史"
+ msg += f"\n🤖**总结历史**: `{PREFIX.AI_SUMMARY}` AI总结历史聊天记录"
if permission["wget"]:
msg += f"\n⏬**下载文件**: `{PREFIX.WGET}` + URL"
src/permission.py
@@ -97,7 +97,6 @@ def check_service(cid: int | str, ctype: str) -> dict:
"need_prefix": True,
"asr": True,
"audio": True,
- "combine": True,
"subtitle": True,
"wget": True,
"ocr": True,
@@ -140,8 +139,6 @@ def check_service(cid: int | str, ctype: str) -> dict:
permission["asr"] = False
if not ENABLE.AUDIO:
permission["audio"] = False
- if not ENABLE.COMBINATION:
- permission["combine"] = False
if not ENABLE.SUBTITLE:
permission["subtitle"] = False
if not ENABLE.WGET: