Commit aa0f85e
Changed files (1)
src
llm
src/llm/summary.py
@@ -183,8 +183,6 @@ async def get_contexts(history: list[dict]) -> dict:
"text": """总结以下网络聊天记录, 识别关键主题、争议话题以及重要观点。提供一个简明的总结, 保留原始意图和上下文。如有必要, 引用原始用户名和时间戳, 并使用清晰的语言。
每一条消息的格式如下:
{
- "id": 消息ID, 按顺序递增,
- "type": 消息类型,
"time": 消息发送时间,
"url": 消息链接,
"username": 消息发送者,
@@ -208,7 +206,7 @@ async def get_contexts(history: list[dict]) -> dict:
- 在必要时引用用户名和时间。
- 保持清晰和简洁的表达。
- 引用用户名时, 请使用 **username** 格式。如: **username**
-- 引用时间时, 请使用 [HH:MM:SS](url) 格式。如: [12:30:00](https://t.me/username/1234567890)
+- 引用时间时, 请使用 [HH:MM:SS](url) 格式。如: [12:30:00](https://t.me/c/1234/56789)
""",
}
],
@@ -220,23 +218,16 @@ async def get_contexts(history: list[dict]) -> dict:
continue
if info["is_bot"]: # bots
continue
- if info["text"].startswith("/"): # commands
- continue
-
- if info["text"].startswith("👤"): # social media
- continue
-
if info["text"]: # currently, we only include texts
if len(user_context) == 0:
begin_time = info["datetime"]
end_time = info["datetime"]
+ media_type = f"[{info['mtype']}] " if info["mtype"] != "text" else ""
content = {
- "id": info["mid"],
- "type": info["mtype"],
"time": f"{info['datetime']:%H:%M:%S}",
"url": info["message_url"],
"username": info["full_name"],
- "message": info["text"],
+ "message": media_type + info["text"],
}
if reply_msg_content := get_message_by_id(history, info.get("reply_to_message_id")):
content["reply_to_message"] = reply_msg_content
@@ -253,8 +244,6 @@ def get_txt_format(history: list[dict]) -> str:
for info in history:
if info["file_name"] == CONTEXT_FILENAME:
continue
- if info["is_bot"]: # bots
- continue
if info["media_group_id"] in txt_mediagroup_ids:
continue
# add txt format
@@ -271,10 +260,7 @@ def get_txt_format(history: list[dict]) -> str:
# append quote msg
reply_msg_content = get_message_by_id(history, info.get("reply_to_message_id"))
if reply_msg_content:
- if reply_msg_content["type"] == "text":
- txt_format += f"\n<quote>{reply_msg_content['username']}: {reply_msg_content['message']}</quote>"
- else:
- txt_format += f"\n<quote>{reply_msg_content['username']}: [{reply_msg_content['type']}] {reply_msg_content['message']}</quote>"
+ txt_format += f"\n<quote>{reply_msg_content['username']}: {reply_msg_content['message']}</quote>"
txt_format += "\n\n"
return txt_format
@@ -286,14 +272,12 @@ def get_message_by_id(history: list[dict], message_id: int | None = None) -> dic
info = next((info for info in history if info["mid"] == message_id), {})
if not info:
return {}
-
+ media_type = f"[{info['mtype']}] " if info["mtype"] != "text" else ""
return {
- "id": info["mid"],
- "type": info["mtype"],
"time": f"{info['datetime']:%H:%M:%S}",
"url": info["message_url"],
"username": info["full_name"],
- "message": info["text"] or info["mtype"],
+ "message": media_type + info["text"],
}
@@ -306,22 +290,29 @@ def get_media_group_by_id(history: list[dict], media_group_id: int | None = None
async def daily_summary(client: Client):
"""Daily summary of the chat history."""
now = nowdt(TZ)
- if now.hour not in [4, 12, 20]:
+ durations = {
+ 0: 12,
+ 12: 12,
+ 7: 24,
+ } # hour of day (in TZ) -> summary duration in hours
+ if now.hour not in durations:
return
+ duration = durations[now.hour]
if now.minute != 0:
return
- mapping = {}
+ mapping = {} # source chat id (to summarize) -> target chat id (receives the summary)
try:
mapping = json.loads(TID.DAILY_SUMMARY)
except Exception:
logger.warning(f"Invalid DAILY_SUMMARY: {TID.DAILY_SUMMARY}")
+ return
for source_chat_id, target_chat_id in mapping.items():
logger.info(f"Summary chat {source_chat_id}, send results to {target_chat_id}")
# fake message
message = Message(
id=0,
chat=Chat(id=target_chat_id),
- text=f"/summary #8h cid={to_int(source_chat_id)}", # type: ignore
+ text=f"/summary #{duration}h cid={to_int(source_chat_id)}", # type: ignore
)
await ai_summary(
client,