Commit aa0f85e

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-27 13:43:57
chore(summary): update summary logic
1 parent a91f1bf
Changed files (1)
src
src/llm/summary.py
@@ -183,8 +183,6 @@ async def get_contexts(history: list[dict]) -> dict:
                     "text": """总结以下网络聊天记录, 识别关键主题、争议话题以及重要观点。提供一个简明的总结, 保留原始意图和上下文。如有必要, 引用原始用户名和时间戳, 并使用清晰的语言。
 每一条消息的格式如下:
 {
-    "id": 消息ID, 按顺序递增,
-    "type": 消息类型,
     "time": 消息发送时间,
     "url": 消息链接,
     "username": 消息发送者,
@@ -208,7 +206,7 @@ async def get_contexts(history: list[dict]) -> dict:
 - 在必要时引用用户名和时间。
 - 保持清晰和简洁的表达。
 - 引用用户名时, 请使用 **username** 格式。如: **username**
-- 引用时间时, 请使用 [HH:MM:SS](url) 格式。如: [12:30:00](https://t.me/username/1234567890)
+- 引用时间时, 请使用 [HH:MM:SS](url) 格式。如: [12:30:00](https://t.me/c/1234/56789)
 """,
                 }
             ],
@@ -220,23 +218,16 @@ async def get_contexts(history: list[dict]) -> dict:
             continue
         if info["is_bot"]:  # bots
             continue
-        if info["text"].startswith("/"):  # commands
-            continue
-
-        if info["text"].startswith("👤"):  # social media
-            continue
-
         if info["text"]:  # currently, we only include texts
             if len(user_context) == 0:
                 begin_time = info["datetime"]
             end_time = info["datetime"]
+            media_type = f"[{info['mtype']}] " if info["mtype"] != "text" else ""
             content = {
-                "id": info["mid"],
-                "type": info["mtype"],
                 "time": f"{info['datetime']:%H:%M:%S}",
                 "url": info["message_url"],
                 "username": info["full_name"],
-                "message": info["text"],
+                "message": media_type + info["text"],
             }
             if reply_msg_content := get_message_by_id(history, info.get("reply_to_message_id")):
                 content["reply_to_message"] = reply_msg_content
@@ -253,8 +244,6 @@ def get_txt_format(history: list[dict]) -> str:
     for info in history:
         if info["file_name"] == CONTEXT_FILENAME:
             continue
-        if info["is_bot"]:  # bots
-            continue
         if info["media_group_id"] in txt_mediagroup_ids:
             continue
         # add txt format
@@ -271,10 +260,7 @@ def get_txt_format(history: list[dict]) -> str:
         # append quote msg
         reply_msg_content = get_message_by_id(history, info.get("reply_to_message_id"))
         if reply_msg_content:
-            if reply_msg_content["type"] == "text":
-                txt_format += f"\n<quote>{reply_msg_content['username']}: {reply_msg_content['message']}</quote>"
-            else:
-                txt_format += f"\n<quote>{reply_msg_content['username']}: [{reply_msg_content['type']}] {reply_msg_content['message']}</quote>"
+            txt_format += f"\n<quote>{reply_msg_content['username']}: {reply_msg_content['message']}</quote>"
         txt_format += "\n\n"
     return txt_format
 
@@ -286,14 +272,12 @@ def get_message_by_id(history: list[dict], message_id: int | None = None) -> dic
     info = next((info for info in history if info["mid"] == message_id), {})
     if not info:
         return {}
-
+    media_type = f"[{info['mtype']}] " if info["mtype"] != "text" else ""
     return {
-        "id": info["mid"],
-        "type": info["mtype"],
         "time": f"{info['datetime']:%H:%M:%S}",
         "url": info["message_url"],
         "username": info["full_name"],
-        "message": info["text"] or info["mtype"],
+        "message": media_type + info["text"],
     }
 
 
@@ -306,22 +290,29 @@ def get_media_group_by_id(history: list[dict], media_group_id: int | None = None
 async def daily_summary(client: Client):
     """Daily summary of the chat history."""
     now = nowdt(TZ)
-    if now.hour not in [4, 12, 20]:
+    durations = {
+        0: 12,
+        12: 12,
+        7: 24,
+    }  # time in hour: duration in hours
+    if now.hour not in durations:
         return
+    duration = durations[now.hour]
     if now.minute != 0:
         return
-    mapping = {}
+    mapping = {}  # summarize chat id -> send to chat id
     try:
         mapping = json.loads(TID.DAILY_SUMMARY)
     except Exception:
         logger.warning(f"Invalid DAILY_SUMMARY: {TID.DAILY_SUMMARY}")
+        return
     for source_chat_id, target_chat_id in mapping.items():
         logger.info(f"Summary chat {source_chat_id}, send results to {target_chat_id}")
         # fake message
         message = Message(
             id=0,
             chat=Chat(id=target_chat_id),
-            text=f"/summary #8h cid={to_int(source_chat_id)}",  # type: ignore
+            text=f"/summary #{duration}h cid={to_int(source_chat_id)}",  # type: ignore
         )
         await ai_summary(
             client,