Commit ea2111d

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-04-15 15:45:46
refactor(summary): add message type in txt file
1 parent 59707bf
Changed files (1)
src
src/llm/summary.py
@@ -216,18 +216,16 @@ async def get_contexts(history: list[dict]) -> dict:
         }
     ]
     user_context = []
-    txt_format = ""  # simplified format, send as txt file
     for info in history:
-        if info["text"].startswith("/"):  # commands
-            continue
-
         if info["file_name"] == CONTEXT_FILENAME:
             continue
+        if info["is_bot"]:  # bots
+            continue
+        if info["text"].startswith("/"):  # commands
+            continue
 
         if info["text"].startswith("👤"):  # social media
             continue
-        if info["is_bot"]:  # bots
-            continue
 
         if info["text"]:  # currently, we only include texts
             if len(user_context) == 0:
@@ -241,15 +239,45 @@ async def get_contexts(history: list[dict]) -> dict:
                 "username": info["full_name"],
                 "message": info["text"],
             }
-            txt_format += f"[{info['datetime']:%m-%d %H:%M:%S}]{content['username']}:\n"
             if reply_msg_content := get_message_by_id(history, info.get("reply_to_message_id")):
                 content["reply_to_message"] = reply_msg_content
-                txt_format += f"<quote>{reply_msg_content['username']}: {reply_msg_content['message']}</quote>\n"
             user_context.append({"type": "text", "text": str(content)})
-            txt_format += f"{content['message']}\n\n"
     if not user_context:
         return {}
-    return {"system_context": system_context, "user_context": user_context, "txt_format": txt_format, "begin_time": begin_time, "end_time": end_time}
+    return {"system_context": system_context, "user_context": user_context, "txt_format": get_txt_format(history), "begin_time": begin_time, "end_time": end_time}
+
+
+def get_txt_format(history: list[dict]) -> str:
+    """Format the chat history as plaintext.
+
+    Entries whose ``file_name`` equals ``CONTEXT_FILENAME`` and entries sent
+    by bots are skipped. Every remaining entry becomes one block::
+
+        [MM-DD HH:MM:SS]<full_name>:
+        [<mtype>]<text>
+        <quote><username>: <message></quote>
+
+    The ``[<mtype>]`` tag is only emitted for non-text messages, and the
+    ``<quote>`` line only when the entry replies to a message that
+    ``get_message_by_id`` can resolve. A media group is rendered once, on its
+    first visible entry, with one tag per entry of the group.
+
+    Args:
+        history: Message records; each is read for the keys ``file_name``,
+            ``is_bot``, ``media_group_id``, ``datetime``, ``full_name``,
+            ``mtype``, ``text`` and (optionally) ``reply_to_message_id``.
+
+    Returns:
+        The concatenated blocks, each terminated by a blank line; an empty
+        string when no entry qualifies.
+    """
+    chunks: list[str] = []  # joined once at the end instead of repeated ``+=``
+    seen_group_ids = set()  # media groups that have already been rendered
+    for info in history:
+        if info["file_name"] == CONTEXT_FILENAME:
+            continue
+        if info["is_bot"]:  # bots
+            continue
+        # A missing id (None) is normalised to 0 so the ``> 0`` test below
+        # cannot raise TypeError; 0 is never stored in ``seen_group_ids``.
+        group_id = info["media_group_id"] or 0
+        if group_id in seen_group_ids:
+            continue
+        chunks.append(f"[{info['datetime']:%m-%d %H:%M:%S}]{info['full_name']}:\n")
+        if info["mtype"] != "text":  # not plaintext message
+            if group_id > 0:
+                # One tag per member of the group, looked up via the shared helper.
+                members = get_media_group_by_id(history, group_id)
+                chunks.append(" ".join(f"[{x['mtype']}]" for x in members))
+                seen_group_ids.add(group_id)
+            else:
+                chunks.append(f"[{info['mtype']}]")
+        chunks.append(info["text"])
+        # Append the quoted message after the text.
+        reply_msg_content = get_message_by_id(history, info.get("reply_to_message_id"))
+        if reply_msg_content:
+            # Non-text quotes are prefixed with their message type.
+            type_tag = "" if reply_msg_content["type"] == "text" else f"[{reply_msg_content['type']}] "
+            chunks.append(f"\n<quote>{reply_msg_content['username']}: {type_tag}{reply_msg_content['message']}</quote>")
+        chunks.append("\n\n")
+    return "".join(chunks)
 
 
 def get_message_by_id(history: list[dict], message_id: int | None = None) -> dict:
@@ -270,6 +298,12 @@ def get_message_by_id(history: list[dict], message_id: int | None = None) -> dic
     }
 
 
+def get_media_group_by_id(history: list[dict], media_group_id: int | None = None) -> list[dict]:
+    """Collect the history entries that belong to one media group.
+
+    Entries are returned in their original order. A falsy ``media_group_id``
+    (``None`` or ``0``) yields an empty list without scanning ``history``.
+    """
+    members: list[dict] = []
+    if media_group_id:
+        for entry in history:
+            if entry["media_group_id"] == media_group_id:
+                members.append(entry)
+    return members
+
+
 async def daily_summary(client: Client):
     """Daily summary of the chat history."""
     now = nowdt(TZ)