Commit ea2111d
Changed files (1)
src
llm
src/llm/summary.py
@@ -216,18 +216,16 @@ async def get_contexts(history: list[dict]) -> dict:
}
]
user_context = []
- txt_format = "" # simplified format, send as txt file
for info in history:
- if info["text"].startswith("/"): # commands
- continue
-
if info["file_name"] == CONTEXT_FILENAME:
continue
+ if info["is_bot"]: # bots
+ continue
+ if info["text"].startswith("/"): # commands
+ continue
if info["text"].startswith("👤"): # social media
continue
- if info["is_bot"]: # bots
- continue
if info["text"]: # currently, we only include texts
if len(user_context) == 0:
@@ -241,15 +239,45 @@ async def get_contexts(history: list[dict]) -> dict:
"username": info["full_name"],
"message": info["text"],
}
- txt_format += f"[{info['datetime']:%m-%d %H:%M:%S}]{content['username']}:\n"
if reply_msg_content := get_message_by_id(history, info.get("reply_to_message_id")):
content["reply_to_message"] = reply_msg_content
- txt_format += f"<quote>{reply_msg_content['username']}: {reply_msg_content['message']}</quote>\n"
user_context.append({"type": "text", "text": str(content)})
- txt_format += f"{content['message']}\n\n"
if not user_context:
return {}
- return {"system_context": system_context, "user_context": user_context, "txt_format": txt_format, "begin_time": begin_time, "end_time": end_time}
+ return {"system_context": system_context, "user_context": user_context, "txt_format": get_txt_format(history), "begin_time": begin_time, "end_time": end_time}
+
+
def get_txt_format(history: list[dict]) -> str:
    """Render the chat history as a plain-text transcript.

    Entries whose ``file_name`` equals ``CONTEXT_FILENAME`` and entries flagged
    ``is_bot`` are left out. Every remaining entry produces a
    ``[MM-DD HH:MM:SS]full_name:`` header line followed by its text and a blank
    line. Non-text entries are prefixed with a ``[mtype]`` tag; a media group
    (``media_group_id > 0``) is written once, at its first member, as the
    space-separated tags of all members, and its later members are skipped.
    When ``get_message_by_id`` resolves the replied-to message, it is appended
    on its own line inside ``<quote>...</quote>``.

    Args:
        history: Message records (dicts) in the order they should be rendered.

    Returns:
        The transcript as a single string; empty when nothing is rendered.
    """
    fragments: list[str] = []
    emitted_groups = set()  # media groups already written to the transcript
    for entry in history:
        # Skip the context file itself and anything sent by a bot.
        if entry["file_name"] == CONTEXT_FILENAME or entry["is_bot"]:
            continue
        group_id = entry["media_group_id"]
        if group_id in emitted_groups:
            continue

        fragments.append(f"[{entry['datetime']:%m-%d %H:%M:%S}]{entry['full_name']}:\n")

        if entry["mtype"] != "text":
            if group_id > 0:
                # One line for the whole group: a tag per member, in history order.
                fragments.append(" ".join(f"[{member['mtype']}]" for member in history if member["media_group_id"] == group_id))
                emitted_groups.add(group_id)
            else:
                fragments.append(f"[{entry['mtype']}]")
        fragments.append(entry["text"])

        # Quote the message being replied to, tagging its type unless it is plain text.
        quoted = get_message_by_id(history, entry.get("reply_to_message_id"))
        if quoted:
            fragments.append(
                f"\n<quote>{quoted['username']}: {quoted['message']}</quote>"
                if quoted["type"] == "text"
                else f"\n<quote>{quoted['username']}: [{quoted['type']}] {quoted['message']}</quote>"
            )
        fragments.append("\n\n")
    return "".join(fragments)
def get_message_by_id(history: list[dict], message_id: int | None = None) -> dict:
@@ -270,6 +298,12 @@ def get_message_by_id(history: list[dict], message_id: int | None = None) -> dic
}
+def get_media_group_by_id(history: list[dict], media_group_id: int | None = None) -> list[dict]:
+ if not media_group_id:
+ return []
+ return [x for x in history if x["media_group_id"] == media_group_id]
+
+
async def daily_summary(client: Client):
"""Daily summary of the chat history."""
now = nowdt(TZ)