Commit e3e9072

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-04 12:27:29
fix(gpt): include texts from audio messages
1 parent b0f74ae
Changed files (3)
src/llm/contexts.py
@@ -60,7 +60,7 @@ async def single_gpt_context(client: Client, message: Message) -> dict:
     info = parse_msg(message, silent=True)
     role = "assistant" if BOT_TIPS in info["text"] else "user"
 
-    if info["mtype"] not in ["text", "photo", "voice", "video", "document"]:
+    if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document"]:
         return {}
 
     extra_txt_extensions = [".sh", ".json", ".xml"]  # treat these as txt file
@@ -119,7 +119,7 @@ async def single_gemini_context(client: Client, message: Message) -> dict:
     """
     info = parse_msg(message, silent=True)
     role = "model" if BOT_TIPS in info["text"] else "user"
-    if info["mtype"] not in ["text", "photo", "voice", "video", "document"]:
+    if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document"]:
         return {}
     extra_mime_types = ["application/pdf", "application/x-javascript"]  # gemini has built-in support for these
     extra_txt_extensions = [".sh", ".json", ".xml"]  # also treat these as txt file
src/llm/utils.py
@@ -194,6 +194,8 @@ def clean_reasoning(text: str) -> str:
 
 def clean_response(text: str) -> str:
     """Remove bot prefix and reasoning content."""
+    text = re.sub(r"^👤@.*?\/\/", "", text)  # remove markdown send_from_user
+    text = re.sub(r"^👤\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", text)  # remove html send_from_user
     text = clean_cmd_prefix(text)
     text = clean_bot_tips(text)
     return clean_reasoning(text)
src/messages/database.py
@@ -49,7 +49,7 @@ async def save_messages(messages: list[Message | None], key: str, metadata: dict
         info = parse_msg(msg, silent=True)
         # Caution: this format should be consistent with `handle_social_media` function in `handler.py`
         # text = re.sub(r"^👤\[@.*?\]\(tg://user\?id=\d+\)//", "", text)  # remove markdown send_from_user
-        text = re.sub(r"^👤\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", info["html"])  # remove markdown send_from_user
+        text = re.sub(r"^👤\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", info["html"])  # remove html send_from_user
         msg_extra = {"text": text} if text else {}
         if msg.media_group_id:
             if msg.media_group_id not in media_group_ids: