Commit 8ec8fbd

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-04-29 17:17:27
refactor(gpt): chain the response
1 parent f6a7b0f
src/llm/contexts.py
@@ -58,7 +58,7 @@ async def single_gpt_context(client: Client, message: Message) -> dict:
     }
     """
     info = parse_msg(message, silent=True)
-    role = "assistant" if f"{BOT_TIPS})" in info["text"] else "user"
+    role = "assistant" if BOT_TIPS in info["text"] else "user"
 
     if info["mtype"] not in ["text", "photo", "voice", "video", "document"]:
         return {}
@@ -118,7 +118,7 @@ async def single_gemini_context(client: Client, message: Message) -> dict:
     }
     """
     info = parse_msg(message, silent=True)
-    role = "model" if f"{BOT_TIPS})" in info["text"] else "user"
+    role = "model" if BOT_TIPS in info["text"] else "user"
     if info["mtype"] not in ["text", "photo", "voice", "video", "document"]:
         return {}
     extra_mime_types = ["application/pdf", "application/x-javascript"]  # gemini has built-in support for these
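
Both hunks above make the same change: role detection now keys off the full BOT_TIPS marker rather than a hand-built fragment. A minimal sketch of the idea, assuming BOT_TIPS is the parenthesized marker defined in src/llm/utils.py later in this commit:

BOT_TIPS = "(回复以继续)"  # the marker the bot stamps into every reply

def infer_role(text: str, *, gemini: bool = False) -> str:
    # a message carrying the marker was produced by the bot itself
    bot_role = "model" if gemini else "assistant"  # gemini names this role "model"
    return bot_role if BOT_TIPS in text else "user"

assert infer_role("🤖**gpt-4o**:(回复以继续)\nHello!") == "assistant"
assert infer_role("What is 1+1?", gemini=True) == "user"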
src/llm/gemini.py
@@ -11,11 +11,11 @@ from google.genai.types import ContentUnionDict, GenerateContentConfig, GoogleSe
 from loguru import logger
 from PIL import Image
 from pyrogram.client import Client
-from pyrogram.types import Message
+from pyrogram.types import Message, ReplyParameters
 
-from config import DOWNLOAD_DIR, GEMINI, PREFIX, TEXT_LENGTH
+from config import DOWNLOAD_DIR, GEMINI, GPT, PREFIX, TEXT_LENGTH
 from llm.contexts import get_conversation_contexts
-from llm.utils import BOT_TIPS, beautify_llm_response, clean_gemini_sourcemarks, clean_prefix, clean_source_marks
+from llm.utils import BOT_TIPS, beautify_llm_response, clean_cmd_prefix, clean_gemini_sourcemarks, clean_source_marks
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
@@ -50,7 +50,7 @@ async def gemini_response(client: Client, message: Message, conversations: list[
     response_modalities = ["TEXT", "IMAGE"] if modality == "image" else ["TEXT"]
     tools = [Tool(google_search=GoogleSearch())] if modality == "text" else None
     try:
-        msg = f"🤖**{model_name}**: 思考中...\n👤**[{info['full_name']}](tg://user?id={info['uid']})**:“{clean_prefix(info['text'])}”"
+        msg = f"🤖**{model_name}**: 思考中...\n👤**[{info['full_name']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"
         status_msg = (await send2tg(client, message, texts=msg, **kwargs))[0]
         kwargs["progress"] = status_msg
         contexts = await get_conversation_contexts(client, conversations, ctx_format="gemini")
@@ -78,31 +78,46 @@ async def gemini_stream(
     retry: int = 0,
     **kwargs,
 ):
-    prefix = f"🤖**{model_name}**: ({BOT_TIPS})\n"
+    prefix = f"🤖**{model_name}**:{BOT_TIPS}\n"
+    answers = ""
     try:
-        status = kwargs.get("progress")
+        status: Message = kwargs.get("progress")  # type: ignore
         api_keys = [x.strip() for x in GEMINI.API_KEYS.split(",") if x.strip()]
         if retry > len(api_keys) - 1:
             return None
         app = genai.Client(api_key=api_keys[retry], http_options=HttpOptions(base_url=GEMINI.BASR_URL, async_client_args={"proxy": GEMINI.PROXY}))
-        answers = ""
+        runtime_texts = ""
         async for chunk in await app.aio.models.generate_content_stream(**params):
             resp = parse_response(chunk.model_dump())
             answer = resp.get("texts", "")
+            runtime_texts += answer
             answers += answer
-            answers = beautify_llm_response(answers)
-            if await count_without_entities(prefix + answers) <= TEXT_LENGTH:
-                if len(answers.removeprefix(prefix)) > 10:  # start response if answer is not empty
-                    await modify_progress(message=status, text=prefix + answers, detail_progress=True)
+            runtime_texts = beautify_llm_response(runtime_texts)
+            length = await count_without_entities(prefix + runtime_texts)
+            if length <= TEXT_LENGTH:
+                if len(runtime_texts.removeprefix(prefix)) > 10:  # start updating once the answer has real content
+                    await modify_progress(message=status, text=prefix + runtime_texts, detail_progress=True)
             else:  # answers is too long, split it into multiple messages
-                parts = await smart_split(prefix + answers)
+                parts = await smart_split(prefix + runtime_texts)
+                if len(parts) == 1:
+                    continue
                 await modify_progress(message=status, text=blockquote(parts[0]), force_update=True)  # force send the first part
-                answers = parts[-1]  # keep the last part
-                status = await client.send_message(message.chat.id, answers)  # the new message
+                runtime_texts = parts[-1]  # keep the last part
+                status = await client.send_message(message.chat.id, text=prefix + runtime_texts, reply_parameters=ReplyParameters(message_id=status.id))  # the new message
 
         # all chunks are processed
-        answers = blockquote(beautify_llm_response(answers))  # blockquote AI response
-        await modify_progress(message=status, text=prefix + answers, force_update=True)
+        if not answers.strip():  # empty response
+            return await gemini_stream(client, message, model_name, params, retry + 1, **kwargs)  # type: ignore
+
+        if await count_without_entities(prefix + answers) <= TEXT_LENGTH:  # short answer in single msg
+            if length > GPT.COLLAPSE_LENGTH:  # collapse the response if the answer is too long
+                await modify_progress(message=status, text=f"{prefix}{blockquote(runtime_texts)}", force_update=True)
+            else:
+                await modify_progress(message=status, text=f"{prefix}{runtime_texts}", force_update=True)
+        elif length > GPT.COLLAPSE_LENGTH:
+            await modify_progress(message=status, text=prefix + blockquote(runtime_texts), force_update=True)
+        else:
+            await modify_progress(message=status, text=prefix + runtime_texts, force_update=True)
 
     except Exception as e:
         logger.error(e)
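
The chaining rewrite is the heart of this commit: once the streamed text outgrows TEXT_LENGTH, the first split part is frozen and force-sent, the remainder moves into a fresh message that replies to the previous one via ReplyParameters, and a guard skips the split when smart_split cannot actually produce two parts. A self-contained sketch of that loop, with a toy splitter standing in for smart_split:

TEXT_LIMIT = 12  # toy limit; the real code uses TEXT_LENGTH

def toy_split(text: str) -> list[str]:  # stand-in for smart_split
    return [text[i : i + TEXT_LIMIT] for i in range(0, len(text), TEXT_LIMIT)] or [""]

def stream_with_chaining(chunks: list[str]) -> list[str]:
    sent, current = [], ""
    for chunk in chunks:
        current += chunk
        parts = toy_split(current)
        if len(parts) == 1:
            continue            # still fits: keep editing the same message
        sent.append(parts[0])   # freeze the first part (force_update in the real code)
        current = parts[-1]     # remainder continues in a new reply message
    sent.append(current)        # flush the tail once the stream ends
    return sent

print(stream_with_chaining(["hello ", "world, this ", "is a long answer"]))
# ['hello world,', ' this is a l', 'ong answer']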
@@ -132,7 +147,7 @@ async def gemini_nonstream(
             return None
         app = genai.Client(api_key=api_keys[retry], http_options=HttpOptions(base_url=GEMINI.BASR_URL, async_client_args={"proxy": GEMINI.PROXY}))
         response = await app.aio.models.generate_content(**params)
-        prefix = f"🤖**{model_name}**: ({BOT_TIPS})\n"
+        prefix = f"🤖**{model_name}**:{BOT_TIPS}\n"
         res = parse_response(response.model_dump(), prefix=prefix)
         await send2tg(client, message, caption_above=True, **res, **kwargs)
         await modify_progress(del_status=True, **kwargs)
@@ -148,6 +163,7 @@ async def gemini_nonstream(
 
 
 def parse_response(data: dict, prefix: str = "") -> dict:
+    """Parse gemini response, includes texts, image and websearch."""
     logger.trace(data)
     parts = glom(data, "candidates.0.content.parts", default=[]) or []
     gemini_logging(parts)
@@ -174,6 +190,7 @@ def parse_response(data: dict, prefix: str = "") -> dict:
 
 
 def gemini_logging(contexts: list):
+    """Print logs of gemini contexts."""
     msg = ""
     with contextlib.suppress(Exception):
         for item in contexts:
src/llm/gpt.py
@@ -13,7 +13,7 @@ from llm.models import get_context_type, get_gpt_config, get_model_id
 from llm.response import send_to_gpt
 from llm.response_stream import send_to_gpt_stream
 from llm.tools import merge_tools_response
-from llm.utils import BOT_TIPS, clean_prefix, image_emoji, llm_cleanup_files
+from llm.utils import BOT_TIPS, clean_cmd_prefix, image_emoji, llm_cleanup_files
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
@@ -44,7 +44,11 @@ HELP = f"""🤖**GPT对话**
 
 def is_gpt_conversation(message: Message) -> bool:
     info = parse_msg(message)
-    if info["is_bot"]:  # do not process bot message
+    # to avoid potential infinite loops,
+    # we do not respond to bot messages or GPT responses.
+    if info["is_bot"]:
+        return False
+    if BOT_TIPS in info["text"]:
         return False
     if startswith_prefix(info["text"], prefix=[PREFIX.GPT, PREFIX.GENIMG, "/gpt", "/gemini", "/ds", "/qwen", "/doubao", "/grok"]):
         return True
@@ -109,7 +113,7 @@ async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = G
 
     config["completions"]["messages"] = await get_conversation_contexts(client, conversations)
 
-    msg = f"🤖**{config['friendly_name']}**: 思考中...\n👤**[{info['full_name']}](tg://user?id={info['uid']})**:“{clean_prefix(info['text'])}”"
+    msg = f"🤖**{config['friendly_name']}**: 思考中...\n👤**[{info['full_name']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"
     status_msg = (await send2tg(client, message, texts=msg, **kwargs))[0]
     kwargs["progress"] = status_msg
     if context_type.get("error"):
@@ -118,7 +122,7 @@ async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = G
     config, response = await merge_tools_response(config, **kwargs)
     # skip send a new request if tool_model is the same as the current model
     if response and config["completions"]["model"] == GPT.TOOLS_MODEL and response.get("content"):
-        texts = f"🤖**{config['friendly_name']}**: ({BOT_TIPS})\n\n{response['content']}"
+        texts = f"🤖**{config['friendly_name']}**:{BOT_TIPS}\n\n{response['content']}"
         length = await count_without_entities(texts)
         if length <= TEXT_LENGTH:
             await modify_progress(message=status_msg, text=texts, force_update=True, **kwargs)
@@ -133,9 +137,9 @@ async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = G
         if content := response.get("content"):
             if reasoning := response.get("reasoning"):
                 content = f"{reasoning}\n{content}"
-                texts = f"🤖**{response['model']}**: ({BOT_TIPS})\n{content}"
+                texts = f"🤖**{response['model']}**:{BOT_TIPS}\n{content}"
             else:
-                texts = f"🤖**{response['model']}**: ({BOT_TIPS})\n\n{content}"
+                texts = f"🤖**{response['model']}**:{BOT_TIPS}\n\n{content}"
             logger.debug(texts)
             await send2tg(client, message, texts=texts, **kwargs)
             await modify_progress(message=status_msg, del_status=True, **kwargs)
src/llm/response.py
@@ -10,7 +10,7 @@ from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXP
 
 from config import GPT
 from llm.hooks import pre_hooks
-from llm.utils import add_search_results_to_response, beautify_llm_response, beautify_model_name, extract_reasoning
+from llm.utils import REASONING_BEGIN, REASONING_END, add_search_results_to_response, beautify_llm_response, beautify_model_name, extract_reasoning
 from messages.progress import modify_progress
 
 
@@ -92,7 +92,7 @@ async def parse_response(config: dict, response: dict) -> dict[str, str]:
             reasoning = glom(choice, Coalesce("message.reasoning_content", "message.reasoning"), default="") or ""
         if reasoning and str(reasoning) != "None":  # add expandable block quotation mark for reasoning
             # if change this line, remember to remove the reasoning from contexts (`llm/contexts.py`)
-            reasoning = f"{BLOCKQUOTE_EXPANDABLE_DELIM}🤔{reasoning.strip()}💡\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}"
+            reasoning = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{reasoning.strip()}{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}"
 
         primary_model = glom(config, "completions.model", default="") or ""
         used_model = glom(response, "model", default="") or ""
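
The REASONING_BEGIN/END constants replace the hard-coded 🤔/💡 emojis so the delimiters live in one place. A sketch of the wrapper, with stand-in blockquote delimiters (the real values come from pyrogram.parser.markdown and may differ):

BLOCKQUOTE_EXPANDABLE_DELIM = "**>"      # assumption, not pyrogram's real value
BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**"  # assumption, not pyrogram's real value
REASONING_BEGIN, REASONING_END = "🤔", "💡"

def wrap_reasoning(reasoning: str) -> str:
    # fold the model's chain of thought into an expandable quote block
    return (
        f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}"
        f"{reasoning.strip()}{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}"
    )

print(wrap_reasoning("  weigh both options first  "))
# **>🤔weigh both options first💡
# <**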
src/llm/response_stream.py
@@ -9,13 +9,13 @@ from loguru import logger
 from openai import AsyncOpenAI
 from pyrogram.client import Client
 from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
-from pyrogram.types import Message
+from pyrogram.types import Message, ReplyParameters
 
 from config import GPT, TEXT_LENGTH
 from llm.hooks import pre_hooks
-from llm.utils import BOT_TIPS, add_search_results_to_response, beautify_llm_response
+from llm.utils import BOT_TIPS, REASONING_BEGIN, REASONING_END, add_search_results_to_response, beautify_llm_response, split_reasoning
 from messages.progress import modify_progress
-from messages.utils import count_without_entities, smart_split
+from messages.utils import blockquote, count_without_entities, smart_split
 
 
 async def send_to_gpt_stream(client: Client, status: Message, config: dict, retry: int = 0, **kwargs) -> dict:
@@ -25,13 +25,13 @@ async def send_to_gpt_stream(client: Client, status: Message, config: dict, retr
         {"content": str, "reasoning": str, "model": str}
     """
     # ruff: noqa: RUF001, RUF003
-    prefix = f"🤖**{config['friendly_name']}**: ({BOT_TIPS})\n"
+    prefix = f"🤖**{config['friendly_name']}**:{BOT_TIPS}\n"
     try:
         pre_hooks(config["client"], config["completions"], message_info=kwargs.get("message_info"))
         openai = AsyncOpenAI(**config["client"])
         logger.trace(config)
         answers = prefix
-        sent_answers = []
+        all_answers = ""
         is_reasoning = False
         reasoning_in_response = None
         gen = await openai.chat.completions.create(**config["completions"], stream=True)
@@ -50,12 +50,12 @@ async def send_to_gpt_stream(client: Client, status: Message, config: dict, retr
                 reasoning_in_response = True
             if reasoning_content and not is_reasoning:  # first reasoning chunk received
                 is_reasoning = True
-                answers += f"{BLOCKQUOTE_EXPANDABLE_DELIM}🤔{reasoning_content.lstrip()}"
+                answers += f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{reasoning_content.lstrip()}"
             elif reasoning_content and is_reasoning:  # reasoning continues while the flag is set
                 answers += reasoning_content
             elif reasoning_in_response is True and is_reasoning:  # answer arrived, close the reasoning flag
                 is_reasoning = False
-                answers = f"{answers.rstrip()}💡\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}\n" + answer.lstrip()
+                answers = f"{answers.rstrip()}{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}\n" + answer.lstrip()
             else:
                 answers += answer
 
@@ -63,46 +63,49 @@ async def send_to_gpt_stream(client: Client, status: Message, config: dict, retr
             # handle "<think>...</think>\n\n"
             if answers.removeprefix(prefix).lstrip().startswith("<think>"):
                 is_reasoning = True
-                answers = answers.replace("<think>", f"{BLOCKQUOTE_EXPANDABLE_DELIM}🤔")
+                answers = answers.replace("<think>", f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}")
             if "</think>" in answers:
                 is_reasoning = False
-                answers = re.sub(r"</think>\s*", f"💡\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}", answers, count=1)
-
-            # handle ">Reasoning...Reasoned(.*?)seconds"
-            if re.search(r"^>?\s*Reasoning", answers.removeprefix(prefix).lstrip(), re.DOTALL):
-                is_reasoning = True
-                answers = re.sub(r">?\s*Reasoning\s*", f"{BLOCKQUOTE_EXPANDABLE_DELIM}🤔", answers, count=1, flags=re.DOTALL)
-            if re.search(r"🤔(.*?)Reasoned(.*?)seconds", answers.removeprefix(prefix).lstrip(), re.DOTALL):
-                is_reasoning = False
-                answers = re.sub(r"Reasoned(.*?)seconds\s*", f"💡\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}", answers, count=1, flags=re.DOTALL)
-
-            # handle ">正在推理...,持续(.*?)秒"
-            if re.search(r"^>?(正在)?推理", answers.removeprefix(prefix).lstrip(), re.DOTALL):
-                is_reasoning = True
-                answers = re.sub(r">?(正在)?推理\s*", f"{BLOCKQUOTE_EXPANDABLE_DELIM}🤔", answers, count=1, flags=re.DOTALL)
-            if re.search(r"🤔(.*?),持续(.*?)秒", answers.removeprefix(prefix).lstrip(), re.DOTALL):
-                is_reasoning = False
-                answers = re.sub(r",持续(.*?)秒\s*", f"💡\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}", answers, count=1, flags=re.DOTALL)
+                answers = re.sub(r"</think>\s*", f"{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}", answers, count=1)
 
             answers = beautify_llm_response(answers)
-            if await count_without_entities(answers) <= TEXT_LENGTH:
-                if len(answers.removeprefix(prefix)) > 3:  # start response if answer is not empty
+            if await count_without_entities(answers) <= TEXT_LENGTH - 10:  # leave some headroom
+                if len(answers.removeprefix(prefix)) > 10:  # start updating once the answer has real content
                     await modify_progress(message=status, text=answers, detail_progress=True)
             else:  # answers is too long, split it into multiple messages
                 parts = await smart_split(answers)
-                await modify_progress(message=status, text=parts[0], force_update=True)  # force send the first part
-                sent_answers.append(parts[0])
+                if len(parts) == 1:
+                    continue
+                if parts[0].startswith(prefix):  # first part still carries the prefix: split out reasoning, then quote
+                    reasoning_resp, response = split_reasoning(parts[0])
+                    content = reasoning_resp + "\n\n" + response
+                    await modify_progress(message=status, text=f"{prefix}{blockquote(content.strip())}", force_update=True)
+                else:
+                    await modify_progress(message=status, text=blockquote(parts[0]), force_update=True)
+                all_answers += parts[0]
                 answers = parts[-1]  # keep the last part
                 if is_reasoning:
                     answers = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{answers.lstrip()}"
-                status = await client.send_message(status.chat.id, answers)
-
-        sent_answers.append(answers)
-        answers = add_search_results_to_response(config.get("search_results", []), "".join(sent_answers))
-        answers = (await smart_split(answers))[-1]
-        # Finally, force update the message
-        await modify_progress(message=status, text=beautify_llm_response(answers), force_update=True)
+                status = await client.send_message(status.chat.id, text=prefix + answers, reply_parameters=ReplyParameters(message_id=status.id))
+        # all chunks are processed
+        all_answers += answers
+        all_answers = add_search_results_to_response(config.get("search_results", []), all_answers)
+        length = await count_without_entities(answers)
 
+        answers = (await smart_split(all_answers))[-1]  # the last part (or the only part)
+        reasoning_resp, response = split_reasoning(answers)
+        if answers.startswith(prefix):
+            if length > GPT.COLLAPSE_LENGTH:
+                content = reasoning_resp + "\n\n" + response
+                await modify_progress(message=status, text=f"{prefix}{blockquote(content.strip())}", force_update=True)
+            elif reasoning_resp:
+                await modify_progress(message=status, text=f"{prefix}{blockquote(reasoning_resp)}\n{response}", force_update=True)
+            else:
+                await modify_progress(message=status, text=f"{prefix}{response}", force_update=True)
+        elif length > GPT.COLLAPSE_LENGTH:
+            await modify_progress(message=status, text=prefix + blockquote(response), force_update=True)
+        else:
+            await modify_progress(message=status, text=prefix + response, force_update=True)
     except Exception as e:
         error = f"🤖{config['friendly_name']}请求失败, 重试次数: {retry + 1}/{GPT.MAX_RETRY + 1}\n{e}"
         logger.error(error)
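
The tail of send_to_gpt_stream now folds long answers into an expandable quote once they pass GPT.COLLAPSE_LENGTH. The decision, reduced to a sketch with a stand-in blockquote helper:

COLLAPSE_LENGTH = 500  # mirrors the new GPT.COLLAPSE_LENGTH config knob

def blockquote(s: str) -> str:      # stand-in for messages.utils.blockquote
    return "**>" + s + "\n<**"      # delimiters are assumptions, see above

def finalize(prefix: str, reasoning: str, body: str, length: int) -> str:
    if length > COLLAPSE_LENGTH:    # long: collapse reasoning + answer together
        return prefix + blockquote(f"{reasoning}\n\n{body}".strip())
    if reasoning:                   # short with reasoning: quote only the reasoning
        return prefix + blockquote(reasoning) + "\n" + body
    return prefix + body            # short plain answer: send as-is

print(finalize("🤖**gpt**:(回复以继续)\n", "🤔think💡", "42", length=20))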
src/llm/utils.py
@@ -7,11 +7,14 @@ from typing import BinaryIO
 import tiktoken
 from loguru import logger
 from markitdown import MarkItDown
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
 
 from config import DOWNLOAD_DIR, GPT, PREFIX
 from utils import number_to_emoji, remove_consecutive_newlines, remove_dash, remove_pound, zhcn
 
-BOT_TIPS = "回复以继续"
+BOT_TIPS = "(回复以继续)"  # noqa: RUF001
+REASONING_BEGIN = "🤔"  # use emoji to separate model reasoning and content
+REASONING_END = "💡"
 
 
 def llm_cleanup_files(messages: list[dict]):
@@ -133,18 +136,18 @@ def extract_reasoning(text: str) -> tuple[str, str]:
     {content}"
     """
     reasoning = ""
-    if matched := re.search(r"^<think>(.*?)</think>", text.lstrip(), re.DOTALL):
+    if matched := re.search(r"^<think>(.*?)</think>", text.lstrip(), flags=re.DOTALL):
         reasoning = matched.group(1)
         text = re.sub(r"<think>(.*?)</think>", "", text, count=1, flags=re.DOTALL)  # remove <think>...</think>
-    if matched := re.search(r"^<thinking>(.*?)</thinking>", text.lstrip(), re.DOTALL):
+    if matched := re.search(r"^<thinking>(.*?)</thinking>", text.lstrip(), flags=re.DOTALL):
         reasoning = matched.group(1)
         text = re.sub(r"<thinking>(.*?)</thinking>", "", text, count=1, flags=re.DOTALL)
 
     # Reverse engineered Web API
-    if matched := re.search(r"^>?(正在)?推理(.*?)(,持续.*?)秒\n\n(.*)", text.lstrip(), re.DOTALL):  # noqa: RUF001
+    if matched := re.search(r"^>?(正在)?推理(.*?)(,持续.*?)秒\n\n(.*)", text.lstrip(), flags=re.DOTALL):  # noqa: RUF001
         reasoning = matched.group(2)
         text = matched.group(4)
-    if matched := re.search(r"^>?\s?Reasoning(.*?)Reasoned(.*?)seconds\n\n(.*)", text.lstrip(), re.DOTALL):
+    if matched := re.search(r"^>?\s?Reasoning(.*?)Reasoned(.*?)seconds\n\n(.*)", text.lstrip(), flags=re.DOTALL):
         reasoning = matched.group(1)
         text = matched.group(3)
 
@@ -169,21 +172,31 @@ def image_emoji(capability: bool) -> str:  # noqa: FBT001
     return "(🏞)" if capability else ""
 
 
-def clean_prefix(text: str) -> str:
+def clean_cmd_prefix(text: str) -> str:
     for prefix in [PREFIX.GPT, PREFIX.GENIMG, "/gpt", "/gemini", "/ds", "/qwen", "/grok", "/doubao"]:
         text = text.removeprefix(prefix).lstrip()
     return text
 
 
-def clean_response(text: str) -> str:
-    """Remove bot prefix and reasoning content."""
+def clean_bot_tips(text: str) -> str:
     if not text:
         return ""
-    text = clean_prefix(text)
-    # remove bot tips
-    text = re.sub(rf"(.*?){BOT_TIPS}\)", "", text, flags=re.DOTALL).strip()
-    # remove reasoning
-    return re.sub(r"^🤔(.*?)💡", "", text, flags=re.DOTALL).strip()
+    return re.sub(rf"(.*?){re.escape(BOT_TIPS)}", "", text, flags=re.DOTALL).strip()
+
+
+def clean_reasoning(text: str) -> str:
+    if not text:
+        return ""
+    text = re.sub(rf"{REASONING_BEGIN}(.*?){REASONING_END}", "", text, flags=re.DOTALL).strip()
+    text = text.removeprefix(BLOCKQUOTE_EXPANDABLE_DELIM).lstrip()
+    return text.removeprefix(BLOCKQUOTE_EXPANDABLE_END_DELIM).lstrip()
+
+
+def clean_response(text: str) -> str:
+    """Remove bot prefix and reasoning content."""
+    text = clean_cmd_prefix(text)
+    text = clean_bot_tips(text)
+    return clean_reasoning(text)
 
 
 def clean_gemini_sourcemarks(contexts: list[dict]) -> None:
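
With BOT_TIPS escaped for the regex in clean_bot_tips above (the new marker contains literal parentheses, which are regex metacharacters), the three helpers compose into clean_response, which strips a stored bot reply back to plain content. A usage sketch:

from llm.utils import clean_response  # import path as used elsewhere in this repo

raw = "🤖**gpt-4o**:(回复以继续)\n🤔thinking...💡\nThe answer is 42."
# clean_bot_tips drops everything through the marker; clean_reasoning
# then removes the 🤔...💡 span, leaving only the visible answer:
assert clean_response(raw) == "The answer is 42."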
@@ -207,3 +220,20 @@ def convert_md(path: str | Path | BinaryIO) -> str:
         logger.error(f"Failed to convert to markdown: {e}")
         return ""
     return result.text_content
+
+
+def split_reasoning(text: str) -> tuple[str, str]:
+    """Split reasoning from text.
+
+    Args:
+        text: LLM response
+    Returns:
+        (reasoning, content)
+    """
+    text = clean_cmd_prefix(text)
+    text = clean_bot_tips(text)
+    content = clean_reasoning(text)
+    reasoning = ""
+    if matched := re.search(rf"{REASONING_BEGIN}(.*?){REASONING_END}", text, flags=re.DOTALL):
+        reasoning = REASONING_BEGIN + matched.group(1) + REASONING_END
+    return reasoning.strip(), content.strip()
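
split_reasoning complements clean_response: instead of discarding the reasoning it returns both halves, which the streaming code above uses to re-quote reasoning separately from the answer. A usage sketch, assuming the definitions above:

from llm.utils import split_reasoning  # import path as used elsewhere in this repo

reasoning, content = split_reasoning("🤖**m**:(回复以继续)\n🤔weigh options💡\npick B")
assert reasoning == "🤔weigh options💡"
assert content == "pick B"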
src/messages/progress.py
@@ -9,6 +9,7 @@ from pyrogram.errors import FloodWait, MessageNotModified
 from pyrogram.types import Message
 
 from config import TEXT_LENGTH, cache
+from messages.utils import count_without_entities, smart_split
 
 
 async def modify_progress(
@@ -57,7 +58,9 @@ async def modify_progress(
         if not detail_progress:
             return
         logger.trace(f"Progress: {text!r}")
-        await message.edit_text(text[:TEXT_LENGTH])
+        if len(text) > TEXT_LENGTH and await count_without_entities(text) > TEXT_LENGTH:
+            text = (await smart_split(text))[0]
+        await message.edit_text(text)
         cache.set("modify_progress", "1", ttl=ttl)
     except FloodWait as e:
         logger.warning(e)
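
The guard added to modify_progress does a cheap len() check before the entity-aware count, so most edits skip the expensive pass; only genuinely overlong text is clamped to the first smart_split part. A self-contained sketch with stand-in helpers:

import asyncio

LIMIT = 4096  # assumption: TEXT_LENGTH is Telegram's usual 4096-char cap

async def count_stub(text: str) -> int:        # stand-in for count_without_entities
    return len(text.replace("**", ""))         # pretend "**" is markup, not content

async def split_stub(text: str) -> list[str]:  # stand-in for smart_split
    return [text[:LIMIT], text[LIMIT:]]

async def clamp_for_edit(text: str) -> str:
    # cheap check first; the entity-aware count only runs when it could matter
    if len(text) > LIMIT and await count_stub(text) > LIMIT:
        text = (await split_stub(text))[0]
    return text

assert len(asyncio.run(clamp_for_edit("x" * 5000))) == LIMIT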
src/messages/utils.py
@@ -130,7 +130,7 @@ async def smart_split(text: str, chars_per_string: int = TEXT_LENGTH, mode: Pars
 
 def blockquote(s: str) -> str:
     """Block quote texts."""
-    return BLOCKQUOTE_EXPANDABLE_DELIM + s + BLOCKQUOTE_EXPANDABLE_END_DELIM
+    return BLOCKQUOTE_EXPANDABLE_DELIM + s + "\n" + BLOCKQUOTE_EXPANDABLE_END_DELIM
 
 
 def warp_comments(texts: str) -> str:
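
The one-character blockquote change matters presumably because the closing delimiter needs to begin its own line to be parsed as markup rather than as trailing text. Before and after, with stand-in delimiters:

DELIM, END = "**>", "<**"  # assumptions; the real values live in pyrogram

def blockquote_old(s: str) -> str:
    return DELIM + s + END          # "...last line<**": delimiter glued to text

def blockquote_new(s: str) -> str:
    return DELIM + s + "\n" + END   # delimiter always begins a fresh line

assert blockquote_new("a\nb") == "**>a\nb\n<**"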
src/config.py
@@ -166,6 +166,7 @@ class GPT:  # see `llm/README.md`
     TOKEN_ENCODING = os.getenv("GPT_TOKEN_ENCODING", "o200k_base")  # https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
     MAX_RETRY = int(os.getenv("GPT_MAX_RETRY", "2"))
     HELICONE_API_KEY = os.getenv("HELICONE_API_KEY", "")
+    COLLAPSE_LENGTH = int(os.getenv("GEMINI_COLLAPSE_LENGTH", "500"))  # Collapse the response if the length is greater than this value
 
     # default command (/ai).
     # set a string contains "gemini" to switch to gemini (see class GEMINI  below for details)
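
The new knob reads the GEMINI_COLLAPSE_LENGTH environment variable (the name as committed, even though the setting lives on class GPT and is used by both backends). A quick sketch of overriding it, mirroring how config.py reads it:

import os

os.environ["GEMINI_COLLAPSE_LENGTH"] = "800"  # fold replies longer than 800 chars
COLLAPSE_LENGTH = int(os.getenv("GEMINI_COLLAPSE_LENGTH", "500"))
assert COLLAPSE_LENGTH == 800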