Commit eba0769

benny-dou <60535774+benny-dou@users.noreply.github.com>
2026-04-09 03:05:24
feat(ai): support hiding thinking process
1 parent f0f32c8
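
This commit threads a new hide_thinking keyword through all four streaming text backends (claude.py, gemini.py, openai_chat.py, openai_response.py). In each streaming loop the thinking and answer chunks are now accumulated before the display logic runs, and reasoning chunks are skipped from the live status-message updates when the flag is set, so the model's thinking process is hidden from the chat but still retained in the accumulated thoughts. A hedged usage sketch follows; the prompt/context arguments are elided and the surrounding call site is not part of this commit:

    # Sketch only: the other required arguments of anthropic_responses are omitted here.
    resp = await anthropic_responses(
        ...,                  # model, contexts, etc. (not shown in this diff)
        hide_thinking=True,   # new in this commit: suppress streaming of the thinking process
        silent=False,
        max_retries=3,
    )
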
src/ai/texts/claude.py
@@ -37,6 +37,7 @@ async def anthropic_responses(
     anthropic_media_send_as: Literal["base64", "file_id"] = "file_id",
     anthropic_append_citation: bool = True,
     skills: str = "",
+    hide_thinking: bool = False,
     silent: bool = False,
     max_retries: int = 3,
     **kwargs,
@@ -94,6 +95,7 @@ async def anthropic_responses(
                 anthropic,
                 params=params,
                 prefix=prefix,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 max_retries=max_retries,
                 append_citation=anthropic_append_citation,
@@ -124,6 +126,7 @@ async def single_api_response(
     *,
     prefix: str = "",
     append_citation: bool = True,
+    hide_thinking: bool = False,
     silent: bool = False,
     retry: int = 0,
     max_retries: int = 3,
@@ -142,9 +145,7 @@ async def single_api_response(
     status_cid = status_msg.chat.id if isinstance(status_msg, Message) else 0
     status_mid = status_msg.id if isinstance(status_msg, Message) else 0
     sent_messages = []
-    full_response = {}
     try:
-        tool_calls: list[dict] = []  # tool_call results
         is_reasoning = False
         async with anthropic.beta.messages.stream(**params) as stream:
             async for chunk in stream:
@@ -171,7 +172,10 @@ async def single_api_response(
 
                 if not chunk_answer and not chunk_thinking:
                     continue
-
+                thoughts += chunk_thinking
+                answers += chunk_answer
+                if hide_thinking and is_reasoning:
+                    continue
                 runtime_texts = beautify_llm_response(runtime_texts)
                 length = await count_without_entities(prefix + runtime_texts)
                 if length <= TEXT_LENGTH - 10:  # leave some flexibility
@@ -192,9 +196,6 @@ async def single_api_response(
                             sent_messages.append(status_msg)
                             status_mid = status_msg.id
 
-                thoughts += chunk_thinking
-                answers += chunk_answer
-
         # all chunks are processed
         if not answers.strip() and not thoughts.strip():  # empty response
             return await single_api_response(
@@ -205,6 +206,7 @@ async def single_api_response(
                 prefix=prefix,
                 retry=retry + 1,
                 max_retries=max_retries,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 **kwargs,
             )
@@ -233,6 +235,7 @@ async def single_api_response(
                 append_citation=append_citation,
                 retry=retry + 1,
                 max_retries=max_retries,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 **kwargs,
             )
src/ai/texts/gemini.py
@@ -34,6 +34,7 @@ async def gemini_chat_completion(
     gemini_proxy: str | None = PROXY.GOOGLE,
     gemini_append_grounding: bool = True,
     skills: str = "",
+    hide_thinking: bool = False,
     silent: bool = False,
     max_retries: int = 3,
     **kwargs,
@@ -71,6 +72,7 @@ async def gemini_chat_completion(
                 silent=silent,
                 max_retries=max_retries,
                 append_grounding=gemini_append_grounding,
+                hide_thinking=hide_thinking,
                 **kwargs,
             )
             if resp.get("texts"):
@@ -99,6 +101,7 @@ async def single_api_generate_content(
     retry: int = 0,
     max_retries: int = 3,
     append_grounding: bool = True,
+    hide_thinking: bool = False,
     silent: bool = False,
     **kwargs,
 ) -> dict:
@@ -136,6 +139,11 @@ async def single_api_generate_content(
             else:
                 runtime_texts += chunk_answer
 
+            thoughts += chunk_thinking
+            answers += chunk_answer
+            if hide_thinking and is_reasoning:
+                continue
+
             runtime_texts = beautify_llm_response(runtime_texts)
             length = await count_without_entities(prefix + runtime_texts)
             if length <= TEXT_LENGTH:
@@ -156,9 +164,6 @@ async def single_api_generate_content(
                         sent_messages.append(status_msg)
                         status_mid = status_msg.id
 
-            thoughts += chunk_thinking
-            answers += chunk_answer
-
         await gemini.aio.aclose()
         # all chunks are processed
         if not answers.strip() and not thoughts.strip():  # empty response
@@ -172,6 +177,7 @@ async def single_api_generate_content(
                 retry=retry + 1,
                 max_retries=max_retries,
                 append_grounding=append_grounding,
+                hide_thinking=hide_thinking,
                 **kwargs,
             )
         if append_grounding:  # add grounding to the response
@@ -200,6 +206,7 @@ async def single_api_generate_content(
                 retry=retry + 1,
                 max_retries=max_retries,
                 append_grounding=append_grounding,
+                hide_thinking=hide_thinking,
                 **kwargs,
             )
     return {"texts": answers, "thoughts": thoughts, "sent_messages": sent_messages}
src/ai/texts/openai_chat.py
@@ -34,6 +34,7 @@ async def openai_chat_completions(
     openai_contexts: list[dict] | None = None,
     openai_tools: list[dict] | None = None,
     skills: str = "",
+    hide_thinking: bool = False,
     silent: bool = False,
     max_retries: int = 3,
     **kwargs,
@@ -87,6 +88,7 @@ async def openai_chat_completions(
                 openai,
                 params=params,
                 prefix=prefix,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 max_retries=max_retries,
                 **kwargs,
@@ -113,6 +115,7 @@ async def single_api_chat_completions(
     params: dict,
     *,
     prefix: str = "",
+    hide_thinking: bool = False,
     silent: bool = False,
     retry: int = 0,
     max_retries: int = 3,
@@ -161,6 +164,8 @@ async def single_api_chat_completions(
 
             thoughts += chunk_thinking
             answers += chunk_answer
+            if hide_thinking and is_reasoning and not tool_args:
+                continue
             runtime_texts = beautify_llm_response(runtime_texts)
             length = await count_without_entities(prefix + runtime_texts)
             if length <= TEXT_LENGTH - 10:  # leave some flexibility
@@ -193,6 +198,7 @@ async def single_api_chat_completions(
                 prefix=prefix,
                 retry=retry + 1,
                 max_retries=max_retries,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 **kwargs,
             )
@@ -222,6 +228,7 @@ async def single_api_chat_completions(
                 prefix=prefix,
                 retry=retry + 1,
                 max_retries=max_retries,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 **kwargs,
             )
src/ai/texts/openai_response.py
@@ -40,6 +40,7 @@ async def openai_responses_api(
     openai_allow_file: bool = False,  # whether to allow file in input modalities
     openai_media_send_as: Literal["base64", "file_id"] = "file_id",
     skills: str = "",
+    hide_thinking: bool = False,
     silent: bool = False,
     max_retries: int = 3,
     **kwargs,
@@ -105,6 +106,7 @@ async def openai_responses_api(
                 openai,
                 params=params,
                 prefix=prefix,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 max_retries=max_retries,
                 **kwargs,
@@ -146,6 +148,7 @@ async def single_api_response(
     params: dict,
     *,
     prefix: str = "",
+    hide_thinking: bool = False,
     silent: bool = False,
     retry: int = 0,
     max_retries: int = 3,
@@ -182,6 +185,7 @@ async def single_api_response(
                     prefix=prefix,
                     retry=retry + 1,
                     max_retries=max_retries,
+                    hide_thinking=hide_thinking,
                     silent=silent,
                     **kwargs,
                 )
@@ -195,15 +199,7 @@ async def single_api_response(
             # set the reasoning flag
             if response_type in {"response.reasoning_summary_part.added", "response.reasoning_summary_text.delta"}:  # reasoning in progress
                 is_reasoning = True
-            elif response_type in {
-                "response.reasoning_summary_part.done",
-                "response.reasoning_summary_text.done",
-                "response.content_part.added",
-                "response.output_text.delta",
-                "response.output_text.done",
-                "response.content_part.done",
-                "response.completed",
-            }:  # reasoning ended
+            elif response_type in {"response.content_part.added", "response.output_text.delta"}:  # reasoning ended
                 is_reasoning = False
 
             if response_type == "response.reasoning_summary_part.added" and len(thoughts) == 0:  # 首次收到推理内容
@@ -216,6 +212,11 @@ async def single_api_response(
             else:
                 runtime_texts += chunk_answer
 
+            thoughts += chunk_thinking
+            answers += chunk_answer
+            if hide_thinking and is_reasoning:
+                continue
+
             runtime_texts = beautify_llm_response(runtime_texts)
             length = await count_without_entities(prefix + runtime_texts)
             if length <= TEXT_LENGTH - 10:  # leave some flexibility
@@ -236,8 +237,6 @@ async def single_api_response(
                         sent_messages.append(status_msg)
                         status_mid = status_msg.id
 
-            thoughts += chunk_thinking
-            answers += chunk_answer
             if response_type == "response.reasoning_summary_text.done":
                 thoughts = resp.get("text", thoughts)
             elif response_type == "response.output_text.done":
@@ -259,6 +258,7 @@ async def single_api_response(
                 prefix=prefix,
                 retry=retry + 1,
                 max_retries=max_retries,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 **kwargs,
             )
@@ -286,6 +286,7 @@ async def single_api_response(
                 prefix=prefix,
                 retry=retry + 1,
                 max_retries=max_retries,
+                hide_thinking=hide_thinking,
                 silent=silent,
                 **kwargs,
             )