Commit eba0769
Changed files (4)
src/ai/texts/claude.py
@@ -37,6 +37,7 @@ async def anthropic_responses(
anthropic_media_send_as: Literal["base64", "file_id"] = "file_id",
anthropic_append_citation: bool = True,
skills: str = "",
+ hide_thinking: bool = False,
silent: bool = False,
max_retries: int = 3,
**kwargs,
@@ -94,6 +95,7 @@ async def anthropic_responses(
anthropic,
params=params,
prefix=prefix,
+ hide_thinking=hide_thinking,
silent=silent,
max_retries=max_retries,
append_citation=anthropic_append_citation,
@@ -124,6 +126,7 @@ async def single_api_response(
*,
prefix: str = "",
append_citation: bool = True,
+ hide_thinking: bool = False,
silent: bool = False,
retry: int = 0,
max_retries: int = 3,
@@ -142,9 +145,7 @@ async def single_api_response(
status_cid = status_msg.chat.id if isinstance(status_msg, Message) else 0
status_mid = status_msg.id if isinstance(status_msg, Message) else 0
sent_messages = []
- full_response = {}
try:
- tool_calls: list[dict] = [] # tool_call results
is_reasoning = False
async with anthropic.beta.messages.stream(**params) as stream:
async for chunk in stream:
@@ -171,7 +172,10 @@ async def single_api_response(
if not chunk_answer and not chunk_thinking:
continue
-
+ thoughts += chunk_thinking
+ answers += chunk_answer
+ if hide_thinking and is_reasoning:
+ continue
runtime_texts = beautify_llm_response(runtime_texts)
length = await count_without_entities(prefix + runtime_texts)
if length <= TEXT_LENGTH - 10: # leave some flexibility
@@ -192,9 +196,6 @@ async def single_api_response(
sent_messages.append(status_msg)
status_mid = status_msg.id
- thoughts += chunk_thinking
- answers += chunk_answer
-
# all chunks are processed
if not answers.strip() and not thoughts.strip(): # empty response
return await single_api_response(
@@ -205,6 +206,7 @@ async def single_api_response(
prefix=prefix,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)
@@ -233,6 +235,7 @@ async def single_api_response(
append_citation=append_citation,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)
src/ai/texts/gemini.py
@@ -34,6 +34,7 @@ async def gemini_chat_completion(
gemini_proxy: str | None = PROXY.GOOGLE,
gemini_append_grounding: bool = True,
skills: str = "",
+ hide_thinking: bool = False,
silent: bool = False,
max_retries: int = 3,
**kwargs,
@@ -71,6 +72,7 @@ async def gemini_chat_completion(
silent=silent,
max_retries=max_retries,
append_grounding=gemini_append_grounding,
+ hide_thinking=hide_thinking,
**kwargs,
)
if resp.get("texts"):
@@ -99,6 +101,7 @@ async def single_api_generate_content(
retry: int = 0,
max_retries: int = 3,
append_grounding: bool = True,
+ hide_thinking: bool = False,
silent: bool = False,
**kwargs,
) -> dict:
@@ -136,6 +139,11 @@ async def single_api_generate_content(
else:
runtime_texts += chunk_answer
+ thoughts += chunk_thinking
+ answers += chunk_answer
+ if hide_thinking and is_reasoning:
+ continue
+
runtime_texts = beautify_llm_response(runtime_texts)
length = await count_without_entities(prefix + runtime_texts)
if length <= TEXT_LENGTH:
@@ -156,9 +164,6 @@ async def single_api_generate_content(
sent_messages.append(status_msg)
status_mid = status_msg.id
- thoughts += chunk_thinking
- answers += chunk_answer
-
await gemini.aio.aclose()
# all chunks are processed
if not answers.strip() and not thoughts.strip(): # empty response
@@ -172,6 +177,7 @@ async def single_api_generate_content(
retry=retry + 1,
max_retries=max_retries,
append_grounding=append_grounding,
+ hide_thinking=hide_thinking,
**kwargs,
)
if append_grounding: # add grounding to the response
@@ -200,6 +206,7 @@ async def single_api_generate_content(
retry=retry + 1,
max_retries=max_retries,
append_grounding=append_grounding,
+ hide_thinking=hide_thinking,
**kwargs,
)
return {"texts": answers, "thoughts": thoughts, "sent_messages": sent_messages}
src/ai/texts/openai_chat.py
@@ -34,6 +34,7 @@ async def openai_chat_completions(
openai_contexts: list[dict] | None = None,
openai_tools: list[dict] | None = None,
skills: str = "",
+ hide_thinking: bool = False,
silent: bool = False,
max_retries: int = 3,
**kwargs,
@@ -87,6 +88,7 @@ async def openai_chat_completions(
openai,
params=params,
prefix=prefix,
+ hide_thinking=hide_thinking,
silent=silent,
max_retries=max_retries,
**kwargs,
@@ -113,6 +115,7 @@ async def single_api_chat_completions(
params: dict,
*,
prefix: str = "",
+ hide_thinking: bool = False,
silent: bool = False,
retry: int = 0,
max_retries: int = 3,
@@ -161,6 +164,8 @@ async def single_api_chat_completions(
thoughts += chunk_thinking
answers += chunk_answer
+ if hide_thinking and is_reasoning and not tool_args:
+ continue
runtime_texts = beautify_llm_response(runtime_texts)
length = await count_without_entities(prefix + runtime_texts)
if length <= TEXT_LENGTH - 10: # leave some flexibility
@@ -193,6 +198,7 @@ async def single_api_chat_completions(
prefix=prefix,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)
@@ -222,6 +228,7 @@ async def single_api_chat_completions(
prefix=prefix,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)
src/ai/texts/openai_response.py
@@ -40,6 +40,7 @@ async def openai_responses_api(
openai_allow_file: bool = False, # whether to allow file in input modalities
openai_media_send_as: Literal["base64", "file_id"] = "file_id",
skills: str = "",
+ hide_thinking: bool = False,
silent: bool = False,
max_retries: int = 3,
**kwargs,
@@ -105,6 +106,7 @@ async def openai_responses_api(
openai,
params=params,
prefix=prefix,
+ hide_thinking=hide_thinking,
silent=silent,
max_retries=max_retries,
**kwargs,
@@ -146,6 +148,7 @@ async def single_api_response(
params: dict,
*,
prefix: str = "",
+ hide_thinking: bool = False,
silent: bool = False,
retry: int = 0,
max_retries: int = 3,
@@ -182,6 +185,7 @@ async def single_api_response(
prefix=prefix,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)
@@ -195,15 +199,7 @@ async def single_api_response(
# set the reasoning flag
if response_type in {"response.reasoning_summary_part.added", "response.reasoning_summary_text.delta"}: # reasoning in progress
is_reasoning = True
- elif response_type in {
- "response.reasoning_summary_part.done",
- "response.reasoning_summary_text.done",
- "response.content_part.added",
- "response.output_text.delta",
- "response.output_text.done",
- "response.content_part.done",
- "response.completed",
- }: # reasoning finished
+ elif response_type in {"response.content_part.added", "response.output_text.delta"}: # reasoning finished
is_reasoning = False
if response_type == "response.reasoning_summary_part.added" and len(thoughts) == 0: # 首次收到推理内容
@@ -216,6 +212,11 @@ async def single_api_response(
else:
runtime_texts += chunk_answer
+ thoughts += chunk_thinking
+ answers += chunk_answer
+ if hide_thinking and is_reasoning:
+ continue
+
runtime_texts = beautify_llm_response(runtime_texts)
length = await count_without_entities(prefix + runtime_texts)
if length <= TEXT_LENGTH - 10: # leave some flexibility
@@ -236,8 +237,6 @@ async def single_api_response(
sent_messages.append(status_msg)
status_mid = status_msg.id
- thoughts += chunk_thinking
- answers += chunk_answer
if response_type == "response.reasoning_summary_text.done":
thoughts = resp.get("text", thoughts)
elif response_type == "response.output_text.done":
@@ -259,6 +258,7 @@ async def single_api_response(
prefix=prefix,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)
@@ -286,6 +286,7 @@ async def single_api_response(
prefix=prefix,
retry=retry + 1,
max_retries=max_retries,
+ hide_thinking=hide_thinking,
silent=silent,
**kwargs,
)