Commit d118a05

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-15 09:36:31
feat(gpt): support `openai` and `gemini` SDK
1 parent cd235ec
src/asr/gemini_asr.py
@@ -70,7 +70,7 @@ Notes:
 - Maintain consistent formatting to ensure clarity and readability."""
     path = Path(path)
     status = None if silent else kwargs.get("progress")
-    api_keys = shuffle_keys(GEMINI.API_KEYS)
+    api_keys = shuffle_keys(GEMINI.API_KEY)
     if model_id is None:
         model_id = GEMINI.ASR_MODEL
     for api_key in api_keys.split(","):
@@ -152,7 +152,7 @@ async def gemini_nonstream_asr(path: str | Path, voice_format: str, *, prompt: s
     https://ai.google.dev/gemini-api/docs/audio
     """
     path = Path(path)
-    api_keys = [x.strip() for x in GEMINI.API_KEYS.split(",") if x.strip()]
+    api_keys = [x.strip() for x in GEMINI.API_KEY.split(",") if x.strip()]
     random.shuffle(api_keys)
     res = ""
     for key in api_keys:
src/asr/utils.py
@@ -64,8 +64,8 @@ def get_tencent_asr_method(duration: float, file_size: int) -> tuple[str, list[s
 def get_gemini_asr_method(duration: float) -> tuple[str, list[str]]:
     if duration > GEMINI.ASR_MAX_DURATION:
         return f"无法识别时长超过{GEMINI.ASR_MAX_DURATION}秒的音频, 当前音频时长: {duration}秒", []
-    if not GEMINI.API_KEYS:
-        return "请设置`GEMINI_API_KEYS`环境变量", []
+    if not GEMINI.API_KEY:
+        return "请设置`GEMINI_API_KEY`环境变量", []
     return "gemini", ["aac", "aiff", "flac", "mp3", "oga", "ogg", "opus", "wav"]
 
 
src/llm/contexts.py
@@ -28,13 +28,13 @@ def get_conversations(message: Message) -> list[Message]:
     return messages
 
 
-async def get_conversation_contexts(client: Client, conversations: list[Message], ctx_format: str = "gpt") -> list[dict]:
+async def get_conversation_contexts(client: Client, conversations: list[Message], ctx_format: str = "openai") -> list[dict]:
     """Generate contexts for GPT conversation.
 
     From old to new messages.
     """
     # parse context for each message
-    if ctx_format.lower() == "gpt":
+    if ctx_format.lower() == "openai":
         contexts = [await single_gpt_context(client, message) for message in conversations]
         contexts = [x for x in contexts if x.get("content")]
     else:
src/llm/gemini.py
@@ -47,7 +47,7 @@ async def gemini_response(client: Client, message: Message, conversations: list[
     info = parse_msg(message)
     model = GEMINI.TEXT_MODEL if modality == "text" else GEMINI.IMG_MODEL
     model_name = GEMINI.TEXT_MODEL_NAME if modality == "text" else GEMINI.IMG_MODEL_NAME
-    if not GEMINI.API_KEYS:
+    if not GEMINI.API_KEY:
         await send2tg(client, message, texts="⚠️**未配置Gemini API, 请尝试其他模型", **kwargs)
     response_modalities = ["TEXT", "IMAGE"] if modality == "image" else ["TEXT"]
     thinking_budget = GEMINI.IMG_THINKING_BUDGET if modality == "image" else GEMINI.TEXT_THINKING_BUDGET
@@ -102,7 +102,7 @@ async def gemini_stream(
     status_msg = init_status_msg
     status_mid = status_msg.id if isinstance(status_msg, Message) else message.id
     if not kwargs.get("gemini_api_keys"):
-        kwargs["gemini_api_keys"] = shuffle_keys(GEMINI.API_KEYS)
+        kwargs["gemini_api_keys"] = shuffle_keys(GEMINI.API_KEY)
     api_keys = [x.strip() for x in kwargs["gemini_api_keys"].split(",") if x.strip()]
     max_retry = len(api_keys) - 1 if max_retry is None else max_retry
     try:
@@ -138,7 +138,18 @@ async def gemini_stream(
 
         # all chunks are processed
         if not answers.strip():  # empty response
-            return await gemini_stream(client, message, model_name, params, prefix=prefix, retry=retry + 1, last_error=last_error, append_grounding=append_grounding, **kwargs)
+            return await gemini_stream(
+                client,
+                message,
+                model_name,
+                params,
+                prefix=prefix,
+                retry=retry + 1,
+                last_error=last_error,
+                silent=silent,
+                append_grounding=append_grounding,
+                **kwargs,
+            )
 
         if await count_without_entities(prefix + answers) <= TEXT_LENGTH:  # short answer in single msg
             if length > GPT.COLLAPSE_LENGTH:  # collapse the response if the answer is too long
@@ -158,7 +169,18 @@ async def gemini_stream(
         with contextlib.suppress(Exception):
             await modify_progress(message=init_status_msg, text=error, force_update=True)
             [await modify_progress(msg, del_status=True) for msg in sent_messages]
-        return await gemini_stream(client, message, model_name, params, prefix=prefix, retry=retry + 1, last_error=error, append_grounding=append_grounding, **kwargs)
+        return await gemini_stream(
+            client,
+            message,
+            model_name,
+            params,
+            prefix=prefix,
+            retry=retry + 1,
+            last_error=error,
+            silent=silent,
+            append_grounding=append_grounding,
+            **kwargs,
+        )
     return {"texts": answers, "prefix": prefix, "model_name": model_name, "sent_messages": sent_messages}
 
 
@@ -176,7 +198,7 @@ async def gemini_nonstream(
     try:
         if clean_marks:
             clean_gemini_sourcemarks(params["contents"])
-        api_keys = [x.strip() for x in GEMINI.API_KEYS.split(",") if x.strip()]
+        api_keys = [x.strip() for x in GEMINI.API_KEY.split(",") if x.strip()]
         if kwargs.get("gemini_api_keys"):
             api_keys = [x.strip() for x in kwargs["gemini_api_keys"].split(",") if x.strip()]
         if retry > len(api_keys) - 1:
src/llm/gpt.py
@@ -25,20 +25,20 @@ HELP = f"""🤖**GPT对话**
 暂不支持视频/音频, 可先用`{PREFIX.ASR}`命令转为文字后再调用`{PREFIX.GPT}`
 
 ⚙️模型配置:
-`{PREFIX.GPT}`默认模型: **{GPT.DEFAULT_MODEL_NAME}**
+`{PREFIX.GPT}`默认使用 **{GPT.DEFAULT_PROVIDER.lower()}** 模型
 
 🔄使用以下命令强制切换模型:
-`/gpt`: **{GPT.OPENAI_MODEL_NAME}** {image_emoji(GPT.OPENAI_IMAGE_CAPABILITY)}
-`/gemini`: **{GEMINI.TEXT_MODEL_NAME}** {image_emoji(capability=True)}
-`/ds`: **{GPT.DEEPSEEK_MODEL_NAME}** {image_emoji(GPT.DEEPSEEK_IMAGE_CAPABILITY)}
-`/qwen`: **{GPT.QWEN_MODEL_NAME}** {image_emoji(GPT.QWEN_IMAGE_CAPABILITY)}
-`/doubao`: **{GPT.DOUBAO_MODEL_NAME}** {image_emoji(GPT.DOUBAO_IMAGE_CAPABILITY)}
-`/grok`: **{GPT.GROK_MODEL_NAME}** {image_emoji(GPT.GROK_IMAGE_CAPABILITY)}
+`/gpt`: **{GPT.OPENAI_MODEL_NAME}** {image_emoji(GPT.OPENAI_ACCEPT_IMAGE)}
+`/gemini`: **{GPT.GEMINI_MODEL_NAME}** {image_emoji(GPT.GEMINI_ACCEPT_IMAGE)}
+`/ds`: **{GPT.DEEPSEEK_MODEL_NAME}** {image_emoji(GPT.DEEPSEEK_ACCEPT_IMAGE)}
+`/qwen`: **{GPT.QWEN_MODEL_NAME}** {image_emoji(GPT.QWEN_ACCEPT_IMAGE)}
+`/doubao`: **{GPT.DOUBAO_MODEL_NAME}** {image_emoji(GPT.DOUBAO_ACCEPT_IMAGE)}
+`/grok`: **{GPT.GROK_MODEL_NAME}** {image_emoji(GPT.GROK_ACCEPT_IMAGE)}
 
 ⚠️注意:
 若对话历史包含图片
 但模型不支持图片(无🏞图标)
-会自动切换为: **{GPT.OMNI_MODEL_NAME}**
+会自动切换为 **{GPT.OMNI_PROVIDER.lower()}** 模型
 """
 
 
@@ -63,13 +63,13 @@ def is_gpt_conversation(message: Message) -> bool:
         GPT.QWEN_MODEL_NAME,
         GPT.DOUBAO_MODEL_NAME,
         GPT.GROK_MODEL_NAME,
-        GEMINI.TEXT_MODEL_NAME,
+        GPT.GEMINI_MODEL_NAME,
         GEMINI.IMG_MODEL_NAME,
     ]
     return startswith_prefix(reply_info["text"], prefix=[f"🤖{x}".lower() for x in model_names])
 
 
-async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = GPT.STREAM_MODE, **kwargs):
+async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = True, **kwargs):
     """Get GPT response from Various API.
 
     Args:
@@ -101,8 +101,8 @@ async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = G
     kwargs["message_info"] = info  # save trigger message info
     conversations = get_conversations(message)
     context_type = get_context_type(conversations)  # {"type": "text", "error": None}  # text, image
-    model_id, resp_modality = get_model_id(info["text"], reply_text, context_type["type"])
-    if "gemini" in model_id.lower():
+    model_id, resp_modality, sdk = get_model_id(info["text"], reply_text, context_type["type"])
+    if "gemini" in model_id.lower() and sdk == "gemini":
         return await gemini_response(client, message, conversations, resp_modality, **kwargs)
 
     config = get_gpt_config(model_id)
@@ -111,8 +111,7 @@ async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = G
     if not config["completions"]["model"].strip():
         return await send2tg(client, message, texts=f"⚠️**{config['friendly_name']}** 未配置模型ID, 请尝试其他命令\n\n{HELP}", **kwargs)
 
-    config["completions"]["messages"] = await get_conversation_contexts(client, conversations)
-
+    config["completions"]["messages"] = await get_conversation_contexts(client, conversations, ctx_format=sdk)
     msg = f"🤖**{config['friendly_name']}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"
     status_msg = (await send2tg(client, message, texts=msg, **kwargs))[0]
     kwargs["progress"] = status_msg
src/llm/hooks.py
@@ -21,7 +21,7 @@ def pre_openrouter_hook(client: dict, completions: dict) -> None:
     """Add special parameters for OpenRouter."""
     if "openrouter" not in client["base_url"]:
         return
-    if models := [x.strip() for x in GPT.FALLBACK_MODELS.split(",") if x.strip()]:
+    if models := [x.strip() for x in GPT.OPENROUTER_FALLBACK_MODELS.split(",") if x.strip()]:
         completions["extra_body"] = {"models": models}
 
 
src/llm/models.py
@@ -5,6 +5,7 @@ from openai import DefaultAsyncHttpxClient
 from pyrogram.types import Message
 
 from config import GEMINI, GPT, PREFIX, PROXY
+from llm.utils import sample_key
 from messages.parser import parse_msg
 from messages.utils import startswith_prefix
 
@@ -27,23 +28,18 @@ def get_context_type(conversations: list[Message]) -> dict:
     return res
 
 
-def get_model_id(text: str, reply_text: str, context_type: str) -> tuple[str, str]:
+def get_model_id(text: str, reply_text: str, context_type: str) -> tuple[str, str, str]:
     """Get model id based on the reply text, prefix command and context type.
 
     /gpt = OpenAI, /gemini = Gemini, /ds = DeepSeek, /qwen = Qwen, /doubao = Doubao, /grok = Grok
 
     Returns:
-        tuple[str, str]: (model_id, response_modality)
+        tuple[str, str, str]: (model_id, response_modality, sdk)
     """
     model_id = ""
-    response_modality = "text"
-
-    # parse from reply bot message. If reply to DeepSeek bot message, use DeepSeek model.
-    if reply_text.startswith(f"🤖{GPT.DEFAULT_MODEL_NAME}"):
-        model_id = GPT.DEFAULT_MODEL
-    elif reply_text.startswith(f"🤖{GPT.OMNI_MODEL_NAME}"):
-        model_id = GPT.OMNI_MODEL
-    elif reply_text.startswith(f"🤖{GPT.OPENAI_MODEL_NAME}"):
+    # Parse from reply bot message.
+    # For example, reply to DeepSeek bot message, use DeepSeek model.
+    if reply_text.startswith(f"🤖{GPT.OPENAI_MODEL_NAME}"):
         model_id = GPT.OPENAI_MODEL
     elif reply_text.startswith(f"🤖{GPT.DEEPSEEK_MODEL_NAME}"):
         model_id = GPT.DEEPSEEK_MODEL
@@ -53,15 +49,23 @@ def get_model_id(text: str, reply_text: str, context_type: str) -> tuple[str, st
         model_id = GPT.DOUBAO_MODEL
     elif reply_text.startswith(f"🤖{GPT.GROK_MODEL_NAME}"):
         model_id = GPT.GROK_MODEL
-    elif reply_text.startswith(f"🤖{GEMINI.TEXT_MODEL_NAME}"):
-        model_id = GEMINI.TEXT_MODEL
-    elif reply_text.startswith(f"🤖{GEMINI.IMG_MODEL_NAME}"):  # gemini can generate image
+    elif reply_text.startswith(f"🤖{GPT.GEMINI_MODEL_NAME}"):
+        model_id = GPT.GEMINI_MODEL
+    elif reply_text.startswith(f"🤖{GEMINI.IMG_MODEL_NAME}"):
         model_id = GEMINI.IMG_MODEL
-        response_modality = "image"
 
-    # parse from command prefix. If use /gemini command, force use Gemini model.
+    # map providers to model_ids
+    providers = {
+        "openai": GPT.OPENAI_MODEL,
+        "deepseek": GPT.DEEPSEEK_MODEL,
+        "qwen": GPT.QWEN_MODEL,
+        "doubao": GPT.DOUBAO_MODEL,
+        "grok": GPT.GROK_MODEL,
+        "gemini": GPT.GEMINI_MODEL,
+    }
+    # parse from command prefix. If use /ds command, force use DeepSeek model.
     if startswith_prefix(text, prefix=[PREFIX.GPT]):
-        model_id = GPT.DEFAULT_MODEL
+        model_id = providers.get(GPT.DEFAULT_PROVIDER.lower(), GPT.OPENAI_MODEL)
     elif startswith_prefix(text, prefix=["/gpt"]):
         model_id = GPT.OPENAI_MODEL
     elif startswith_prefix(text, prefix=["/ds"]):
@@ -74,47 +78,61 @@ def get_model_id(text: str, reply_text: str, context_type: str) -> tuple[str, st
         model_id = GPT.GROK_MODEL
     elif startswith_prefix(text, prefix=[PREFIX.GENIMG]):
         model_id = GEMINI.IMG_MODEL
-        response_modality = "image"  # /gen command is for image response.
     elif startswith_prefix(text, prefix=["/gemini"]):
-        model_id = GEMINI.TEXT_MODEL
-        response_modality = "text"  # /gemini command is for text response.
+        model_id = GPT.GEMINI_MODEL
 
     # fallback to omni model if needed
+    omni_providers = {
+        "openai": "/gpt",
+        "deepseek": "/ds",
+        "qwen": "/qwen",
+        "doubao": "/doubao",
+        "grok": "/grok",
+        "gemini": "/gemini",
+    }
+    if model_id and (model_id == GEMINI.IMG_MODEL or reply_text.startswith(f"🤖{GEMINI.IMG_MODEL_NAME}")):
+        response_modality = "image"
+        sdk = "gemini"
+    else:
+        response_modality = "text"
+        sdk = GPT.GEMINI_SDK
+
     if model_id and context_type == "text":  # no need to fallback if context type is text
-        return model_id, response_modality
+        return model_id, response_modality, sdk
     if (
-        (model_id == GPT.OPENAI_MODEL and not GPT.OPENAI_IMAGE_CAPABILITY)
-        or (model_id == GPT.DEEPSEEK_MODEL and not GPT.DEEPSEEK_IMAGE_CAPABILITY)
-        or (model_id == GPT.QWEN_MODEL and not GPT.QWEN_IMAGE_CAPABILITY)
-        or (model_id == GPT.DOUBAO_MODEL and not GPT.DOUBAO_IMAGE_CAPABILITY)
-        or (model_id == GPT.GROK_MODEL and not GPT.GROK_IMAGE_CAPABILITY)
+        (model_id == GPT.OPENAI_MODEL and not GPT.OPENAI_ACCEPT_IMAGE)
+        or (model_id == GPT.GEMINI_MODEL and not GPT.GEMINI_ACCEPT_IMAGE)
+        or (model_id == GPT.DEEPSEEK_MODEL and not GPT.DEEPSEEK_ACCEPT_IMAGE)
+        or (model_id == GPT.QWEN_MODEL and not GPT.QWEN_ACCEPT_IMAGE)
+        or (model_id == GPT.DOUBAO_MODEL and not GPT.DOUBAO_ACCEPT_IMAGE)
+        or (model_id == GPT.GROK_MODEL and not GPT.GROK_ACCEPT_IMAGE)
     ):
-        model_id = GPT.OMNI_MODEL
-        response_modality = "text"
-    return model_id, response_modality
+        prefix = omni_providers.get(GPT.OMNI_PROVIDER.lower(), "/gpt")
+        return get_model_id(prefix, reply_text, context_type)  # parse again
+
+    return model_id, response_modality, sdk
 
 
 def get_gpt_config(model_id: str = "") -> dict:
     """Get GPT configurations."""
-    model_factory = {GPT.DEFAULT_MODEL: {"api_key": GPT.DEFAULT_API_KEY, "base_url": GPT.DEFAULT_BASE_URL, "model_name": GPT.DEFAULT_MODEL_NAME}}
-    model_factory |= {
-        GPT.OPENAI_MODEL: {"api_key": GPT.OPENAI_API_KEY, "base_url": GPT.OPENAI_BASE_URL, "model_name": GPT.OPENAI_MODEL_NAME},
-        GPT.DEEPSEEK_MODEL: {"api_key": GPT.DEEPSEEK_API_KEY, "base_url": GPT.DEEPSEEK_BASE_URL, "model_name": GPT.DEEPSEEK_MODEL_NAME},
-        GPT.QWEN_MODEL: {"api_key": GPT.QWEN_API_KEY, "base_url": GPT.QWEN_BASE_URL, "model_name": GPT.QWEN_MODEL_NAME},
-        GPT.DOUBAO_MODEL: {"api_key": GPT.DOUBAO_API_KEY, "base_url": GPT.DOUBAO_BASE_URL, "model_name": GPT.DOUBAO_MODEL_NAME},
-        GPT.GROK_MODEL: {"api_key": GPT.GROK_API_KEY, "base_url": GPT.GROK_BASE_URL, "model_name": GPT.GROK_MODEL_NAME},
+    model_factory = {
+        GPT.OPENAI_MODEL: {"api_key": sample_key(GPT.OPENAI_API_KEY), "base_url": GPT.OPENAI_BASE_URL, "model_name": GPT.OPENAI_MODEL_NAME},
+        GPT.DEEPSEEK_MODEL: {"api_key": sample_key(GPT.DEEPSEEK_API_KEY), "base_url": GPT.DEEPSEEK_BASE_URL, "model_name": GPT.DEEPSEEK_MODEL_NAME},
+        GPT.QWEN_MODEL: {"api_key": sample_key(GPT.QWEN_API_KEY), "base_url": GPT.QWEN_BASE_URL, "model_name": GPT.QWEN_MODEL_NAME},
+        GPT.DOUBAO_MODEL: {"api_key": sample_key(GPT.DOUBAO_API_KEY), "base_url": GPT.DOUBAO_BASE_URL, "model_name": GPT.DOUBAO_MODEL_NAME},
+        GPT.GROK_MODEL: {"api_key": sample_key(GPT.GROK_API_KEY), "base_url": GPT.GROK_BASE_URL, "model_name": GPT.GROK_MODEL_NAME},
+        GPT.GEMINI_MODEL: {"api_key": sample_key(GPT.GEMINI_API_KEY), "base_url": GPT.GEMINI_BASE_URL, "model_name": GPT.GEMINI_MODEL_NAME},
     }
-    model_factory |= {GPT.OMNI_MODEL: {"api_key": GPT.OMNI_API_KEY, "base_url": GPT.OMNI_BASE_URL, "model_name": GPT.OMNI_MODEL_NAME}}
-    model_factory |= {GPT.SUMMARY_MODEL: {"api_key": GPT.SUMMARY_API_KEY, "base_url": GPT.SUMMARY_BASE_URL, "model_name": GPT.SUMMARY_MODEL_NAME}}
 
-    client = {
-        "timeout": round(float(GPT.TIMEOUT)),
-        "http_client": DefaultAsyncHttpxClient(proxy=PROXY.GPT),
-    }
+    client = {"http_client": DefaultAsyncHttpxClient(proxy=PROXY.GPT)}
+    if GPT.TIMEOUT is not None:
+        client |= {"timeout": int(GPT.TIMEOUT)}
 
     model_id_config = model_factory.get(model_id, {})
     model_name = model_id_config.get("model_name", "")
     model_id_config.pop("model_name", None)
     client |= model_id_config
-
-    return {"friendly_name": model_name, "client": client, "completions": {"model": model_id, "temperature": float(GPT.TEMPERATURE)}}
+    completions = {"model": model_id}
+    if GPT.TEMPERATURE is not None:
+        completions |= {"temperature": float(GPT.TEMPERATURE)}
+    return {"friendly_name": model_name, "client": client, "completions": completions}
src/llm/summary.py
@@ -7,14 +7,14 @@ from datetime import datetime, timedelta
 from zoneinfo import ZoneInfo
 
 from loguru import logger
+from openai import DefaultAsyncHttpxClient
 from pyrogram.client import Client
 from pyrogram.types import Chat, Message
 
-from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ
-from llm.models import get_gpt_config
+from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, PROXY, TID, TZ
 from llm.prompts import refine_prompts
 from llm.response import send_to_gpt
-from llm.utils import count_tokens
+from llm.utils import count_tokens, sample_key
 from messages.chat_history import get_parsed_chat_history
 from messages.parser import parse_msg
 from messages.progress import modify_progress
@@ -147,7 +147,11 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
         await modify_progress(del_status=True, **kwargs)
         return
     await modify_progress(text=f"🤖**{summary_model_name}**总结中...\n{msg}", force_update=True, **kwargs)
-    config = get_gpt_config(summary_model)
+    config = {
+        "friendly_name": summary_model_name,
+        "client": {"api_key": sample_key(GPT.SUMMARY_API_KEY), "base_url": GPT.SUMMARY_BASE_URL, "http_client": DefaultAsyncHttpxClient(proxy=PROXY.GPT)},
+        "completions": {"model": summary_model},
+    }
     config["completions"]["messages"] = contexts
     # set max_tokens for the model
     if "o1" in summary_model or "o3" in summary_model:  # o1 or newer models use `max_completion_tokens`
src/llm/tools.py
@@ -5,9 +5,8 @@ import json
 
 from glom import glom
 from loguru import logger
-from openai import AsyncOpenAI, DefaultAsyncHttpxClient
 
-from config import GPT, PROXY, TOKEN, TZ
+from config import GPT, TOKEN, TZ
 from llm.prompts import add_search_results_to_prompts, modify_prompts
 from llm.response import send_to_gpt
 from llm.tool_scheme import ONLINE_SEARCH
@@ -21,11 +20,7 @@ async def get_online_search_result(query: str) -> list[dict]:
     if GPT.PRIMARY_SEARCH_ENGINE == "google":
         results = await google_search(query)
         if not results:
-            return await glm_search(query)
-    elif GPT.PRIMARY_SEARCH_ENGINE == "glm":
-        results = await glm_search(query)
-        if not results:
-            return await google_search(query)
+            return []
     return results
 
 
@@ -38,36 +33,6 @@ async def google_search(query: str) -> list[dict]:
     return results[: int(GPT.SEARCH_NUM_RESULTS)]
 
 
-async def glm_search(query: str) -> list[dict]:
-    if not (GPT.GLM_API_KEY and GPT.GLM_BASE_URL):
-        return []
-    try:
-        client = AsyncOpenAI(
-            api_key=GPT.GLM_API_KEY,
-            base_url=GPT.GLM_BASE_URL,
-            http_client=DefaultAsyncHttpxClient(proxy=PROXY.GPT),
-        )
-        tools = [{"type": "web_search", "web_search": {"enable": True, "search_query": query, "search_result": True}}]
-        response = await client.chat.completions.create(
-            model="web-search-pro",
-            messages=[{"role": "user", "content": query}],
-            temperature=0,
-            stream=False,
-            tools=tools,  # type: ignore
-        )
-        tool_calls = glom(response.model_dump(), "choices.0.message.tool_calls", default=[]) or []
-        results = next((x["search_result"] for x in tool_calls if x.get("search_result")), [])
-        for x in results:
-            x.pop("icon", None)
-            x.pop("index", None)
-            x.pop("refer", None)
-        if results:
-            return results[: int(GPT.SEARCH_NUM_RESULTS)]
-    except Exception as e:
-        logger.error(e)
-    return []
-
-
 def add_tools(params: dict) -> dict:
     """Add tools for GPT.
 
@@ -82,7 +47,7 @@ def add_tools(params: dict) -> dict:
             }
     """
     tools = []
-    if (GPT.PRIMARY_SEARCH_ENGINE == "google" and TOKEN.GOOGLE_SEARCH_API_KEY and TOKEN.GOOGLE_SEARCH_CX) or (GPT.PRIMARY_SEARCH_ENGINE == "glm" and GPT.GLM_API_KEY and GPT.GLM_BASE_URL):
+    if GPT.PRIMARY_SEARCH_ENGINE == "google" and TOKEN.GOOGLE_SEARCH_API_KEY and TOKEN.GOOGLE_SEARCH_CX:
         tools = [ONLINE_SEARCH]
         system_prompt = f"你是一个具备网络访问能力的智能助手. 在需要时可以访问互联网进行相关搜索获取信息以确保用户得到最新、准确的帮助。当前时间是{nowdt(TZ):%Y-%m-%d %H:%M:%S} ({TZ})。"
         params["messages"] = modify_prompts(params["messages"], system_prompt, method="overwrite")
src/llm/utils.py
@@ -255,3 +255,16 @@ def shuffle_keys(keys: str | list[str]) -> str:
         return ""
     random.shuffle(keys)
     return ",".join(keys)
+
+
+def sample_key(keys: str | list[str]) -> str:
+    """Sample one key from a comma-separated string or a list of keys; return "" if there is none."""
+    if isinstance(keys, str):
+        keys = [x.strip() for x in keys.split(",") if x.strip()]
+    elif isinstance(keys, list):
+        keys = [x.strip() for x in keys if x.strip()]
+    else:
+        return ""
+    if not keys:
+        return ""
+    return random.choice(keys)
src/config.py
@@ -147,82 +147,6 @@ class COOKIE:  # See: https://github.com/easychen/CookieCloud
     CLOUD_PASS = os.getenv("COOKIE_CLOUD_PASS", "")
 
 
-class GPT:  # see `llm/README.md`
-    # See class GEMINI for the GEMINI configurations
-    STREAM_MODE = os.getenv("GPT_STREAM_MODE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    TOOLS_MODEL = os.getenv("GPT_TOOLS_MODEL", "gpt-4o-mini")  # this model should be fast and cheap
-    # comma separated fallback models for OpenRouter (e.g. openai/gpt-4o,anthropic/claude-3.5-sonnet)
-    FALLBACK_MODELS = os.getenv("GPT_FALLBACK_MODELS", "")
-    GLM_API_KEY = os.getenv("GPT_GLM_API_KEY", "")
-    GLM_BASE_URL = os.getenv("GPT_GLM_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
-    SEARCH_NUM_RESULTS = os.getenv("GPT_SEARCH_NUM_RESULTS", "10")
-    PRIMARY_SEARCH_ENGINE = os.getenv("GPT_PRIMARY_SEARCH_ENGINE", "google")  # google or glm
-    TIMEOUT = os.getenv("GPT_TIMEOUT", "300")
-    TEMPERATURE = os.getenv("GPT_TEMPERATURE", "1.0")
-    HISTORY_CONTEXT = os.getenv("GPT_HISTORY_CONTEXT", "20")  # 最多携带多少条历史消息
-    TOOLS_API_KEY = os.getenv("GPT_TOOLS_API_KEY", "")
-    TOOLS_BASE_URL = os.getenv("GPT_TOOLS_BASE_URL", "https://api.openai.com/v1")
-    TOKEN_ENCODING = os.getenv("GPT_TOKEN_ENCODING", "o200k_base")  # https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
-    MAX_RETRY = int(os.getenv("GPT_MAX_RETRY", "2"))
-    HELICONE_API_KEY = os.getenv("HELICONE_API_KEY", "")
-    COLLAPSE_LENGTH = int(os.getenv("GEMINI_COLLAPSE_LENGTH", "500"))  # Collapse the response if the length is greater than this value
-
-    # default command (/ai).
-    # set a string contains "gemini" to switch to gemini (see class GEMINI  below for details)
-    DEFAULT_MODEL = os.getenv("GPT_DEFAULT_MODEL", "")  # model id,
-    DEFAULT_MODEL_NAME = os.getenv("GPT_DEFAULT_MODEL_NAME", "")  # custom model name
-    DEFAULT_API_KEY = os.getenv("GPT_DEFAULT_API_KEY", "")
-    DEFAULT_BASE_URL = os.getenv("GPT_DEFAULT_BASE_URL", "https://api.openai.com/v1")
-    DEFAULT_IMAGE_CAPABILITY = os.getenv("GPT_DEFAULT_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-
-    # omni model (this should be a full modality model, like gpt-4o.)
-    # Used when the contexts contain multi-modelity data (text, image), but other model can not handle it.
-    # For example, /ds command can only handle text, but the contexts contain image.
-    OMNI_MODEL = os.getenv("GPT_OMNI_MODEL", "gpt-4o")
-    OMNI_MODEL_NAME = os.getenv("GPT_OMNI_MODEL_NAME", "GPT-4o")
-    OMNI_API_KEY = os.getenv("GPT_OMNI_API_KEY", "")
-    OMNI_BASE_URL = os.getenv("GPT_OMNI_BASE_URL", "https://api.openai.com/v1")
-
-    # /gpt command
-    OPENAI_MODEL = os.getenv("GPT_OPENAI_MODEL", "")
-    OPENAI_MODEL_NAME = os.getenv("GPT_OPENAI_MODEL_NAME", "")
-    OPENAI_API_KEY = os.getenv("GPT_OPENAI_API_KEY", "")
-    OPENAI_BASE_URL = os.getenv("GPT_OPENAI_BASE_URL", "https://api.openai.com/v1")
-    OPENAI_IMAGE_CAPABILITY = os.getenv("GPT_OPENAI_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /ds command
-    DEEPSEEK_MODEL = os.getenv("GPT_DEEPSEEK_MODEL", "")
-    DEEPSEEK_MODEL_NAME = os.getenv("GPT_DEEPSEEK_MODEL_NAME", "")
-    DEEPSEEK_API_KEY = os.getenv("GPT_DEEPSEEK_API_KEY", "")
-    DEEPSEEK_BASE_URL = os.getenv("GPT_DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1")
-    DEEPSEEK_IMAGE_CAPABILITY = os.getenv("GPT_DEEPSEEK_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /qwen command
-    QWEN_MODEL = os.getenv("GPT_QWEN_MODEL", "")
-    QWEN_MODEL_NAME = os.getenv("GPT_QWEN_MODEL_NAME", "")
-    QWEN_API_KEY = os.getenv("GPT_QWEN_API_KEY", "")
-    QWEN_BASE_URL = os.getenv("GPT_QWEN_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
-    QWEN_IMAGE_CAPABILITY = os.getenv("GPT_QWEN_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /grok command
-    GROK_MODEL = os.getenv("GPT_GROK_MODEL", "")
-    GROK_MODEL_NAME = os.getenv("GPT_GROK_MODEL_NAME", "")
-    GROK_API_KEY = os.getenv("GPT_GROK_API_KEY", "")
-    GROK_BASE_URL = os.getenv("GPT_GROK_BASE_URL", "https://api.x.ai/v1")
-    GROK_IMAGE_CAPABILITY = os.getenv("GPT_GROK_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /doubao command
-    DOUBAO_MODEL = os.getenv("GPT_DOUBAO_MODEL", "")
-    DOUBAO_MODEL_NAME = os.getenv("GPT_DOUBAO_MODEL_NAME", "")
-    DOUBAO_API_KEY = os.getenv("GPT_DOUBAO_API_KEY", "")
-    DOUBAO_BASE_URL = os.getenv("GPT_DOUBAO_BASE_URL", "https://ark.cn-beijing.volces.com/api/v3")
-    DOUBAO_IMAGE_CAPABILITY = os.getenv("GPT_DOUBAO_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /summary command
-    SUMMARY_MODEL = os.getenv("GPT_SUMMARY_MODEL", "")
-    SUMMARY_MODEL_NAME = os.getenv("GPT_SUMMARY_MODEL_NAME", "")
-    SUMMARY_MODEL_MAX_OUTPUT_LENGTH = os.getenv("GPT_SUMMARY_MODEL_MAX_OUTPUT_LENGTH", "8192")  # 8K
-    SUMMARY_API_KEY = os.getenv("GPT_SUMMARY_API_KEY", "")
-    SUMMARY_BASE_URL = os.getenv("GPT_SUMMARY_BASE_URL", "https://api.openai.com/v1")
-    SUMMARY_TIMEOUT = os.getenv("GPT_SUMMARY_TIMEOUT", "600")  # should be larger than default timeout
-    SUMMARY_IMAGE_CAPABILITY = os.getenv("GPT_SUMMARY_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-
-
 class TID:  # see more TID usecase in `src/permission.py`
     ADMIN = os.getenv("TID_ADMIN", "")
     # back up ytdlp audio if the user does not request it
@@ -272,17 +196,97 @@ class ASR:
     DEEPGRAM_API = os.getenv("ASR_DEEPGRAM_API", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
 
 
+class GPT:
+    """This is for OpenAI compatible API.
+
+    See class GEMINI for the GEMINI configurations
+    """
+
+    PRIMARY_SEARCH_ENGINE = os.getenv("GPT_PRIMARY_SEARCH_ENGINE", "google")  # currently, only `google` is supported
+    SEARCH_NUM_RESULTS = os.getenv("GPT_SEARCH_NUM_RESULTS", "10")
+    TIMEOUT = os.getenv("GPT_TIMEOUT", None)  # timeout in seconds
+    TEMPERATURE = os.getenv("GPT_TEMPERATURE", None)
+    HISTORY_CONTEXT = os.getenv("GPT_HISTORY_CONTEXT", "20")  # Max number of history messages as context
+    TOKEN_ENCODING = os.getenv("GPT_TOKEN_ENCODING", "o200k_base")  # https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
+    MAX_RETRY = int(os.getenv("GPT_MAX_RETRY", "2"))
+    HELICONE_API_KEY = os.getenv("HELICONE_API_KEY", "")  # https://docs.helicone.ai/getting-started/integration-method/gateway
+    COLLAPSE_LENGTH = int(os.getenv("GEMINI_COLLAPSE_LENGTH", "500"))  # Collapse the response if the length is larger than this value
+    # comma separated fallback models for OpenRouter (e.g. openai/gpt-4o,anthropic/claude-3.5-sonnet)
+    OPENROUTER_FALLBACK_MODELS = os.getenv("GPT_OPENROUTER_FALLBACK_MODELS", "")
+
+    # default command (/ai).
+    # Provider name: one of "openai", "deepseek", "qwen", "doubao", "grok", "gemini" (for "gemini", see GEMINI_SDK and class GEMINI below).
+    DEFAULT_PROVIDER = os.getenv("GPT_DEFAULT_PROVIDER", "openai")
+    # omni provider (this should be a multi-modality model, like gpt-4o.)
+    # Used when the context contains multi-modality data (text, image) that the selected model cannot handle.
+    # For example, the /ds command can only handle text, but the context contains images.
+    OMNI_PROVIDER = os.getenv("GPT_OMNI_PROVIDER", "openai")
+
+    # /gpt command
+    OPENAI_MODEL = os.getenv("GPT_OPENAI_MODEL", "")
+    OPENAI_MODEL_NAME = os.getenv("GPT_OPENAI_MODEL_NAME", "")
+    OPENAI_API_KEY = os.getenv("GPT_OPENAI_API_KEY", "")
+    OPENAI_BASE_URL = os.getenv("GPT_OPENAI_BASE_URL", "https://api.openai.com/v1")
+    OPENAI_ACCEPT_IMAGE = os.getenv("GPT_OPENAI_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    # /gemini command
+    GEMINI_SDK = os.getenv("GEMINI_SDK", "openai")  # "openai" or "gemini". If set to "gemini", see class GEMINI below for details
+    GEMINI_MODEL = os.getenv("GEMINI_TEXT_MODEL", "")
+    GEMINI_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "")
+    GEMINI_API_KEY = os.getenv("GPT_GEMINI_API_KEY", "")
+    GEMINI_BASE_URL = os.getenv("GPT_GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta/openai")
+    GEMINI_ACCEPT_IMAGE = os.getenv("GPT_GEMINI_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    # /ds command
+    DEEPSEEK_MODEL = os.getenv("GPT_DEEPSEEK_MODEL", "")
+    DEEPSEEK_MODEL_NAME = os.getenv("GPT_DEEPSEEK_MODEL_NAME", "")
+    DEEPSEEK_API_KEY = os.getenv("GPT_DEEPSEEK_API_KEY", "")
+    DEEPSEEK_BASE_URL = os.getenv("GPT_DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1")
+    DEEPSEEK_ACCEPT_IMAGE = os.getenv("GPT_DEEPSEEK_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    # /qwen command
+    QWEN_MODEL = os.getenv("GPT_QWEN_MODEL", "")
+    QWEN_MODEL_NAME = os.getenv("GPT_QWEN_MODEL_NAME", "")
+    QWEN_API_KEY = os.getenv("GPT_QWEN_API_KEY", "")
+    QWEN_BASE_URL = os.getenv("GPT_QWEN_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
+    QWEN_ACCEPT_IMAGE = os.getenv("GPT_QWEN_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    # /grok command
+    GROK_MODEL = os.getenv("GPT_GROK_MODEL", "")
+    GROK_MODEL_NAME = os.getenv("GPT_GROK_MODEL_NAME", "")
+    GROK_API_KEY = os.getenv("GPT_GROK_API_KEY", "")
+    GROK_BASE_URL = os.getenv("GPT_GROK_BASE_URL", "https://api.x.ai/v1")
+    GROK_ACCEPT_IMAGE = os.getenv("GPT_GROK_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    # /doubao command
+    DOUBAO_MODEL = os.getenv("GPT_DOUBAO_MODEL", "")
+    DOUBAO_MODEL_NAME = os.getenv("GPT_DOUBAO_MODEL_NAME", "")
+    DOUBAO_API_KEY = os.getenv("GPT_DOUBAO_API_KEY", "")
+    DOUBAO_BASE_URL = os.getenv("GPT_DOUBAO_BASE_URL", "https://ark.cn-beijing.volces.com/api/v3")
+    DOUBAO_ACCEPT_IMAGE = os.getenv("GPT_DOUBAO_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+
+    # /summary command
+    SUMMARY_MODEL = os.getenv("GPT_SUMMARY_MODEL", "")
+    SUMMARY_MODEL_NAME = os.getenv("GPT_SUMMARY_MODEL_NAME", "")
+    SUMMARY_MODEL_MAX_OUTPUT_LENGTH = os.getenv("GPT_SUMMARY_MODEL_MAX_OUTPUT_LENGTH", "8192")  # 8K
+    SUMMARY_API_KEY = os.getenv("GPT_SUMMARY_API_KEY", "")
+    SUMMARY_BASE_URL = os.getenv("GPT_SUMMARY_BASE_URL", "https://api.openai.com/v1")
+    SUMMARY_TIMEOUT = os.getenv("GPT_SUMMARY_TIMEOUT", "600")  # should be larger than default timeout
+    SUMMARY_ACCEPT_IMAGE = os.getenv("GPT_SUMMARY_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+
+    # For tool_call. Some models don't support tool calls, so this model is used to perform the tool call first.
+    # Then construct the new questions for the original model.
+    TOOLS_MODEL = os.getenv("GPT_TOOLS_MODEL", "gpt-4o-mini")  # this model should be fast and cheap
+    TOOLS_BASE_URL = os.getenv("GPT_TOOLS_BASE_URL", "https://api.openai.com/v1")
+    TOOLS_API_KEY = os.getenv("GPT_TOOLS_API_KEY", "")
+
+
 class GEMINI:  # Official Gemini
     # https://ai.google.dev/gemini-api/docs/image-generation
     BASR_URL = os.getenv("GEMINI_BASR_URL", "https://generativelanguage.googleapis.com/")
-    API_KEYS = os.getenv("GEMINI_API_KEYS", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
+    API_KEY = os.getenv("GEMINI_API_KEY", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
     PROXY = os.getenv("GEMINI_PROXY", None)
     PREFER_LANG = os.getenv("GEMINI_PREFER_LANG", "")  # Set a prefer response language for Gemini
     MAX_THINKING_BUDGET = int(os.getenv("GEMINI_MAX_THINKING_BUDGET", "24576"))  # 24K
 
     # response modality: text
-    TEXT_MODEL = os.getenv("GEMINI_TEXT_MODEL", "gemini-2.5-pro-exp-03-25")
-    TEXT_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "Gemini-2.5-Pro")
+    TEXT_MODEL = os.getenv("GEMINI_TEXT_MODEL", "gemini-2.5-flash-preview-04-17")
+    TEXT_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "Gemini-2.5-Flash")
     TEXT_THINKING_BUDGET = os.getenv("GEMINI_TEXT_THINKING_BUDGET", None)  # 0 to disable thinking. DO NOT set this if the model is not a thinking model
     TEXT_CONFIG = os.getenv("GEMINI_TEXT_CONFIG", "{}")  # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'