Commit 070f8aa

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-12-07 07:42:56
chore(gemini): remove max token limit
1 parent 1f35f42
Changed files (3)
src/llm/gemini/chat.py
@@ -130,12 +130,6 @@ async def gemini_stream(
         if "conversations" in params:  # convert conversations to contents
             params["contents"] = await get_conversation_contexts(client, params["conversations"], model_id=params["model"], ctx_format="gemini", app=app)
         gemini_logging(params["contents"])
-        tokens = await app.aio.models.count_tokens(model=params["model"], contents=params["contents"])  # type: ignore
-        num_tokens = tokens.total_tokens or 0
-        if num_tokens > GEMINI.TEXT_MAX_TOKEN:
-            logger.warning(f"[Gemini] Content is too long: {num_tokens} tokens, fallback to {GEMINI.TEXT_TOKENS_FALLBACK_MODEL}")
-            params["model"] = GEMINI.TEXT_TOKENS_FALLBACK_MODEL
-            params["config"].thinking_config = None
         is_reasoning = False
         is_reasoning_conversation = None  # to indicate whether it is a reasoning conversation
         genai_params = {"model": params["model"], "contents": params["contents"], "config": params["config"]}
src/llm/utils.py
@@ -42,7 +42,7 @@ def enabled_providers() -> tuple[list[str], list[str]]:
         text_providers.append("doubao")
     if all([GPT.KIMI_MODEL, GPT.KIMI_MODEL_NAME, GPT.KIMI_API_KEY, GPT.KIMI_BASE_URL]):
         text_providers.append("kimi")
-    if all([GEMINI.API_KEY, GEMINI.BASE_URL, GEMINI.TEXT_MODEL, GEMINI.TEXT_MODEL_NAME, GEMINI.TEXT_TOKENS_FALLBACK_MODEL]):
+    if all([GEMINI.API_KEY, GEMINI.BASE_URL, GEMINI.TEXT_MODEL, GEMINI.TEXT_MODEL_NAME]):
         text_providers.append("gemini")
 
     img_providers = []
src/config.py
@@ -466,8 +466,6 @@ class GEMINI:  # Official Gemini
     TEXT_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "Gemini-2.5-Pro")
     TEXT_THINKING_BUDGET = os.getenv("GEMINI_TEXT_THINKING_BUDGET", None)  # 0 to disable thinking. DO NOT set this if the model is not a thinking model
     TEXT_CONFIG = os.getenv("GEMINI_TEXT_CONFIG", "{}")  # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
-    TEXT_MAX_TOKEN = int(os.getenv("GEMINI_TEXT_MAX_TOKEN", "250000"))  # 250K
-    TEXT_TOKENS_FALLBACK_MODEL = os.getenv("GEMINI_TEXT_TOKENS_FALLBACK_MODEL", "gemini-2.0-flash")  # model id when the token count is larger than GEMINI.TEXT_MAX_TOKEN
 
     # ASR related
     ASR_MAX_DURATION = int(os.getenv("GEMINI_ASR_MAX_DURATION", "34200"))  # 9.5 hour