Commit 070f8aa
src/llm/gemini/chat.py
@@ -130,12 +130,6 @@ async def gemini_stream(
if "conversations" in params: # convert conversations to contents
params["contents"] = await get_conversation_contexts(client, params["conversations"], model_id=params["model"], ctx_format="gemini", app=app)
gemini_logging(params["contents"])
- tokens = await app.aio.models.count_tokens(model=params["model"], contents=params["contents"]) # type: ignore
- num_tokens = tokens.total_tokens or 0
- if num_tokens > GEMINI.TEXT_MAX_TOKEN:
- logger.warning(f"[Gemini] Content is too long: {num_tokens} tokens, fallback to {GEMINI.TEXT_TOKENS_FALLBACK_MODEL}")
- params["model"] = GEMINI.TEXT_TOKENS_FALLBACK_MODEL
- params["config"].thinking_config = None
is_reasoning = False
is_reasoning_conversation = None # to indicate whether it is a reasoning conversation
genai_params = {"model": params["model"], "contents": params["contents"], "config": params["config"]}
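This hunk removes the pre-flight token check: the stream handler used to count tokens before generating and, above GEMINI.TEXT_MAX_TOKEN, swap in the fallback model and clear `thinking_config` (the flash fallback is not a thinking model). A minimal sketch of the removed behavior, assuming the google-genai SDK (`client` below stands in for the diff's `app`) and stand-in constants for the deleted settings:

```python
# Sketch of the removed pre-flight check, not the repo's exact code.
from google import genai

MAX_TOKENS = 250_000                 # was GEMINI.TEXT_MAX_TOKEN
FALLBACK_MODEL = "gemini-2.0-flash"  # was GEMINI.TEXT_TOKENS_FALLBACK_MODEL

async def pick_model(client: genai.Client, model: str, contents) -> str:
    """Return `model`, or the fallback when `contents` is over budget."""
    resp = await client.aio.models.count_tokens(model=model, contents=contents)
    if (resp.total_tokens or 0) > MAX_TOKENS:
        # The fallback is not a thinking model, so the caller must also
        # drop any thinking_config (as the deleted line did).
        return FALLBACK_MODEL
    return model
```

One cost of the old path was an extra count_tokens round trip on every request; without it, over-limit inputs presumably surface as an error from the API instead of silently downgrading the model.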
src/llm/utils.py
@@ -42,7 +42,7 @@ def enabled_providers() -> tuple[list[str], list[str]]:
text_providers.append("doubao")
if all([GPT.KIMI_MODEL, GPT.KIMI_MODEL_NAME, GPT.KIMI_API_KEY, GPT.KIMI_BASE_URL]):
text_providers.append("kimi")
- if all([GEMINI.API_KEY, GEMINI.BASE_URL, GEMINI.TEXT_MODEL, GEMINI.TEXT_MODEL_NAME, GEMINI.TEXT_TOKENS_FALLBACK_MODEL]):
+ if all([GEMINI.API_KEY, GEMINI.BASE_URL, GEMINI.TEXT_MODEL, GEMINI.TEXT_MODEL_NAME]):
text_providers.append("gemini")
img_providers = []
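With the fallback model gone from the config, the Gemini gate shrinks to the four remaining settings. The pattern relies on `os.getenv` returning None for unset variables, so `all([...])` disables the provider when any value is missing or empty; a self-contained sketch (attribute names from this diff, the API-key/base-URL env-var names assumed):

```python
# Minimal reproduction of the provider-gating pattern in enabled_providers().
import os

class GEMINI:
    API_KEY = os.getenv("GEMINI_API_KEY")          # env-var name assumed
    BASE_URL = os.getenv("GEMINI_BASE_URL")        # env-var name assumed
    TEXT_MODEL = os.getenv("GEMINI_TEXT_MODEL")    # env-var name assumed
    TEXT_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "Gemini-2.5-Pro")

def gemini_enabled() -> bool:
    # Any unset (None) or empty-string value keeps the provider disabled.
    return all([GEMINI.API_KEY, GEMINI.BASE_URL,
                GEMINI.TEXT_MODEL, GEMINI.TEXT_MODEL_NAME])
```

Since TEXT_MODEL_NAME carries a default ("Gemini-2.5-Pro" in config.py below), in practice the gate turns on the first three settings.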
src/config.py
@@ -466,8 +466,6 @@ class GEMINI: # Official Gemini
TEXT_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "Gemini-2.5-Pro")
TEXT_THINKING_BUDGET = os.getenv("GEMINI_TEXT_THINKING_BUDGET", None) # 0 to disable thinking. DO NOT set this if the model is not a thinking model
TEXT_CONFIG = os.getenv("GEMINI_TEXT_CONFIG", "{}") # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
- TEXT_MAX_TOKEN = int(os.getenv("GEMINI_TEXT_MAX_TOKEN", "250000")) # 250K
- TEXT_TOKENS_FALLBACK_MODEL = os.getenv("GEMINI_TEXT_TOKENS_FALLBACK_MODEL", "gemini-2.0-flash") # model id when the token count is larger than GEMINI.TEXT_MAX_TOKEN
# ASR related
ASR_MAX_DURATION = int(os.getenv("GEMINI_ASR_MAX_DURATION", "34200")) # 9.5 hour
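The surviving TEXT_CONFIG setting is documented as a JSON string passed to GenerateContentConfig. How the repo actually parses it is not shown in this diff; a plausible sketch, assuming google-genai's `types.GenerateContentConfig` / `types.ThinkingConfig` and a hypothetical `build_text_config` helper:

```python
# Hypothetical helper; the repo's real parsing code is not in this diff.
import json
import os

from google.genai import types

TEXT_CONFIG = os.getenv("GEMINI_TEXT_CONFIG", "{}")              # as in config.py
TEXT_THINKING_BUDGET = os.getenv("GEMINI_TEXT_THINKING_BUDGET")  # None if unset

def build_text_config() -> types.GenerateContentConfig:
    cfg = json.loads(TEXT_CONFIG)  # e.g. '{"temperature": 0.7}'
    if TEXT_THINKING_BUDGET is not None:
        # 0 disables thinking; per the config comment, only set this
        # when the model is a thinking model.
        cfg["thinking_config"] = types.ThinkingConfig(
            thinking_budget=int(TEXT_THINKING_BUDGET))
    return types.GenerateContentConfig(**cfg)
```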