Commit d385bc8

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-04-28 09:09:00
chore(summary): remove long context model
Gemini supports a 1M-token context length, so we no longer need a separate long-context model.
1 parent cc41967
Changed files (3)
src/llm/models.py
@@ -110,7 +110,6 @@ def get_gpt_config(model_type: str, contexts: list[dict], force_model: str = "")
         GPT.GROK_MODEL: {"api_key": GPT.GROK_API_KEY, "base_url": GPT.GROK_BASE_URL, "model_name": GPT.GROK_MODEL_NAME},
     }
     model_factory |= {GPT.SUMMARY_MODEL: {"api_key": GPT.SUMMARY_API_KEY, "base_url": GPT.SUMMARY_BASE_URL, "model_name": GPT.SUMMARY_MODEL_NAME}}
-    model_factory |= {GPT.LONG_MODEL: {"api_key": GPT.LONG_API_KEY, "base_url": GPT.LONG_BASE_URL, "model_name": GPT.LONG_MODEL_NAME}}
     force_model_config = model_factory.get(force_model, {})
 
     force_model_name = force_model_config.get("model_name", model_name)
@@ -125,7 +124,6 @@ def get_gpt_config(model_type: str, contexts: list[dict], force_model: str = "")
             or (force_model == GPT.QWEN_MODEL and GPT.QWEN_IMAGE_CAPABILITY)
             or (force_model == GPT.DOUBAO_MODEL and GPT.DOUBAO_IMAGE_CAPABILITY)
             or (force_model == GPT.SUMMARY_MODEL and GPT.SUMMARY_IMAGE_CAPABILITY)
-            or (force_model == GPT.LONG_MODEL and GPT.LONG_IMAGE_CAPABILITY)
             or (force_model == GPT.GROK_MODEL and GPT.GROK_IMAGE_CAPABILITY)
         )
     ):
src/llm/summary.py
@@ -133,14 +133,9 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
     sysmtem_tokens = count_tokens(contexts[0]["content"])
     user_tokens = count_tokens(contexts[-1]["content"])
     total_tokens = sysmtem_tokens + user_tokens
-    if total_tokens < int(GPT.SUMMARY_MODEL_MAX_INPUT_LENGTH):
-        summary_model = GPT.SUMMARY_MODEL
-        summary_model_name = GPT.SUMMARY_MODEL_NAME
-        max_tokens = int(GPT.SUMMARY_MODEL_MAX_OUTPUT_LENGTH)
-    else:
-        summary_model = GPT.LONG_MODEL
-        summary_model_name = GPT.LONG_MODEL_NAME
-        max_tokens = int(GPT.LONG_MODEL_MAX_OUTPUT_LENGTH)
+    summary_model = GPT.SUMMARY_MODEL
+    summary_model_name = GPT.SUMMARY_MODEL_NAME
+    max_tokens = int(GPT.SUMMARY_MODEL_MAX_OUTPUT_LENGTH)
     msg = f"⏩开始时间: {parsed['begin_time']:%m-%d %H:%M:%S}\n"
     msg += f"⏯️结束时间: {parsed['end_time']:%m-%d %H:%M:%S}\n"
     msg += f"🔢消息条数: {len(parsed['user_context'])}\n"
src/config.py
@@ -215,20 +215,11 @@ class GPT:  # see `llm/README.md`
     # /summary command
     SUMMARY_MODEL = os.getenv("GPT_SUMMARY_MODEL", "gpt-4o")
     SUMMARY_MODEL_NAME = os.getenv("GPT_SUMMARY_MODEL_NAME", "GPT-4o")
-    SUMMARY_MODEL_MAX_INPUT_LENGTH = os.getenv("GPT_SUMMARY_MODEL_MAX_INPUT_LENGTH", "57344")  # 56K
     SUMMARY_MODEL_MAX_OUTPUT_LENGTH = os.getenv("GPT_SUMMARY_MODEL_MAX_OUTPUT_LENGTH", "8192")  # 8K
     SUMMARY_API_KEY = os.getenv("GPT_SUMMARY_API_KEY", "")
     SUMMARY_BASE_URL = os.getenv("GPT_SUMMARY_BASE_URL", "https://api.openai.com/v1")
     SUMMARY_TIMEOUT = os.getenv("GPT_SUMMARY_TIMEOUT", "600")  # should be larger than default timeout
     SUMMARY_IMAGE_CAPABILITY = os.getenv("GPT_SUMMARY_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # long context model
-    LONG_MODEL = os.getenv("GPT_LONG_MODEL", "gemini-1.5-pro")
-    LONG_MODEL_NAME = os.getenv("GPT_LONG_MODEL_NAME", "Gemini-1.5-Pro")
-    LONG_MODEL_MAX_INPUT_LENGTH = os.getenv("GPT_LONG_MODEL_MAX_INPUT_LENGTH", "2097152")  # 2M
-    LONG_MODEL_MAX_OUTPUT_LENGTH = os.getenv("GPT_LONG_MODEL_MAX_OUTPUT_LENGTH", "8192")  # 8K
-    LONG_API_KEY = os.getenv("GPT_LONG_API_KEY", "")
-    LONG_BASE_URL = os.getenv("GPT_LONG_BASE_URL", "https://generativelanguage.googleapis.com/v1beta/openai")
-    LONG_IMAGE_CAPABILITY = os.getenv("GPT_LONG_IMAGE_CAPABILITY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
 
 
 class TID:  # see more TID usecase in `src/permission.py`