Commit c18bdaf

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-22 03:28:31
feat(gemini): support Gemini `URL context` tool
1 parent c00ec2a
Changed files (2)
src/asr/gemini_asr.py
@@ -7,7 +7,7 @@ from pathlib import Path
 
 from glom import glom
 from google import genai
-from google.genai.types import GenerateContentConfig, GoogleSearch, HttpOptions, ThinkingConfig, Tool, UploadFileConfig
+from google.genai.types import GenerateContentConfig, GoogleSearch, HttpOptions, ThinkingConfig, Tool, UploadFileConfig, UrlContext
 from loguru import logger
 from pydantic import BaseModel
 from pyrogram.client import Client
@@ -91,7 +91,7 @@ Notes:
                 thinking_budget = min(round(float(GEMINI.ASR_THINKING_BUDGET)), GEMINI.MAX_THINKING_BUDGET)
                 genconfig |= {"thinking_config": ThinkingConfig(include_thoughts=False, thinking_budget=thinking_budget)}
             if GEMINI.ASR_USE_GROUNDING:
-                genconfig |= {"tools": [Tool(google_search=GoogleSearch())]}
+                genconfig |= {"tools": [Tool(url_context=UrlContext()), Tool(google_search=GoogleSearch())]}
             contents = [prompt, uploaded_audio] if prompt else [uploaded_audio]
             params = {"model": model_id, "contents": contents, "config": GenerateContentConfig(**genconfig)}
             res = await gemini_stream(
src/llm/gemini.py
@@ -7,7 +7,7 @@ from pathlib import Path
 
 from glom import glom
 from google import genai
-from google.genai.types import ContentUnionDict, GenerateContentConfig, GoogleSearch, HttpOptions, Part, ThinkingConfig, Tool
+from google.genai.types import ContentUnionDict, GenerateContentConfig, GoogleSearch, HttpOptions, Part, ThinkingConfig, Tool, UrlContext
 from loguru import logger
 from PIL import Image
 from pyrogram.client import Client
@@ -61,7 +61,7 @@ async def gemini_response(client: Client, message: Message, conversations: list[
         await send2tg(client, message, texts="⚠️**未配置Gemini API, 请尝试其他模型", **kwargs)
     response_modalities = ["TEXT", "IMAGE"] if modality == "image" else ["TEXT"]
     thinking_budget = GEMINI.IMG_THINKING_BUDGET if modality == "image" else GEMINI.TEXT_THINKING_BUDGET
-    tools = [Tool(google_search=GoogleSearch())] if modality == "text" else None
+    tools = [Tool(url_context=UrlContext()), Tool(google_search=GoogleSearch())] if modality == "text" else None
     # parse config from environment variable
     genconfig = {}
     with contextlib.suppress(Exception):