Commit 79e4e9c

benny-dou <60535774+benny-dou@users.noreply.github.com>
2026-01-16 07:27:25
refactor(ai): reorganize AI functions
1 parent c6764da
src/ai/images/models.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+
+from loguru import logger
+from pyrogram.types import Message
+
+from config import AI, PREFIX
+from database.kv import get_cf_kv
+
+
+async def get_image_model_configs(message: Message) -> list[dict]:
+    r"""Get model configs based on the message.
+
+    Model config is retrieved from CF-KV with key: {AI.IMG_MODEL_CONFIG_KEY}
+
+    A sample config:
+    {
+    "docs": "🌠AI生图: `/gen` 后接提示词即可生成\n\n⚙️模型配置:\n/gen: 默认模型 (Seedream)\n/z: Z-Image\n/qwi: Qwen-Image\n/sd: Seedream-4.5\n/flux: Flux",
+    "seedream":
+        [
+            {
+                "model_id": "doubao-seedream-4-5-251128",
+                "model_name": "Seedream-4.5",
+                "api_type": "openai",
+                "api_keys": "key1,key2,key3,...",
+                "support_reference_images": true,
+                "client_config": { "base_url": "https://ark.cn-beijing.volces.com/api/v3" },
+                "generate_config": {
+                    "size": "4K",
+                    "stream": false,
+                    "response_format": "url",
+                    "extra_body": {
+                        "watermark": false,
+                        "sequential_image_generation": "auto",
+                        "sequential_image_generation_options": { "max_images": 4 },
+                        "optimize_prompt_options": { "mode": "standard" }
+                    }
+                }
+            }
+        ],
+
+    "zimage":
+        [
+            {
+                "model_id": "z-image-turbo",
+                "model_name": "Z-Image",
+                "api_type": "openai",
+                "api_keys": "openai-key-1,openai-key-2,openai-key-3,...",
+                "client_config": { "base_url": "https://api.openai.com/v1" },
+                "generate_config": { "response_format": "b64_json" }
+            },
+            {
+                "model_name": "Z-Image",
+                "api_type": "post",
+                "base_url": "https://dashscope.aliyuncs.com",
+                "api_paths": {
+                    "img_gen": "/api/v1/services/aigc/multimodal-generation/generation"
+                },
+                "headers": {
+                    "Authorization": "Bearer DASHSCOPE_API_KEY",
+                    "Content-Type": "application/json"
+                },
+                "body": {
+                    "model": "z-image-turbo",
+                    "input": {"messages":[{"role":"user","content":[{"text":"%PROMPT%"}]}]},
+                    "parameters": {
+                        "prompt_extend": true,
+                        "size": "2048*1152"
+                    }
+                }
+            },
+            {
+                "model_name": "Z-Image",
+                "api_type": "post",
+                "base_url": "https://api-inference.modelscope.cn",
+                "api_paths": {
+                    "img_gen": "/v1/images/generations",
+                    "task_check": "/v1/tasks/%TASK_ID%"
+                },
+                "headers": {
+                    "Authorization": "Bearer MODELSCOPE_API_KEY",
+                    "Content-Type": "application/json",
+                    "X-ModelScope-Async-Mode": "true"
+                },
+                "body": {
+                    "model": "Tongyi-MAI/Z-Image-Turbo",
+                    "prompt": "%PROMPT%"
+                }
+            }
+        ],
+    "qwen-image":
+        [
+            {
+                "model_name": "Qwen-Image-2512",
+                "api_type": "post",
+                "base_url": "https://api-inference.modelscope.cn",
+                "api_paths": {
+                    "img_gen": "/v1/images/generations",
+                    "task_check": "/v1/tasks/%TASK_ID%"
+                },
+                "headers": {
+                    "Authorization": "Bearer MODELSCOPE_API_KEY",
+                    "Content-Type": "application/json",
+                    "X-ModelScope-Async-Mode": "true"
+                },
+                "body": {
+                    "model": "Qwen/Qwen-Image-2512",
+                    "prompt": "%PROMPT%"
+                }
+            },
+            {
+                "model_name": "Qwen-Image",
+                "api_type": "post",
+                "base_url": "https://dashscope.aliyuncs.com",
+                "api_paths": {
+                    "img_gen": "/api/v1/services/aigc/multimodal-generation/generation"
+                },
+                "headers": {
+                    "Authorization": "Bearer DASHSCOPE_API_KEY",
+                    "Content-Type": "application/json"
+                },
+                "body": {
+                    "model": "qwen-image",
+                    "input": { "messages": [{"role":"user","content": [{"text":"%PROMPT%"}]}]},
+                    "parameters": { "watermark": false }
+                }
+            }
+        ]
+    }
+
+    Suppose this message is:
+        Message(text="/gen A cute cat") -> use `default` as model_alias
+        Message(text="/gen @seedream hello") -> use `seedream` as model_alias
+
+    Returns: list of model configs
+        [
+            {
+                "model_id": "doubao-seedream-4-5-251128",
+                "model_name": "Seedream-4.5",
+                "api_type": "openai",
+                "api_keys": "key1,key2,key3,...",
+                "client_config": { "base_url": "https://ark.cn-beijing.volces.com/api/v3" },
+                "generate_config": {
+                    "size": "4K",
+                    "stream": false,
+                    "response_format": "url",
+                    "extra_body": {
+                        "watermark": false,
+                        "sequential_image_generation": "auto",
+                        "sequential_image_generation_options": { "max_images": 4 },
+                        "optimize_prompt_options": { "mode": "standard" }
+                    }
+                }
+            }
+        ]
+    """
+    texts = str(message.content).strip()
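+    # A leading "@alias" token (e.g. "/gen @seedream hello") selects a named model pool;
+    # without it the configured default model pool is used.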
+    if matched := re.match(rf"^{PREFIX.AI_IMG_GENERATION}\s+@([a-zA-Z0-9_\-\.]+)(\s+)?", texts):  # match /ai @custom_model_id
+        model_alias = matched.group(1).strip()
+        return await get_config_by_model_alias(model_alias)
+    return await get_config_by_model_alias(AI.IMG_GENERATION_DEFAULT_MODEL)
+
+
+async def get_config_by_model_alias(model_alias: str) -> list[dict]:
+    """Get model config by model_alias.
+
+    Returns:
+        model_config
+    """
+    kv = await get_cf_kv(AI.IMG_MODEL_CONFIG_KEY, cache_ttl=600, silent=True)
+
+    custom_config = kv.get(model_alias, [])
+    if not custom_config:
+        logger.warning(f"Model `{model_alias}` is not configured in KV, using default config")
+        default_config = kv.get(AI.IMG_GENERATION_DEFAULT_MODEL, [])
+        if not default_config:
+            logger.warning(f"CF-KV key `{AI.IMG_MODEL_CONFIG_KEY}` does not has default `{AI.IMG_GENERATION_DEFAULT_MODEL}` field")
+        return default_config
+    return custom_config
src/ai/images/openai_img.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import base64
+from pathlib import Path
+
+import anyio
+from glom import glom
+from loguru import logger
+from openai import AsyncOpenAI, DefaultAsyncHttpxClient
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from ai.texts.contexts import base64_media
+from ai.utils import EMOJI_IMG_BOT, clean_cmd_prefix, literal_eval, prettify, trim_none
+from config import DOWNLOAD_DIR, PROXY
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import delete_message
+from networking import download_file
+from utils import rand_string, strings_list
+
+
+async def openai_image_generation(
+    client: Client,
+    message: Message,
+    *,
+    model_id: str = "",
+    model_name: str = "",
+    api_keys: str = "",
+    client_config: str | dict = "",
+    generate_config: str | dict = "",
+    proxy: str | None = PROXY.OPENAI,
+    support_reference_images: bool = False,
+    **kwargs,
+) -> dict:
+    """Get OpenAI Image Generation."""
+    status_msg = kwargs.get("progress") or await message.reply(f"{EMOJI_IMG_BOT}**{model_name}**:\n正在生成图像...", quote=True)
+    try:
+        openai_client = {}
+        if literal_eval(client_config):
+            openai_client |= literal_eval(client_config)
+        if proxy:
+            openai_client |= {"http_client": DefaultAsyncHttpxClient(proxy=proxy)}
+        prompt, reference_images = await get_openai_image_contexts(client, message, support_reference_images=support_reference_images)
+        if not prompt:
+            await modify_progress(status_msg, text=f"❌**{model_name}**:\n请提供提示词", force_update=True, **kwargs)
+            return {}
+        params = {}
+        if literal_eval(generate_config):
+            params |= literal_eval(generate_config)
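+        # The openai SDK merges extra_body fields into the request body, letting vendor-specific
+        # options like reference images ride alongside the standard parameters.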
+        if reference_images:
+            params["extra_body"] = params.get("extra_body", {}) | {"image": reference_images}
+        params |= {"model": model_id, "prompt": prompt}
+        logger.debug(f"openai.images.generate(**{prettify(params)})")
+    except Exception as e:
+        logger.error(f"OpenAI client setup error: {e}")
+        return {}
+    resp = {}
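+    # Try each API key in random order; the first one that yields downloadable images wins.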
+    for api_key in strings_list(api_keys, shuffle=True):
+        try:
+            openai_client["api_key"] = api_key
+            logger.trace(f"AsyncOpenAI(**{openai_client})")
+            openai = AsyncOpenAI(**openai_client)
+            resp = await openai.images.generate(**params)
+            resp = trim_none(resp.model_dump())
+            if images := await download_generated_images(resp, proxy=proxy):
+                resp.pop("data", None)
+                caption = f"{EMOJI_IMG_BOT}**{model_name}**\n{prettify(resp)}\n"
+                for idx, img in enumerate([x for x in images if x.get("url")]):
+                    caption += f"[P{idx + 1}原图]({img['url']})"
+                await modify_progress(status_msg, text=prettify(resp), force_update=True, **kwargs)
+                await send2tg(client, message, texts=caption, media=[{"photo": img["path"]} for img in images], **kwargs)
+                await delete_message(status_msg)
+                return {}
+        except Exception as e:
+            logger.error(f"OpenAI Image Generation error: {e}\n\n{prettify(resp)}")
+            await modify_progress(status_msg, text=f"❌{e}\n\n{prettify(resp)}", force_update=True, **kwargs)
+    return {}
+
+
+async def get_openai_image_contexts(client: Client, message: Message, *, support_reference_images: bool = False) -> tuple[str, list[str]]:
+    """Generate OpenAI image generation contexts.
+
+    Returns:
+        tuple: prompt, list_of_images
+    """
+    if not support_reference_images:
+        return clean_cmd_prefix(message.content), []
+    messages = [message]
+    while message.reply_to_message:
+        message = message.reply_to_message
+        messages.append(message)
+    messages.reverse()  # old to new
+    images = []
+    prompt = ""
+    for m in messages:
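+        # Albums arrive as separate messages; expand the media group so every photo contributes.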
+        group_messages = await client.get_media_group(m.chat.id, m.id) if m.media_group_id else [m]
+        for msg in group_messages:
+            prompt = clean_cmd_prefix(msg.content)
+            if not msg.photo:
+                continue
+            res = await base64_media(client, msg)
+            images.append(f"data:image/{res['ext']};base64,{res['base64']}")
+    return prompt, images
+
+
+async def download_generated_images(response: dict, proxy: str | None = None) -> list[dict]:
+    """Download generated images.
+
+    Response: {
+        "model": "doubao-seedream-4-5-251128",
+        "created": 1757321139,
+        "data": [
+            {
+                "url": "https://...",
+                "size": "3104x1312"
+            },
+            {
+                "b64_json": "/9j/4AAQSkZJRgABA...",
+                "size": "3104x1312"
+            }
+        ],
+        "usage": {
+            "generated_images": 2,
+            "output_tokens": xxx,
+            "total_tokens": xxx
+        }
+    }
+
+    Returns:
+    [
+        {
+            "path": "/path/to/image.png"
+            "url": "https://...",
+        }
+    ]
+    """
+    results = []
+    data = glom(response, "data", default=[]) or []
+    for item in data:
+        if url := item.get("url"):
+            img_path = await download_file(url, proxy=proxy)
+            if Path(img_path).is_file():
+                results.append({"path": img_path, "url": url})
+        if b64_json := item.get("b64_json"):
+            image_bytes = base64.b64decode(b64_json)
+            save_path = Path(DOWNLOAD_DIR) / f"{rand_string(10)}.png"
+            async with await anyio.open_file(save_path, "wb") as f:
+                await f.write(image_bytes)
+            results.append({"path": save_path.as_posix()})
+    return results
src/ai/images/post.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import asyncio
+import base64
+import json
+from pathlib import Path
+
+import anyio
+from glom import glom
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from ai.texts.contexts import base64_media
+from ai.utils import EMOJI_IMG_BOT, clean_cmd_prefix, prettify
+from config import DOWNLOAD_DIR, PROXY
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import delete_message
+from networking import download_file, hx_req
+from utils import rand_string
+
+
+async def http_post_image_generation(
+    client: Client,
+    message: Message,
+    *,
+    base_url: str = "",
+    model_name: str = "",
+    api_paths: dict | None = None,
+    headers: dict | None = None,
+    body: dict | None = None,
+    extra_params: dict | None = None,
+    proxy: str | None = PROXY.AI_POST,
+    support_reference_images: bool = False,
+    **kwargs,
+) -> dict:
+    """Get HTTP Post Image Generation."""
+    status_msg = kwargs.get("progress") or await message.reply(f"{EMOJI_IMG_BOT}**{model_name}**:\n正在生成图像...", quote=True)
+    try:
+        prompt, reference_images = await get_image_contexts(client, message, support_reference_images=support_reference_images)
+        if not prompt:
+            await modify_progress(status_msg, text=f"❌**{model_name}**:\n请提供提示词", force_update=True, **kwargs)
+            return {}
+        params = {}
+        api_paths = api_paths or {}
+        if headers:
+            params |= {"headers": headers}
+        if proxy:
+            params |= {"proxy": proxy}
+        url = base_url + api_paths.get("img_gen", "") if not reference_images else base_url + api_paths.get("img_edit", "")
+        params |= {"url": url, "method": "POST"}
+        if body:
+            params |= {"json_data": replace_placeholder(body, pairs={"%PROMPT%": prompt})}
+        if extra_params:
+            params |= extra_params
+        logger.debug(f"hx_req(**{prettify(params)})")
+        resp = await hx_req(**params)
+        if error := resp.get("hx_error"):
+            await modify_progress(status_msg, text=f"❌**{model_name}**:\n{error}", force_update=True, **kwargs)
+            return {}
+
+        image_urls: list[str] = []
+        metadata = ""
+        if task_id := resp.get("task_id"):  # ModelScope
+            image_urls, metadata = await waiting_modelscope_task(task_id, params | {"base_url": base_url, "api_paths": api_paths})
+        if glom(resp, "output.choices.0.message.content.0.image", default=""):  # DashScope Multimodal Generation
+            image_urls.extend(glom(resp, "output.choices.0.message.content.*.image", default=[]))
+            metadata = extract_metadata(resp)
+        if image_urls:
+            images = await download_generated_images(image_urls, proxy=proxy)
+            caption = f"{EMOJI_IMG_BOT}**{model_name}**\n{metadata}\n"
+            for idx, img in enumerate([x for x in images if x.get("url")]):
+                caption += f"[P{idx + 1}原图]({img['url']})"
+            await modify_progress(status_msg, text=caption, force_update=True, **kwargs)
+            await send2tg(client, message, texts=caption, media=[{"photo": img["path"]} for img in images], **kwargs)
+            await delete_message(status_msg)
+            return {}
+    except Exception as e:
+        logger.error(f"HTTP Post Image Generation error: {e}")
+    return {}
+
+
+def extract_metadata(response: dict) -> str:
+    """Extract some useful metadata from response.
+
+    This information is appended to the Telegram caption.
+    """
+    if glom(response, "input.prompt", default=""):  # ModelScope
+        metadata = response.get("input", {})
+        metadata.pop("prompt", None)
+        return prettify(metadata)
+    if glom(response, "output.choices.0.message.content.0.image", default=""):  # DashScope Multimodal Generation
+        return prettify(response.get("usage", ""))
+    return ""
+
+
+async def get_image_contexts(client: Client, message: Message, *, support_reference_images: bool = False) -> tuple[str, list[str]]:
+    """Generate OpenAI image generation contexts.
+
+    Returns:
+        tuple: prompt, list_of_images
+    """
+    if not support_reference_images:
+        return clean_cmd_prefix(message.content), []
+    messages = [message]
+    while message.reply_to_message:
+        message = message.reply_to_message
+        messages.append(message)
+    messages.reverse()  # old to new
+    images = []
+    prompt = ""
+    for m in messages:
+        group_messages = await client.get_media_group(m.chat.id, m.id) if m.media_group_id else [m]
+        for msg in group_messages:
+            prompt = clean_cmd_prefix(msg.content)
+            if not msg.photo:
+                continue
+            res = await base64_media(client, msg)
+            images.append(f"data:image/{res['ext']};base64,{res['base64']}")
+    return prompt, images
+
+
+async def download_generated_images(image_urls: list[str], proxy: str | None) -> list[dict]:
+    """Download generated images.
+
+    Returns:
+    [
+        {
+            "path": "/path/to/image.png"
+            "url": "https://...",
+        }
+    ]
+    """
+    results = []
+    for url in image_urls:
+        if url.startswith("http"):
+            img_path = await download_file(url, proxy=proxy)
+            if Path(img_path).is_file():
+                results.append({"path": img_path, "url": url})
+        else:  # base64 json
+            image_bytes = base64.b64decode(url)
+            save_path = Path(DOWNLOAD_DIR) / f"{rand_string(10)}.png"
+            async with await anyio.open_file(save_path, "wb") as f:
+                await f.write(image_bytes)
+            results.append({"path": save_path.as_posix()})
+    return results
+
+
+async def waiting_modelscope_task(task_id: str, params: dict) -> tuple[list[str], str]:
+    """Waiting for async task to be SUCCEED.
+
+    Task Submitted Response:
+    {
+        "task_status": "SUCCEED",
+        "task_id": "5054288",
+        "request_id": "68b91aef-e00e-40dd-a4e8-8611e4826b7a"
+    }
+
+    Task Check Response:
+    {
+      "input": {
+        "guidanceScale": 7.5,
+        "height": 1280,
+        "negativePrompt": "",
+        "numInferenceSteps": 9,
+        "outputs": {},
+        "prompt": "充满活力的特写编辑肖像, 模特眼神犀利, 头戴雕塑感帽子, 色彩拼接丰富, 眼部焦点锐利, 景深较浅, 具有Vogue杂志封面的美学风格, 采用中画幅拍摄, 工作室灯光效果强烈.",
+        "sampler": "Euler a",
+        "seed": 391608538,
+        "timeTaken": 6764.005661010742,
+        "weight": 0
+      },
+      "output_images": [
+        "https://muse-ai.oss-cn-hangzhou.aliyuncs.com/img/8cac52d9b76a41238f02733ef3709fba.png"
+      ],
+      "request_id": "b96147a2-d9ea-4640-b726-4d097235c5a1",
+      "task_id": "",
+      "task_status": "SUCCEED",
+      "time_taken": 6764.005661010742
+    }
+
+    Returns:
+        tuple: list of images, metadata
+    """
+    # get real base_url
+    base_url = params["base_url"]
+    headers = {k.lower(): v for k, v in params.get("headers", {}).items()}
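+    # Requests proxied through the Helicone gateway carry the real upstream host in this header.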
+    if base_url.startswith("https://gateway.helicone.ai"):
+        base_url = headers.get("helicone-target-url", "")
+
+    api_key = headers.get("authorization", "").replace("Bearer ", "")
+    task_url = base_url + glom(params, "api_paths.task_check", default="")
+    url = replace_placeholder(task_url, {"%TASK_ID%": task_id})
+    poll_headers = {
+        "authorization": f"Bearer {api_key}",
+        "content-type": "application/json",
+        "x-modelscope-task-type": "image_generation",
+    }
+    resp = await hx_req(url, headers=poll_headers, check_keys=["task_status"], proxy=params.get("proxy"))
+    while True:
+        if "hx_error" in resp or not glom(resp, "task_status", default="") or resp["task_status"].upper() in {"FAILED", "CANCELLED", "UNKNOWN"}:
+            logger.error(f"Image Generation Task {task_id} error: {resp}")
+            return [], ""
+        if resp["task_status"] == "SUCCEED":
+            return glom(resp, "output_images", default=[]), extract_metadata(resp)
+        await asyncio.sleep(5)
+        resp = await hx_req(url, headers=poll_headers, check_keys=["task_status"], proxy=params.get("proxy"))
+
+
+def replace_placeholder(data: dict, pairs: dict[str, str]) -> dict:
+    """Replace placeholder in data.
+
+    Args:
+        data: dict with placeholder
+    Returns:
+        dict with replaced placeholder
+    """
+    data_str = json.dumps(data, ensure_ascii=False)
+    for key, value in pairs.items():
+        data_str = data_str.replace(key, value)
+    return json.loads(data_str)
src/ai/texts/contexts.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import asyncio
+import base64
+import contextlib
+import hashlib
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from glom import glom
+from google import genai
+from google.genai.types import FileState, Part, UploadFileConfig
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from ai.utils import BOT_TIPS, clean_context
+from asr.utils import GEMINI_AUDIO_EXT, downsampe_audio
+from config import AI, DOWNLOAD_DIR
+from database.r2 import head_cf_r2
+from messages.parser import parse_msg
+from utils import convert_md, read_text
+
+if TYPE_CHECKING:
+    from io import BytesIO
+
+
+async def get_openai_completion_contexts(client: Client, message: Message) -> list[dict]:
+    """Generate OpenAI chat completion contexts."""
+    messages = [message]
+    while message.reply_to_message:
+        message = message.reply_to_message
+        messages.append(message)
+    messages = messages[: int(AI.MAX_CONTEXTS_NUM)][::-1]  # old to new
+    return [ctx for msg in messages if (ctx := await single_openai_chat_context(client, msg))]
+
+
+async def single_openai_chat_context(client: Client, message: Message) -> dict:
+    """Generate OpenAI chat completion contexts for a single message.
+
+    Returns:
+    {
+        "role": "user or assistant",
+        "content": [],
+    }
+    """
+    info = parse_msg(message, silent=True)
+    role = "assistant" if BOT_TIPS in info["text"] else "user"
+
+    if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document", "web_page"]:
+        return {}
+
+    extra_txt_extensions = [".sh", ".json", ".xml"]  # treat these as txt file
+    extra_markdown_extensions = [".pdf", ".html", ".docx", ".pptx", ".xls", ".xlsx"]  # convert to markdown
+
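+    # Expand albums so each grouped photo/document contributes its own context part.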
+    messages = await client.get_media_group(message.chat.id, message.id) if message.media_group_id else [message]
+    contexts = []
+    for msg in messages:
+        info = parse_msg(msg, silent=True)
+        sender = info["fwd_full_name"] or info["full_name"]
+        media_path = DOWNLOAD_DIR + "/" + info["file_name"]
+        try:
+            if info["mtype"] == "photo":
+                res = await base64_media(client, msg)
+                contexts.append({"type": "image_url", "image_url": {"url": f"data:image/{res['ext']};base64,{res['base64']}"}})
+            elif info["mtype"] == "document":
+                if info["mime_type"].startswith("text/") or Path(info["file_name"]).suffix in extra_txt_extensions:
+                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                    contexts.append(
+                        {
+                            "type": "text",
+                            "text": f"[filename]: {info['file_name']}\n[file content]:\n{read_text(fpath).strip()}",
+                        }
+                    )
+                elif Path(info["file_name"]).suffix in extra_markdown_extensions:
+                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                    text = convert_md(fpath)
+                    Path(fpath).unlink(missing_ok=True)
+                    contexts.append(
+                        {
+                            "type": "text",
+                            "text": f"[filename]: {info['file_name']}\n[file content]:\n{text.strip()}",
+                        }
+                    )
+            # user message has entity urls, use full html
+            clean_texts = clean_context(info["html"] or info["text"]) if role == "user" and info["entity_urls"] else clean_context(info["text"])
+            if not clean_texts:
+                continue
+            texts = f"[username]: {sender}\n[message]:\n{clean_texts}" if role == "user" and sender else clean_texts
+            contexts.append({"type": "text", "text": texts})
+        except Exception as e:
+            logger.warning(f"Download media from message failed: {e}")
+            continue
+    return {"role": role, "content": contexts} if contexts else {}
+
+
+async def get_openai_response_contexts(client: Client, message: Message, api_key: str, model_id: str, cache_day: int) -> tuple[str, list[dict]]:
+    """Generate OpenAI response contexts.
+
+    Returns:
+        previous_response_id, contexts
+    """
+
+    async def get_previous_response_id(msg: Message) -> str:
+        """Get previous response id from message.
+
+        Returns:
+            previous_response_id: str
+        """
+        key_hash = hashlib.sha256(api_key.encode()).hexdigest()
+        resp = await head_cf_r2(f"TTL/{cache_day}d/OpenAI/{model_id}/{key_hash}/{msg.chat.id}/{msg.id}")
+        return glom(resp, "Metadata.response_id", default="") or ""
+
+    previous_response_id = ""
+    messages = [message]
+    while message.reply_to_message and not previous_response_id:
+        message = message.reply_to_message
+        if pid := await get_previous_response_id(message):
+            previous_response_id = pid
+            break
+        messages.append(message)
+    messages.reverse()  # old to new
+    return previous_response_id, [ctx for msg in messages if (ctx := await single_openai_response_context(client, msg))]
+
+
+async def single_openai_response_context(client: Client, message: Message) -> dict:
+    """Generate OpenAI response contexts for a single message.
+
+    Returns:
+    {
+        "role": "user or assistant",
+        "content": [],
+    }
+    """
+    info = parse_msg(message, silent=True)
+    role = "assistant" if BOT_TIPS in info["text"] else "user"
+
+    if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document", "web_page"]:
+        return {}
+
+    extra_txt_extensions = [".sh", ".json", ".xml"]  # treat these as txt file
+    extra_markdown_extensions = [".html", ".docx", ".pptx", ".xls", ".xlsx"]  # convert to markdown
+
+    messages = await client.get_media_group(message.chat.id, message.id) if message.media_group_id else [message]
+    contexts = []
+    for msg in messages:
+        info = parse_msg(msg, silent=True)
+        sender = info["fwd_full_name"] or info["full_name"]
+        media_path = DOWNLOAD_DIR + "/" + info["file_name"]
+        try:
+            if info["mtype"] == "photo":
+                res = await base64_media(client, msg)
+                contexts.append({"type": "input_image", "image_url": f"data:image/{res['ext']};base64,{res['base64']}", "detail": "high"})
+            elif info["mtype"] == "document":
+                if info["mime_type"] == "application/pdf" or Path(info["file_name"]).suffix == ".pdf":
+                    res = await base64_media(client, msg)
+                    contexts.append({"type": "input_file", "file_data": f"data:application/pdf;base64,{res['base64']}", "filename": info["file_name"]})
+                elif info["mime_type"].startswith("text/") or Path(info["file_name"]).suffix in extra_txt_extensions:
+                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                    contexts.append(
+                        {
+                            "type": "input_text",
+                            "text": f"[filename]: {info['file_name']}\n[file content]:\n{read_text(fpath).strip()}",
+                        }
+                    )
+                elif Path(info["file_name"]).suffix in extra_markdown_extensions:
+                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                    text = convert_md(fpath)
+                    Path(fpath).unlink(missing_ok=True)
+                    contexts.append(
+                        {
+                            "type": "input_text",
+                            "text": f"[filename]: {info['file_name']}\n[file content]:\n{text.strip()}",
+                        }
+                    )
+            # user message has entity urls, use full html
+            clean_texts = clean_context(info["html"] or info["text"]) if role == "user" and info["entity_urls"] else clean_context(info["text"])
+            if not clean_texts:
+                continue
+            texts = f"[username]: {sender}\n[message]:\n{clean_texts}" if role == "user" and sender else clean_texts
+            contexts.append({"type": "input_text", "text": texts})
+        except Exception as e:
+            logger.warning(f"Download media from message failed: {e}")
+            continue
+    return {"role": role, "content": contexts} if contexts else {}
+
+
+async def get_gemini_contexts(client: Client, message: Message, gemini: genai.Client) -> list[dict]:
+    """Generate Gemini contexts from old to new.
+
+    Returns:
+        contexts: list[dict]
+    """
+    ctx_messages = [message]
+    while message.reply_to_message:
+        message = message.reply_to_message
+        ctx_messages.append(message)
+    ctx_messages = ctx_messages[: int(AI.MAX_CONTEXTS_NUM)][::-1]  # old to new
+    contexts = []
+    for m in ctx_messages:
+        info = parse_msg(m, silent=True)
+        role = "model" if BOT_TIPS in info["text"] else "user"
+        if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document", "web_page"]:
+            continue
+        # gemini has built-in support for these extensions
+        gemini_extensions = [".pdf", ".html", ".css", ".csv", ".xml", ".rtf", ".mp3", ".wav", ".ogg", ".aac", ".flac", ".jpg", ".jpeg", ".webp", ".png", ".heic", ".heif"]
+        # gemini has built-in support for these mime types
+        gemini_mime_types = ["application/pdf", "application/x-javascript", "audio/ogg", "audio/mp4", "image/jpeg", "image/png", "image/webp", "image/heic", "image/heif"]
+        txt_extensions = [".txt", ".js", ".py", ".md", ".sh", ".json"]  # treat these as txt file
+        extra_markdown_extensions = [".docx", ".pptx", ".xls", ".xlsx", ".epub"]  # convert to markdown
+        group_messages = await client.get_media_group(m.chat.id, m.id) if m.media_group_id else [m]
+        parts = []
+        for msg in group_messages:
+            info = parse_msg(msg, silent=True)
+            sender = info["fwd_full_name"] or info["full_name"]
+            media_path = DOWNLOAD_DIR + "/" + info["file_name"]
+            try:
+                if info["mtype"] in ["video", "photo", "audio", "voice"] or info["mime_type"] in gemini_mime_types or any(info["file_name"].endswith(ext) for ext in gemini_extensions):
+                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                    if info["mtype"] in ["audio", "voice"] and Path(fpath).suffix not in GEMINI_AUDIO_EXT:
+                        audio_path = await downsampe_audio(fpath)
+                        fpath = audio_path.as_posix()
+                    upload = await gemini.aio.files.upload(file=fpath, config=UploadFileConfig(display_name=info["file_name"] or f"send from {sender}"))
+                    while upload.state == FileState.PROCESSING:
+                        logger.trace("Waiting for upload to complete...")
+                        await asyncio.sleep(1)
+                        upload = await gemini.aio.files.get(name=upload.name)  # type: ignore
+                    if upload.state == FileState.ACTIVE and upload.uri:
+                        parts.append(Part.from_uri(file_uri=upload.uri, mime_type=upload.mime_type))
+                    Path(fpath).unlink(missing_ok=True)
+                elif info["mtype"] == "document":
+                    if info["mime_type"].startswith("text/") or Path(info["file_name"]).suffix in txt_extensions:
+                        fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                        parts.append(Part.from_text(text=f"[filename]: {info['file_name']}\n[file content]:\n{read_text(fpath).strip()}"))
+                    if Path(info["file_name"]).suffix in extra_markdown_extensions:
+                        fpath: str = await client.download_media(msg, media_path)  # type: ignore
+                        text = convert_md(fpath)
+                        Path(fpath).unlink(missing_ok=True)
+                        parts.append(Part.from_text(text=f"[filename]: {info['file_name']}\n[file content]:\n{text.strip()}"))
+                clean_texts = clean_context(info["text"])
+                if not clean_texts:
+                    continue
+                texts = f"[username]: {sender}\n[message]:\n{clean_texts}" if role == "user" and sender else clean_texts
+                parts.append(Part.from_text(text=texts))
+            except Exception as e:
+                logger.warning(f"Download media from message failed: {e}")
+                continue
+        if parts:
+            contexts.append({"role": role, "parts": parts})
+    return contexts
+
+
+async def base64_media(client: Client, message: Message) -> dict:
+    data: BytesIO = await client.download_media(message, in_memory=True)  # type: ignore
+    logger.debug(f"Downloaded message media: {data.name}")
+
+    ext = Path(data.name).suffix.removeprefix(".").replace("jpg", "jpeg")
+
+    # image, video
+    b64_encoding = base64.b64encode(data.getvalue()).decode("utf-8")
+
+    # text document
+    value = ""
+    with contextlib.suppress(Exception):
+        value = data.getvalue().decode("utf-8")
+    return {
+        "ext": ext,
+        "base64": b64_encoding,
+        "value": value,
+    }
src/ai/texts/gemini.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import asyncio
+import contextlib
+
+from glom import glom
+from google import genai
+from google.genai import types
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.types import Message, ReplyParameters
+
+from ai.texts.contexts import get_gemini_contexts
+from ai.utils import BOT_TIPS, EMOJI_REASONING_BEGIN, EMOJI_TEXT_BOT, beautify_llm_response, literal_eval
+from config import AI, PROXY, TEXT_LENGTH
+from messages.progress import modify_progress
+from messages.utils import blockquote, count_without_entities, smart_split
+from networking import flatten_rediercts
+from utils import number_to_emoji, strings_list
+
+
+async def gemini_chat_completion(
+    client: Client,
+    message: Message,
+    *,
+    prefix: str = "",
+    model_id: str = AI.GEMINI_MODEL_ID,
+    model_name: str = AI.GEMINI_MODEL_ID,
+    gemini_base_url: str = AI.GEMINI_BASE_URL,
+    gemini_api_keys: str = AI.GEMINI_API_KEYS,
+    gemini_default_headers: str | dict = AI.GEMINI_DEFAULT_HEADERS,
+    gemini_generate_content_config: str | dict = AI.GEMINI_GENERATE_CONTENT_CONFIG,
+    gemini_proxy: str | None = PROXY.GOOGLE,
+    gemini_append_grounding: bool = True,
+    silent: bool = False,
+    max_retries: int = 3,
+    **kwargs,
+) -> dict:
+    """Get OpenAI Chat Completions.
+
+    Returns:
+            dict: {"texts": str, "thoughts": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
+    """
+    if not prefix:
+        prefix = f"{EMOJI_TEXT_BOT}**{model_name}**:{BOT_TIPS}\n"
+
+    if silent or not kwargs.get("show_progress"):  # noqa: SIM108
+        status_msg = None
+    else:
+        status_msg = kwargs.get("progress") or await message.reply(f"{EMOJI_TEXT_BOT}**{model_name}**: 思考中...", quote=True)
+
+    sent_messages = [status_msg]
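+    # Rotate through API keys in random order; return on the first key that produces text.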
+    for api_key in strings_list(gemini_api_keys, shuffle=True):
+        try:
+            http_options = types.HttpOptions(base_url=gemini_base_url, async_client_args={"proxy": gemini_proxy}, headers=literal_eval(gemini_default_headers))
+            gemini = genai.Client(api_key=api_key, http_options=http_options)
+            params = {"model": model_id, "contents": await get_gemini_contexts(client, message, gemini)}
+            if conf := literal_eval(gemini_generate_content_config):
+                params["config"] = conf
+            logger.debug(f"genai.Client().models.generate_content_stream(**{params})")
+            resp = await single_api_generate_content(
+                client,
+                status_msg,
+                gemini,
+                params=params,
+                prefix=prefix,
+                silent=silent,
+                max_retries=max_retries,
+                append_grounding=gemini_append_grounding,
+                **kwargs,
+            )
+            if resp.get("texts"):
+                sent_messages.extend(resp.get("sent_messages", []))
+                return {
+                    "texts": resp["texts"],
+                    "thoughts": resp["thoughts"],
+                    "prefix": prefix,
+                    "model_name": model_name,
+                    "sent_messages": [m for m in sent_messages if isinstance(m, Message)],
+                }
+        except Exception as e:
+            logger.error(f"Gemini API error: {e}")
+    return {}
+
+
+async def single_api_generate_content(
+    client: Client,
+    status_msg: Message | None,
+    gemini: genai.Client,
+    params: dict,
+    *,
+    prefix: str = "",
+    retry: int = 0,
+    max_retries: int = 3,
+    append_grounding: bool = True,
+    silent: bool = False,
+    **kwargs,
+) -> dict:
+    """Get Gemini Chat Completions via single API.
+
+    Returns:
+        dict: {"texts": str, "thoughts": str, "sent_messages": list[Message]}
+    """
+    if retry > max_retries:
+        return {"texts": "", "thoughts": "", "sent_messages": []}
+    answers = ""  # all model responses
+    thoughts = ""  # all model thoughts
+    runtime_texts = ""  # for a single telegram message
+    status_cid = status_msg.chat.id if isinstance(status_msg, Message) else 0
+    status_mid = status_msg.id if isinstance(status_msg, Message) else 0
+    sent_messages = []
+    resp = {}
+    try:
+        is_reasoning = False
+        reasoning_chat_flag = None  # whether this conversation has produced reasoning content
+        async for chunk in await gemini.aio.models.generate_content_stream(**params):
+            resp = parse_chunk(chunk)
+            chunk_answer = resp.get("texts", "")
+            chunk_thinking = resp.get("thinking", "")
+            if chunk_thinking:
+                reasoning_chat_flag = True
+            if chunk_thinking and not is_reasoning:  # first reasoning chunk received
+                is_reasoning = True
+                runtime_texts += f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}{chunk_thinking.lstrip()}"
+            elif chunk_thinking and is_reasoning:  # still streaming reasoning content
+                runtime_texts += chunk_thinking
+            elif reasoning_chat_flag is True and is_reasoning:  # Receiving response, close reasoning flag
+                is_reasoning = False
+                runtime_texts = chunk_answer.lstrip()
+            else:
+                runtime_texts += chunk_answer
+
+            runtime_texts = beautify_llm_response(runtime_texts)
+            length = await count_without_entities(prefix + runtime_texts)
+            if length <= TEXT_LENGTH:
+                if len(runtime_texts.removeprefix(prefix)) > 10:  # start response if answer is not empty
+                    await modify_progress(message=status_msg, text=prefix + runtime_texts, detail_progress=True)
+            else:  # answer is too long, split it into multiple messages
+                parts = await smart_split(prefix + runtime_texts)
+                if len(parts) == 1:
+                    continue
+                if is_reasoning:
+                    runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}{parts[-1].lstrip()}"  # remove previous thinking
+                    await modify_progress(message=status_msg, text=parts[0], force_update=True)  # force send the first part
+                else:
+                    await modify_progress(message=status_msg, text=blockquote(parts[0]), force_update=True)  # force send the first part
+                    runtime_texts = parts[-1]  # keep the last part
+                    if not silent:
+                        status_msg = await client.send_message(status_cid, text=prefix + runtime_texts, reply_parameters=ReplyParameters(message_id=status_mid))  # the new message
+                        sent_messages.append(status_msg)
+                        status_mid = status_msg.id
+
+            thoughts += chunk_thinking
+            answers += chunk_answer
+
+        await gemini.aio.aclose()
+        # all chunks are processed
+        if not answers.strip() and not thoughts.strip():  # empty response
+            return await single_api_generate_content(
+                client,
+                status_msg,
+                gemini,
+                params=params,
+                prefix=prefix,
+                silent=silent,
+                retry=retry + 1,
+                max_retries=max_retries,
+                append_grounding=append_grounding,
+                **kwargs,
+            )
+        if append_grounding:  # add grounding to the response
+            answers = await add_grounding_results(answers, resp["grounding_chunks"], resp["grounding_supports"])
+            runtime_texts = await add_grounding_results(runtime_texts, resp["grounding_chunks"], resp["grounding_supports"])
+        if await count_without_entities(prefix + answers) <= TEXT_LENGTH - 10:  # short answer in single msg
+            quoted = answers.strip()
+            await modify_progress(message=status_msg, text=f"{prefix}{blockquote(quoted)}", force_update=True)
+        else:  # total length is too long, answers are split into multiple messages
+            await modify_progress(message=status_msg, text=prefix + blockquote(runtime_texts), force_update=True)
+
+    except Exception as e:
+        error = f"{e}\n{resp}"
+        logger.error(error)
+        with contextlib.suppress(Exception):
+            await modify_progress(message=status_msg, text=error, force_update=True)
+            for msg in sent_messages:
+                await modify_progress(msg, del_status=True)
+        if retry + 1 < max_retries:
+            return await single_api_generate_content(
+                client,
+                status_msg,
+                gemini,
+                params=params,
+                prefix=prefix,
+                silent=silent,
+                retry=retry + 1,
+                max_retries=max_retries,
+                append_grounding=append_grounding,
+                **kwargs,
+            )
+    return {"texts": answers, "thoughts": thoughts, "sent_messages": sent_messages}
+
+
+def parse_chunk(chunk: types.GenerateContentResponse) -> dict:
+    """Parse gemini response, includes texts, image and websearch."""
+    data = chunk.model_dump()
+    parts = glom(data, "candidates.0.content.parts", default=[]) or []
+
+    texts = "".join([p.get("text", "") for p in parts if not p.get("thought")])
+    thinking = "".join([p.get("text", "") for p in parts if p.get("thought")])
+    logger.trace(texts or thinking)
+    return {
+        "texts": beautify_llm_response(texts, newline_level=2),
+        "thinking": beautify_llm_response(thinking, newline_level=2),
+        "grounding_chunks": glom(data, "candidates.0.grounding_metadata.grounding_chunks", default=[]) or [],
+        "grounding_supports": glom(data, "candidates.0.grounding_metadata.grounding_supports", default=[]) or [],
+    }
+
+
+async def add_grounding_results(answers: str, grounding_chunks: list[dict], grounding_supports: list[dict]) -> str:
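+    """Append numbered citation links from Gemini grounding metadata to the answer text."""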
+    urls = [glom(chunk, "web.uri", default="https://www.google.com") for chunk in grounding_chunks]
+    tasks = [flatten_rediercts(url) for url in urls]
+    try:
+        flatten_urls = await asyncio.gather(*tasks)
+        index2url = flatten_urls
+    except Exception as e:
+        logger.warning(e)
+        index2url = urls
+    logger.trace(f"Grounding URLs: {index2url}")
+    for support in grounding_supports:
+        indices: list[int] = support.get("grounding_chunk_indices", [])
+        logger.trace(f"Add grounding indices: {indices}")
+        indices_with_url = " ".join([f"[[{idx + 1}]]({glom(index2url, str(idx), default='https://www.google.com')})" for idx in indices])
+        if segment := glom(support, "segment.text", default=""):
+            answers = answers.replace(segment, f"{segment}{indices_with_url}", 1)
+    for idx, grounding in enumerate(grounding_chunks):
+        if idx > 9:
+            break
+        title = glom(grounding, "web.title", default="Web")
+        url = glom(index2url, str(idx), default="https://www.google.com")
+        if url in answers:
+            answers += f"\n{number_to_emoji(idx + 1)}[{title}]({url})"
+    return answers
src/ai/texts/models.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+
+from glom import glom
+from loguru import logger
+from pyrogram.types import Message
+
+from ai.utils import BOT_TIPS, EMOJI_TEXT_BOT, text_generation_docs
+from config import AI, PREFIX
+from database.kv import get_cf_kv
+from messages.utils import startswith_prefix
+
+
+# ruff: noqa: RUF002
+async def get_text_model_config(message: Message) -> dict:
+    r"""Get model config based on the message.
+
+    Model config is retrieved from CF-KV with key: {AI.TEXT_MODEL_CONFIG_KEY}
+
+    A sample config:
+    {
+    "docs": "🤖AI对话: `/ai` + 提示词\n回复消息可将其加入历史上下文\n🔄使用以下命令强制切换模型:\n/gpt: GPT-5.2\n/gemini: Gemini-2.5-Flash\n/grok: Grok-4\n/claude: Claude-Opus-4.5\n/doubao: Doubao-Seed-1.8\n/ds: DeepSeek-R1\n/qwen: Qwen3-Max\n/kimi: Kimi-K2\n/glm: GLM-4.7\n/mimo: MiMo-V2-Flash",
+    "default": {
+        "model_id": "gemini-2.5-flash",
+        "model_name": "Gemini-2.5-Flash",
+        "api_type": "gemini",
+        "gemini_base_url": "https://generativelanguage.googleapis.com",
+        "gemini_api_keys": "key1,key2,key3,...",
+        "gemini_generate_content_config": {
+            "max_output_tokens": 65536,
+            "media_resolution": "MEDIA_RESOLUTION_HIGH",
+            "thinking_config": {"include_thoughts": true, "thinking_budget": 24576},
+            "tools":[{"google_search": {}}, {"url_context": {}}]}
+        }
+    },
+    "gpt": {
+        "model_id": "gpt-4o",
+        "model_name": "GPT-4o",
+        "api_type": "openai_chat",
+        "openai_base_url": "https://api.openai.com/v1",
+        "openai_api_keys": "key1,key2,key3",
+        "openai_completions_config": {
+            "temperature": 1.0,
+            "max_completion_tokens": 4096
+        }
+    },
+    "gpt-helicone": {
+        "model_id": "gpt-4o",
+        "model_name": "GPT-4o",
+        "api_type": "openai_chat",
+        "openai_base_url": "https://gateway.helicone.ai/v1",
+        "openai_api_keys": "key1,key2,key3,...",
+        "openai_default_headers": {
+            "helicone-auth": "Bearer HELICONE_API_KEY",
+            "helicone-target-url": "https://api.openai.com"
+        },
+        "openai_completions_config": {
+            "temperature": 1.0,
+            "max_completion_tokens": 4096
+        }
+    },
+    "doubao": {
+        "model_id": "doubao-seed-1-8-251228",
+        "model_name": "Doubao-Seed-1.8",
+        "api_type": "openai_responses",
+        "cache_response_ttl": 604800,
+        "openai_base_url": "https://ark.cn-beijing.volces.com/api/v3",
+        "openai_api_keys": "key1,key2,key3,...",
+        "openai_responses_config": {
+        "reasoning": { "effort": "high" },
+        "max_output_tokens": 65536,
+        "extra_body": {
+            "thinking": { "type": "enabled" }
+        },
+        "tools": [
+            {
+            "type": "web_search",
+            "max_keyword": 5,
+            "limit": 20
+            }
+        ],
+        "max_tool_calls": 10
+        }
+    },
+    "tool_call_model": {
+        "model_id": "gpt-4o-mini",
+        "model_name": "Web Search",
+        "api_type": "openai_chat"
+        "openai_base_url": "https://api.openai.com/v1",
+        "openai_api_keys": "key1,key2,key3",
+        "openai_completions_config": {
+            "temperature": 1.0,
+            "max_completion_tokens": 4096
+        }
+    }
+    }
+
+
+    Suppose this message is:
+        Message(text="/ai hello") -> use `default` as model identifier
+        Message(text="/ai @gpt-4.1 hello") -> use `gpt-4.1` as model identifier
+
+    Reply to a message:
+        Message(text="🤖Gemini-2.5-Flash:(回复以继续)\nHello") -> find the model_alias via model_name=`Gemini-2.5-Flash`
+        Message(text="🤖GPT-4o:(回复以继续)\nHello") -> find the model_alias via model_name=`GPT-4o`
+
+    Returns:
+        {
+            "model_id": "gpt-4o",
+            "model_name": "GPT-4o",
+            "openai_api_type": "chat",
+            "openai_base_url": "https://api.openai.com/v1",
+            "openai_api_keys": "key1,key2,...",
+            "openai_default_headers": {},
+            "openai_completions_config": {},
+            "openai_responses_config": {},
+            ...  # other fields will also be passed to the function
+        }
+    """
+    texts = str(message.content).strip()
+    if texts.startswith(EMOJI_TEXT_BOT) and BOT_TIPS in texts:
+        # DO NOT respond to AI responses, to avoid potential infinite loops
+        return {}
+
+    # this message starts with /ai
+    if startswith_prefix(message.content, PREFIX.AI_TEXT_GENERATION):
+        prompt = texts.removeprefix(PREFIX.AI_TEXT_GENERATION).strip()
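+        # Strip an optional leading "@alias" token so only the user prompt remains.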
+        prompt = re.sub(r"^@([a-zA-Z0-9_\-\.]+)(\s+)?", "", prompt, flags=re.DOTALL).strip()
+        if not prompt and not message.reply_to_message:  # no prompt & no reply_msg
+            await message.reply_text(text=await text_generation_docs(), quote=True)
+            return {}
+        if matched := re.match(rf"^{PREFIX.AI_TEXT_GENERATION}\s+@([a-zA-Z0-9_\-\.]+)(\s+)?", texts):  # match /ai @custom_model_id
+            model_id = matched.group(1).strip()
+            return await get_config_by_model_id(model_id)
+        return await get_config_by_model_id("default")
+
+    # this message is not /ai, try to find model id from reply_message
+    reply_msg = message.reply_to_message
+    if not isinstance(reply_msg, Message):
+        return {}
+
+    if matched := re.match(rf"^{EMOJI_TEXT_BOT}(.*?):{BOT_TIPS}", str(reply_msg.content)):
+        model_name = matched.group(1).strip()
+        return await get_config_by_model_name(model_name)
+    return {}
+
+
+async def get_config_by_model_id(model_id: str, *, fallback_to_default: bool = True) -> dict:
+    """Get model config by model_id.
+
+    Returns:
+        model_config
+    """
+    kv = await get_cf_kv(AI.TEXT_MODEL_CONFIG_KEY, cache_ttl=600, silent=True)
+    default_config = kv.get("default", {})
+    if not default_config:
+        logger.warning(f"CF-KV key `{AI.TEXT_MODEL_CONFIG_KEY}` does not has `default` field")
+        default_config = (
+            {
+                "model_id": AI.GEMINI_MODEL_ID,
+                "model_name": AI.GEMINI_MODEL_ID,
+                "api_type": "gemini",
+                "gemini_base_url": AI.GEMINI_BASE_URL,
+                "gemini_api_keys": AI.GEMINI_API_KEYS,
+            }
+            if AI.TEXT_DEFAULT_PROVIDER == "gemini"
+            else {
+                "model_id": AI.OPENAI_MODEL_ID,
+                "model_name": AI.OPENAI_MODEL_ID,
+                "openai_api_type": "chat",
+                "openai_base_url": AI.OPENAI_BASE_URL,
+                "openai_api_keys": AI.OPENAI_API_KEYS,
+            }
+        )
+    custom_config = kv.get(model_id, {})
+    if not custom_config:
+        if fallback_to_default:
+            logger.warning(f"Model `{model_id}` is not configured in KV, using default config")
+            return default_config
+        return {}
+    return default_config | custom_config
+
+
+async def get_config_by_model_name(model_name: str) -> dict:
+    """Get model config by model_name.
+
+    Returns:
+        model_config
+    """
+    kv = await get_cf_kv(AI.TEXT_MODEL_CONFIG_KEY, cache_ttl=600, silent=True)
+    model_names = {glom(v, "model_name", default=""): k for k, v in kv.items()}
+    model_alias = model_names.get(model_name, model_name)
+    return await get_config_by_model_id(model_alias)
src/ai/texts/openai_chat.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import contextlib
+import re
+
+from glom import glom
+from loguru import logger
+from openai import AsyncOpenAI, DefaultAsyncHttpxClient
+from pyrogram.client import Client
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
+from pyrogram.types import Message, ReplyParameters
+
+from ai.texts.contexts import get_openai_completion_contexts
+from ai.utils import BOT_TIPS, EMOJI_REASONING_BEGIN, EMOJI_REASONING_END, EMOJI_TEXT_BOT, beautify_llm_response, literal_eval, split_reasoning, trim_none
+from config import AI, PROXY, TEXT_LENGTH
+from messages.progress import modify_progress
+from messages.utils import blockquote, count_without_entities, delete_message, smart_split
+from utils import strings_list
+
+
+async def openai_chat_completions(
+    client: Client,
+    message: Message,
+    *,
+    prefix: str = "",
+    model_id: str = AI.OPENAI_MODEL_ID,
+    model_name: str = AI.OPENAI_MODEL_ID,
+    openai_base_url: str = AI.OPENAI_BASE_URL,
+    openai_api_keys: str = AI.OPENAI_API_KEYS,
+    openai_client_config: str | dict = AI.OPENAI_CLIENT_CONFIG,
+    openai_default_headers: str | dict = AI.OPENAI_DEFAULT_HEADERS,
+    openai_completions_config: str | dict = AI.OPENAI_COMPLETIONS_CONFIG,
+    openai_proxy: str | None = PROXY.OPENAI,
+    openai_system_prompt: str = "",
+    openai_contexts: list[dict] | None = None,
+    openai_tools: list[dict] | None = None,
+    silent: bool = False,
+    max_retries: int = 3,
+    **kwargs,
+) -> dict:
+    """Get OpenAI Chat Completions.
+
+    Returns:
+        dict: {"texts": str, "thoughts": str, "prefix": str, "model_name": str,  "sent_messages": list[Message]}
+    """
+    if not prefix:
+        prefix = f"{EMOJI_TEXT_BOT}**{model_name}**:{BOT_TIPS}\n"
+
+    if silent or not kwargs.get("show_progress"):  # noqa: SIM108
+        status_msg = None
+    else:
+        status_msg = kwargs.get("progress") or await message.reply(f"{EMOJI_TEXT_BOT}**{model_name}**: 思考中...", quote=True)
+
+    sent_messages = [status_msg]
+    try:
+        openai_client = {}
+        if literal_eval(openai_client_config):
+            openai_client |= literal_eval(openai_client_config)
+        if literal_eval(openai_default_headers):
+            openai_client |= {"default_headers": literal_eval(openai_default_headers)}
+        if openai_proxy:
+            openai_client |= {"http_client": DefaultAsyncHttpxClient(proxy=openai_proxy)}
+        contexts = openai_contexts or await get_openai_completion_contexts(client, message)
+        if openai_system_prompt and glom(contexts, "0.role", default="") != "system":
+            contexts.insert(0, {"role": "system", "content": openai_system_prompt})
+        params = {"model": model_id, "messages": contexts, "stream": True}
+        if literal_eval(openai_completions_config):
+            params |= literal_eval(openai_completions_config)
+        if openai_tools:
+            params |= {"tools": openai_tools, "tool_choice": "auto"}
+        logger.debug(f"openai.chat.completions.create(**{params})")
+    except Exception as e:
+        logger.error(f"OpenAI client setup error: {e}")
+        return {}
+    for api_key in strings_list(openai_api_keys, shuffle=True):
+        try:
+            openai_client |= {"base_url": openai_base_url, "api_key": api_key}
+            logger.trace(f"AsyncOpenAI(**{openai_client})")
+            openai = AsyncOpenAI(**openai_client)
+            resp = await single_api_chat_completions(
+                client,
+                status_msg,
+                openai,
+                params=params,
+                prefix=prefix,
+                silent=silent,
+                max_retries=max_retries,
+                **kwargs,
+            )
+            if resp.get("texts") or resp.get("tool_name"):
+                return resp | {"prefix": prefix, "model_name": model_name, "sent_messages": [m for m in sent_messages + resp["sent_messages"] if isinstance(m, Message)]}
+        except Exception as e:
+            logger.error(f"OpenAI API error: {e}")
+
+    return {}
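+
+# Minimal usage sketch (illustrative; the model_id/base_url/key values below are
+# placeholders, real call sites pass configs resolved from KV in ai.texts.models):
+#   resp = await openai_chat_completions(
+#       client, message,
+#       model_id="gpt-4o-mini", model_name="GPT-4o-mini",
+#       openai_base_url="https://api.openai.com/v1", openai_api_keys="sk-...",
+#   )
+#   resp.get("texts")  # full answer; resp.get("thoughts") holds reasoning, if any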
+
+
+async def single_api_chat_completions(
+    client: Client,
+    status_msg: Message | None,
+    openai: AsyncOpenAI,
+    params: dict,
+    *,
+    prefix: str = "",
+    silent: bool = False,
+    retry: int = 0,
+    max_retries: int = 3,
+    **kwargs,
+) -> dict:
+    """Get OpenAI Chat Completions via single API.
+
+    Returns:
+        dict: {"texts": str, "thoughts": str, "tool_name": str, "tool_args": str, "sent_messages": list[Message]}
+    """
+    if retry > max_retries:
+        return {"texts": "", "thoughts": "", "tool_name": "", "tool_args": "", "sent_messages": []}
+    answers = ""  # all model responses
+    thoughts = ""  # all model thoughts
+    tool_name = ""
+    tool_args = ""
+    runtime_texts = ""  # for a single telegram message
+    status_cid = status_msg.chat.id if isinstance(status_msg, Message) else 0
+    status_mid = status_msg.id if isinstance(status_msg, Message) else 0
+    sent_messages = []
+    resp = ""
+    try:
+        is_reasoning = False
+        reasoning_chat_flag = None  # indicates whether this conversation streams reasoning content
+        async for chunk in await openai.chat.completions.create(**params):
+            resp = chunk.model_dump()
+            logger.trace(trim_none(resp))
+            chunk_answer = glom(resp, "choices.0.delta.content", default="") or ""
+            chunk_thinking = glom(resp, "choices.0.delta.reasoning_content", default="") or ""
+            tool_name = tool_name or glom(resp, "choices.0.delta.tool_calls.0.function.name", default="")
+            tool_args += glom(resp, "choices.0.delta.tool_calls.0.function.arguments", default="") or ""
+            if not chunk_answer and not chunk_thinking:
+                continue
+            if reasoning_chat_flag is None and chunk_thinking:
+                reasoning_chat_flag = True
+            if chunk_thinking and not is_reasoning:  # first reasoning chunk received
+                is_reasoning = True
+                runtime_texts += f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}{chunk_thinking.lstrip()}"
+            elif chunk_thinking and is_reasoning:  # still reasoning, append the chunk
+                runtime_texts += chunk_thinking
+            elif reasoning_chat_flag is True and is_reasoning:  # answer started, close the reasoning flag
+                is_reasoning = False
+                runtime_texts = chunk_answer.lstrip()
+            else:
+                runtime_texts += chunk_answer
+
+            # Sometimes the reasoning content is included in the content field.
+            # handle "<think>...</think>\n\n"
+            if runtime_texts.removeprefix(prefix).lstrip().startswith("<think>"):
+                is_reasoning = True
+                runtime_texts = runtime_texts.replace("<think>", f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}")
+            if "</think>" in runtime_texts:
+                is_reasoning = False
+                runtime_texts = re.sub(r"</think>\s*", f"{EMOJI_REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}", runtime_texts, count=1)
+
+            thoughts += chunk_thinking
+            answers += chunk_answer
+            runtime_texts = beautify_llm_response(runtime_texts)
+            length = await count_without_entities(prefix + runtime_texts)
+            if length <= TEXT_LENGTH - 10:  # leave some flexibility
+                if len(runtime_texts.removeprefix(prefix)) > 10:  # start response if answer is not empty
+                    await modify_progress(message=status_msg, text=prefix + runtime_texts, detail_progress=True)
+            else:  # answer is too long; split it into multiple messages
+                parts = await smart_split(prefix + runtime_texts)
+                if len(parts) == 1:
+                    continue
+                if is_reasoning:
+                    runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}{parts[-1].lstrip()}"  # remove previous thinking
+                    await modify_progress(message=status_msg, text=parts[0], force_update=True)  # force send the first part
+                else:
+                    await modify_progress(message=status_msg, text=blockquote(parts[0]), force_update=True)  # force send the first part
+                    runtime_texts = parts[-1]  # keep the last part
+                    if not silent:
+                        status_msg = await client.send_message(status_cid, text=prefix + runtime_texts, reply_parameters=ReplyParameters(message_id=status_mid))  # the new message
+                        sent_messages.append(status_msg)
+                        status_mid = status_msg.id
+        if tool_name.strip():
+            return {"texts": answers, "thoughts": thoughts, "tool_name": tool_name.strip(), "tool_args": tool_args.strip(), "sent_messages": sent_messages}
+        # all chunks are processed
+        if not (answers.strip() or thoughts.strip()):  # empty response
+            await modify_progress(message=status_msg, text=str(resp), force_update=True)
+            return await single_api_chat_completions(
+                client,
+                status_msg,
+                openai,
+                params=params,
+                prefix=prefix,
+                retry=retry + 1,
+                max_retries=max_retries,
+                silent=silent,
+                **kwargs,
+            )
+
+        if not thoughts:  # no structured thinking in response
+            thoughts, answers = split_reasoning(answers)
+
+        # answers = add_search_results_to_response(config.get("search_results", []), answers)
+        if await count_without_entities(prefix + answers) <= TEXT_LENGTH - 10:  # short answer in single msg
+            quoted = answers.strip()
+            await modify_progress(message=status_msg, text=f"{prefix}{blockquote(quoted)}", force_update=True)
+        else:  # total length is too long; the answer is split into multiple messages
+            await modify_progress(message=status_msg, text=prefix + blockquote(runtime_texts), force_update=True)
+
+    except Exception as e:
+        error = f"{EMOJI_TEXT_BOT}BOT请求失败, 重试次数: {retry + 1}/{max_retries}\n{e}\n{resp}"
+        logger.error(error)
+        with contextlib.suppress(Exception):
+            await modify_progress(status_msg, text=error, force_update=True, **kwargs)
+            for msg in sent_messages:
+                await delete_message(msg)
+        if retry + 1 < max_retries:
+            return await single_api_chat_completions(
+                client,
+                status_msg,
+                openai,
+                params=params,
+                prefix=prefix,
+                retry=retry + 1,
+                max_retries=max_retries,
+                silent=silent,
+                **kwargs,
+            )
+    return {"texts": answers, "thoughts": thoughts, "tool_name": tool_name.strip(), "tool_args": tool_args.strip(), "sent_messages": sent_messages}
src/ai/texts/openai_response.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import contextlib
+import hashlib
+import time
+
+from glom import Coalesce, glom
+from loguru import logger
+from openai import AsyncOpenAI, DefaultAsyncHttpxClient
+from pyrogram.client import Client
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.types import Message, ReplyParameters
+
+from ai.texts.contexts import get_openai_response_contexts
+from ai.utils import BOT_TIPS, EMOJI_REASONING_BEGIN, EMOJI_TEXT_BOT, beautify_llm_response, literal_eval
+from config import AI, PROXY, TEXT_LENGTH
+from database.r2 import set_cf_r2
+from messages.progress import modify_progress
+from messages.utils import blockquote, count_without_entities, delete_message, smart_split
+from utils import number_to_emoji, strings_list
+
+
+async def openai_responses_api(
+    client: Client,
+    message: Message,
+    *,
+    prefix: str = "",
+    model_id: str = AI.OPENAI_MODEL_ID,
+    model_name: str = AI.OPENAI_MODEL_ID,
+    openai_base_url: str = AI.OPENAI_BASE_URL,
+    openai_api_keys: str = AI.OPENAI_API_KEYS,
+    openai_client_config: str | dict = AI.OPENAI_CLIENT_CONFIG,
+    openai_default_headers: str | dict = AI.OPENAI_DEFAULT_HEADERS,
+    openai_responses_config: str | dict = AI.OPENAI_RESPONSES_CONFIG,
+    openai_proxy: str | None = PROXY.OPENAI,
+    cache_response_ttl: int = 0,
+    silent: bool = False,
+    max_retries: int = 3,
+    **kwargs,
+) -> dict:
+    """Get OpenAI Chat Completions.
+
+    Returns:
+        dict: {"texts": str, "thoughts": str, "response_id": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
+    """
+    if not prefix:
+        prefix = f"{EMOJI_TEXT_BOT}**{model_name}**:{BOT_TIPS}\n"
+
+    if silent or not kwargs.get("show_progress"):  # noqa: SIM108
+        status_msg = None
+    else:
+        status_msg = kwargs.get("progress") or await message.reply(f"{EMOJI_TEXT_BOT}**{model_name}**: 思考中...", quote=True)
+
+    sent_messages = [status_msg]
+    cache_day = cache_response_ttl // 86400
+    try:
+        openai_client = {}
+        if literal_eval(openai_client_config):
+            openai_client |= literal_eval(openai_client_config)
+        if literal_eval(openai_default_headers):
+            openai_client |= {"default_headers": literal_eval(openai_default_headers)}
+        if openai_proxy:
+            openai_client |= {"http_client": DefaultAsyncHttpxClient(proxy=openai_proxy)}
+    except Exception as e:
+        logger.error(f"OpenAI client setup error: {e}")
+        return {}
+
+    for api_key in strings_list(openai_api_keys, shuffle=True):
+        try:
+            openai_client |= {"base_url": openai_base_url, "api_key": api_key}
+            logger.trace(f"AsyncOpenAI(**{openai_client})")
+            openai = AsyncOpenAI(**openai_client)
+            previous_response_id, contexts = await get_openai_response_contexts(client, message, api_key, model_id, cache_day)
+            params = {"model": model_id, "stream": True, "input": contexts}
+            if literal_eval(openai_responses_config):
+                params |= literal_eval(openai_responses_config)
+            if previous_response_id:
+                params |= {"previous_response_id": previous_response_id}
+            if cache_response_ttl > 0:
+                extra_body = params.get("extra_body", {})
+                params["extra_body"] = extra_body | {"expire_at": int(time.time()) + int(cache_response_ttl)}
+            logger.debug(f"openai.responses.create(**{params})")
+            resp = await single_api_response(
+                client,
+                status_msg,
+                openai,
+                params=params,
+                prefix=prefix,
+                silent=silent,
+                max_retries=max_retries,
+                **kwargs,
+            )
+            if not resp.get("texts"):
+                continue
+            sent_messages.extend(resp.get("sent_messages", []))
+            sent_messages = [m for m in sent_messages if isinstance(m, Message)]
+            if cache_response_ttl > 0:
+                for sent_msg in sent_messages:  # save the response to R2
+                    key_hash = hashlib.sha256(api_key.encode()).hexdigest()
+                    await set_cf_r2(
+                        f"TTL/{cache_day}d/OpenAI/{model_id}/{key_hash}/{sent_msg.chat.id}/{sent_msg.id}",
+                        data=resp["full_response"],
+                        metadata={"response_id": resp["response_id"]},
+                        silent=silent,
+                    )
+            return {
+                "texts": resp["texts"],
+                "thoughts": resp["thoughts"],
+                "response_id": resp["response_id"],
+                "prefix": prefix,
+                "model_name": model_name,
+                "sent_messages": sent_messages,
+            }
+        except Exception as e:
+            logger.error(f"OpenAI API error: {e}")
+    return {}
+
+
+async def single_api_response(
+    client: Client,
+    status_msg: Message | None,
+    openai: AsyncOpenAI,
+    params: dict,
+    *,
+    prefix: str = "",
+    silent: bool = False,
+    retry: int = 0,
+    max_retries: int = 3,
+    **kwargs,
+) -> dict:
+    """Get OpenAI Chat Completions via single API.
+
+    Returns:
+        dict: {"texts": str, "thoughts": str, "full_response":dict, "response_id": str, "sent_messages": list[Message]}
+    """
+    if retry > max_retries:
+        return {"texts": "", "thoughts": "", "sent_messages": []}
+    answers = ""  # all model responses
+    thoughts = ""  # all model thoughts
+    runtime_texts = ""  # for a single telegram message
+    status_cid = status_msg.chat.id if isinstance(status_msg, Message) else 0
+    status_mid = status_msg.id if isinstance(status_msg, Message) else 0
+    sent_messages = []
+    full_response = {}
+    response_id = ""
+    try:
+        tool_calls: list[dict] = []  # tool_call results
+        is_reasoning = False
+        async for chunk in await openai.responses.create(**params):
+            resp = chunk.model_dump()
+            error = await parse_error(resp, retry, max_retries, status_msg)
+            if error["retry"]:
+                return await single_api_response(
+                    client,
+                    status_msg,
+                    openai,
+                    params=params,
+                    prefix=prefix,
+                    retry=retry + 1,
+                    max_retries=max_retries,
+                    silent=silent,
+                    **kwargs,
+                )
+            if error["error"]:
+                await modify_progress(message=status_msg, text=error["error"], force_update=True, **kwargs)
+                return {}
+            response_type = resp.get("type", "")
+            chunk_answer = resp.get("delta", "") if response_type == "response.output_text.delta" else ""
+            chunk_thinking = resp.get("delta", "") if response_type == "response.reasoning_summary_text.delta" else ""
+
+            # set the reasoning flag
+            if response_type in {"response.reasoning_summary_part.added", "response.reasoning_summary_text.delta"}:  # reasoning in progress
+                is_reasoning = True
+            elif response_type in {
+                "response.reasoning_summary_part.done",
+                "response.reasoning_summary_text.done",
+                "response.content_part.added",
+                "response.output_text.delta",
+                "response.output_text.done",
+                "response.content_part.done",
+                "response.completed",
+            }:  # reasoning finished
+                is_reasoning = False
+
+            if response_type == "response.reasoning_summary_part.added":  # first reasoning chunk received
+                runtime_texts += f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}{chunk_thinking.lstrip()}"
+            elif chunk_thinking:  # subsequent reasoning chunk
+                runtime_texts += chunk_thinking
+
+            if response_type == "response.content_part.added":  # first answer chunk received
+                runtime_texts = chunk_answer.lstrip()
+            else:
+                runtime_texts += chunk_answer
+
+            runtime_texts = beautify_llm_response(runtime_texts)
+            length = await count_without_entities(prefix + runtime_texts)
+            if length <= TEXT_LENGTH - 10:  # leave some flexibility
+                if len(runtime_texts.removeprefix(prefix)) > 10:  # start response if answer is not empty
+                    await modify_progress(message=status_msg, text=prefix + runtime_texts, detail_progress=True)
+            else:  # answer is too long; split it into multiple messages
+                parts = await smart_split(prefix + runtime_texts)
+                if len(parts) == 1:
+                    continue
+                if is_reasoning:
+                    runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{EMOJI_REASONING_BEGIN}{parts[-1].lstrip()}"  # remove previous thinking
+                    await modify_progress(message=status_msg, text=parts[0], force_update=True)  # force send the first part
+                else:
+                    await modify_progress(message=status_msg, text=blockquote(parts[0]), force_update=True)  # force send the first part
+                    runtime_texts = parts[-1]  # keep the last part
+                    if not silent:
+                        status_msg = await client.send_message(status_cid, text=prefix + runtime_texts, reply_parameters=ReplyParameters(message_id=status_mid))  # the new message
+                        sent_messages.append(status_msg)
+                        status_mid = status_msg.id
+
+            thoughts += chunk_thinking
+            answers += chunk_answer
+            if response_type == "response.reasoning_summary_text.done":
+                thoughts = resp.get("text", thoughts)
+            elif response_type == "response.output_text.done":
+                answers = resp.get("text", answers)
+            elif response_type == "response.content_part.done":
+                tool_calls = glom(resp, "part.annotations", default=[]) or []
+            elif response_type in {"response.created", "response.in_progress", "response.completed"}:
+                response_id = glom(resp, "response.id", default="")
+
+            if response_type == "response.completed":
+                full_response = resp
+        # all chunks are processed
+        if not answers.strip() and not thoughts.strip():  # empty response
+            return await single_api_response(
+                client,
+                status_msg,
+                openai,
+                params=params,
+                prefix=prefix,
+                retry=retry + 1,
+                max_retries=max_retries,
+                silent=silent,
+                **kwargs,
+            )
+        answers = add_tool_call_results_to_response(tool_calls, answers)
+        if await count_without_entities(prefix + answers) <= TEXT_LENGTH - 10:  # short answer in single msg
+            quoted = answers.strip()
+            await modify_progress(message=status_msg, text=f"{prefix}{blockquote(quoted)}", force_update=True)
+        else:  # total length is too long; the answer is split into multiple messages
+            await modify_progress(message=status_msg, text=prefix + blockquote(runtime_texts), force_update=True)
+
+    except Exception as e:
+        error = f"{EMOJI_TEXT_BOT}BOT请求失败, 重试次数: {retry + 1}/{max_retries}\n{e}"
+        if "resp" in locals():
+            error += f"\n{resp}"  # type: ignore
+        logger.error(error)
+        with contextlib.suppress(Exception):
+            await modify_progress(status_msg, text=error, force_update=True, **kwargs)
+            for msg in sent_messages:
+                await delete_message(msg)
+        if retry + 1 < max_retries:
+            return await single_api_response(
+                client,
+                status_msg,
+                openai,
+                params=params,
+                prefix=prefix,
+                retry=retry + 1,
+                max_retries=max_retries,
+                silent=silent,
+                **kwargs,
+            )
+    return {"texts": answers, "thoughts": thoughts, "full_response": full_response, "response_id": response_id, "sent_messages": [m for m in sent_messages if isinstance(m, Message)]}
+
+
+async def parse_error(resp: dict, retry: int, max_retries: int, status_msg: Message | None) -> dict:
+    """Parse GPT error.
+
+    Returns:
+        {"error": "msg", "retry": bool}
+    """
+    response_type = glom(resp, "type", default="")
+    if response_type not in {"error", "response.failed"}:
+        return {"error": "", "retry": False}
+    logger.warning(resp)
+    await modify_progress(status_msg, text=f"{resp}\n重试次数: {retry + 1}/{max_retries}", force_update=True)
+    return {"error": str(resp), "retry": retry < max_retries}
+
+
+def add_tool_call_results_to_response(tool_calls: list[dict], answers: str) -> str:
+    if not tool_calls or not isinstance(tool_calls, list):
+        return answers
+    answers = answers.strip()
+    for idx, tool_call in enumerate(tool_calls):
+        title = glom(tool_call, Coalesce("title", "site_name"), default="")
+        link = glom(tool_call, Coalesce("url", "link"), default="")
+        if link.startswith("http"):
+            answers += f"\n{number_to_emoji(idx + 1)} [{title}]({link})"
+    return answers.strip()
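+
+
+# Illustrative example (hypothetical annotation shape): given
+#   tool_calls = [{"title": "Example", "url": "https://example.com"}]
+# the answer gains one numbered source line per annotation, e.g.
+#   add_tool_call_results_to_response(tool_calls, "done.")
+#   # -> "done.\n<emoji 1> [Example](https://example.com)"
+# where number_to_emoji() is assumed to render the index as an emoji digit.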
src/ai/texts/tool_call.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from glom import glom
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from ai.texts.contexts import get_openai_completion_contexts
+from ai.texts.openai_chat import openai_chat_completions
+from ai.utils import EMOJI_TEXT_BOT, literal_eval
+from config import GOOGLE_SEARCH_GL, PROXY, TOKEN, TZ
+from messages.progress import modify_progress
+from networking import hx_req
+from utils import nowdt
+
+# ruff: noqa: RUF001
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "web_search",
+            "description": "Search the web for current information on a topic",
+            "parameters": {
+                "type": "object",
+                "required": ["query"],
+                "properties": {
+                    "query": {"description": "The search query to look up", "type": "string"},
+                },
+            },
+        },
+    }
+]
+
+
+def remove_tool(tools: list[dict], tool_name: str) -> list:
+    """Remove tool from tool list.
+
+    Returns: list[dict]
+    """
+    return [tool for tool in tools if glom(tool, "function.name") != tool_name]
+
+
+async def web_search(query: str) -> list[dict]:
+    """Search the web for current information on a topic.
+
+    Args:
+        query (str): The search query to look up.
+
+    Returns:
+        list[dict]: A list of dictionaries containing the search results.
+    """
+    if not (TOKEN.GOOGLE_SEARCH_API_KEY and TOKEN.GOOGLE_SEARCH_CX):
+        return []
+    api = "https://www.googleapis.com/customsearch/v1"
+    params = {
+        "key": TOKEN.GOOGLE_SEARCH_API_KEY,
+        "cx": TOKEN.GOOGLE_SEARCH_CX,
+        "q": query,
+        "num": 10,
+        "safe": "off",
+        "gl": GOOGLE_SEARCH_GL,
+    }
+    response = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items"], max_retry=0)
+    results = glom(response, "items", default=[]) or []
+
+    keep_keys = ["title", "link", "snippet", "mime"]
+    return [{k: v for k, v in x.items() if k in keep_keys} for x in results]
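+
+# Usage sketch (requires TOKEN.GOOGLE_SEARCH_API_KEY and TOKEN.GOOGLE_SEARCH_CX):
+#   results = await web_search("python asyncio tutorial")
+#   # -> [{"title": ..., "link": ..., "snippet": ..., "mime": ...}, ...]
+# Items are Google Custom Search results trimmed down to `keep_keys`.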
+
+
+async def get_tool_call_results(client: Client, message: Message, **kwargs) -> dict:
+    """Get OpenAI Tool Call Results.
+
+    Returns:
+        dict: {"texts": str, "thoughts": str, "prefix": str, "sent_messages": list[Message]}
+    """
+    contexts = await get_openai_completion_contexts(client, message)
+    if not contexts:
+        return {}
+
+    default_system_prompt = f"""You are a helpful assistant.
+Current date: {nowdt(TZ):%Y-%m-%d}
+
+# Tools
+
+## web_search
+
+Use the `web_search` tool to access up-to-date information from the web or when responding to the user requires information about their location. Some examples of when to use the `web_search` tool include:
+
+- Local Information: Use the `web_search` tool to respond to questions that require information about the user's location, such as the weather, local businesses, or events.
+- Freshness: If up-to-date information on a topic could potentially change or enhance the answer, call the `web_search` tool any time you would otherwise refuse to answer a question because your knowledge might be out of date.
+- Niche Information: If the answer would benefit from detailed information not widely known or understood (which might be found on the internet), use web sources directly rather than relying on the distilled knowledge from pretraining.
+- Accuracy: If the cost of a small mistake or outdated information is high (e.g., using an outdated version of a software library or not knowing the date of the next game for a sports team), then use the `web_search` tool.
+"""
+    system_prompt = kwargs.get("tool_call_system_prompt", default_system_prompt)
+    contexts.insert(0, {"role": "system", "content": system_prompt})
+    kwargs |= {
+        "openai_tools": TOOLS,
+        "openai_contexts": contexts,
+        "openai_system_prompt": system_prompt,
+    }
+    resp = await openai_chat_completions(client, message, **kwargs)
+    kwargs["progress"] = kwargs.get("progress", glom(resp, "sent_messages.0", default=None))
+    while resp.get("tool_name"):
+        tool_name = resp["tool_name"].strip()
+        tool_args = literal_eval(resp.get("tool_args", "{}"))
+        if tool_name == "web_search" and tool_args:
+            await modify_progress(text=f"{EMOJI_TEXT_BOT}**{kwargs['model_name']}** 开始搜索:\n{tool_args['query']}", force_update=True, **kwargs)
+            results = await web_search(**tool_args)
+            kwargs["openai_contexts"] = add_search_results(contexts, results)
+            kwargs["openai_tools"] = remove_tool(kwargs["openai_tools"], tool_name)
+        if not kwargs["openai_tools"]:
+            break
+        resp = await openai_chat_completions(client, message, **kwargs)
+    result_texts = glom(kwargs, "openai_contexts.0.content", default="").strip()
+    return {
+        "openai_system_prompt": result_texts.removeprefix(system_prompt).strip(),  # add tool results to system prompt
+        "openai_tools": None,  # disable tools after tool call
+        "progress": kwargs["progress"],
+    }
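+
+# Two-phase flow sketch: the tool-enabled completion may ask for `web_search`,
+# its results are folded into the system prompt, and the caller reruns the
+# final completion without tools (mirrors the dispatch in ai/main.py):
+#   overrides = await get_tool_call_results(client, message, **params)
+#   resp = await openai_chat_completions(client, message, **(params | overrides))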
+
+
+def add_search_results(contexts: list[dict], search_results: list[dict]) -> list[dict]:
+    """Add search results to the context.
+
+    Args:
+        contexts (list[dict]): The context to add the search results to.
+        search_results (list[dict]): The search results to add to the context.
+
+    Returns:
+        list[dict]: The context with the search results added.
+    """
+    if not contexts or not search_results:
+        return contexts
+
+    search_msg = ""
+    for idx, result in enumerate(search_results):
+        search_msg += f"[webpage {idx + 1} begin] {result} [webpage {idx + 1} end]\n"
+
+    # modified from DeepSeek's official instructions: https://github.com/deepseek-ai/DeepSeek-R1/tree/ef99616
+    prompt = f"""
+# 以下内容是基于用户发送的消息的搜索结果:
+{search_msg}
+在我给你的搜索结果中,每个结果都是[webpage X begin]...[webpage X end]格式的,X代表每篇文章的数字索引。
+在回答时,请注意以下几点:
+- 今天是{nowdt(TZ):%Y-%m-%d}。
+- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
+- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内,并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项;如非必要,不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题(如写论文),你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
+- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。
+- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
+- 请在适当的情况下在句子末尾引用上下文。请按照引用编号 [[X]](url) 的格式在答案中对应部分引用上下文。
+- 如果一句话源自多个上下文,请列出所有相关的引用编号,例如[[1]](url1) [[2]](url2),切记不要将引用集中在最后返回,而是在答案对应部分列出。
+- 你的回答应该综合多个相关网页来回答,不能重复引用一个网页。
+- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
+"""
+    if contexts[0]["role"] == "system":
+        contexts[0]["content"] += prompt
+    else:
+        contexts.insert(0, {"role": "system", "content": prompt})
+    return contexts
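+
+
+# Illustrative effect: with one search result, the system message gains a block
+#   [webpage 1 begin] {'title': ..., 'link': ..., 'snippet': ...} [webpage 1 end]
+# followed by the citation instructions above, so the model can cite sources
+# with [[1]](url)-style references.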
src/llm/summary.py → src/ai/chat_summary.py
@@ -12,9 +12,9 @@ from pyrogram.client import Client
 from pyrogram.types import Chat, Message
 from pyrogram.types.messages_and_media.message import Str
 
-from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ, cache
-from llm.gpt import gpt_response
-from llm.utils import BOT_TIPS, count_tokens
+from ai.main import ai_text_generation
+from ai.utils import BOT_TIPS
+from config import AI, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ, cache
 from messages.chat_history import get_history_info_list
 from messages.parser import parse_msg
 from messages.progress import modify_progress
@@ -22,7 +22,7 @@ from messages.sender import send2tg
 from messages.utils import equal_prefix, remove_prefix, startswith_prefix, to_int
 from networking import match_social_media_link
 from subtitles.subtitle import get_subtitle
-from utils import nowdt, rand_number, strings_list
+from utils import nowdt, rand_number
 
 HELP = f"""🤖**AI总结历史消息** (最多{MAX_MESSAGE_SUMMARY}条)
 ⚠️使用`{PREFIX.AI_SUMMARY}`命令生成聊天记录文件 + 聊天记录AI总结
@@ -98,12 +98,11 @@ DAILY_SUMMARY_PREFIX = "🏪**#爬楼助手**\n"
 CONTEXT_FILENAME = "聊天记录.txt"
 
 
-async def ai_summary(
+async def ai_chat_summary(
     client: Client,
     message: Message,
     summary_prefix: str | None = None,
-    summary_model_id: str = GPT.CHAT_SUMMARY_MODEL_ID,
-    summary_model_name: str = GPT.CHAT_SUMMARY_MODEL_NAME,
+    summary_model_id: str = AI.CHAT_SUMMARY_MODEL_ALIAS,
     **kwargs,
 ):
     """GPT summary of the message history.
@@ -113,7 +112,6 @@ async def ai_summary(
         message (Message): The trigger message object.
         summary_prefix (str | None): Prefix string of the response message.
         summary_model_id (str, optional): The model id to use for AI summary.
-        summary_model_name (str, optional): The model name to use for AI summary.
     """
     # send docs if message == "/summary"
     if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]) and not message.reply_to_message:
@@ -138,7 +136,7 @@ async def ai_summary(
                 return
 
     info = parse_msg(message, silent=True)
-    need_summay = startswith_prefix(info["text"], prefix=[PREFIX.AI_SUMMARY])
+    need_summay = startswith_prefix(info["text"], prefix=PREFIX.AI_SUMMARY)
     # replace /combine with /summary, because we need to use `/summary` to match different patterns
     info["text"] = re.sub(r"^" + PREFIX.COMBINATION, PREFIX.AI_SUMMARY, info["text"], flags=re.IGNORECASE)
     num_history = MAX_MESSAGE_SUMMARY
@@ -176,18 +174,13 @@ async def ai_summary(
         return
     # set custom chat_id and message_id (useful for debug)
     if matched := re.search(r"cid=(-?\w+)", info["text"], re.IGNORECASE):
-        # check if cid is in whitelist
-        cid = to_int(matched.group(1))
-        if str(cid) in strings_list(GPT.SUMMARY_WHITELIST_CUSTOM_CHATS):
-            info["cid"] = to_int(matched.group(1))
-        else:
-            await send2tg(client, message, texts="该chatid不在白名单中, 无法对其进行总结", **kwargs)
-            return
+        info["cid"] = to_int(matched.group(1))
     if matched := re.search(r"mid=(\d+)", info["text"], re.IGNORECASE):
         offset_id = int(matched.group(1)) + 1  # include this message
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"📝正在获取历史消息...\n⏩开始时间: {begin_time:%m-%d %H:%M:%S}\n⏯️结束时间: {end_time:%m-%d %H:%M:%S}", **kwargs)
         kwargs["progress"] = res[0]
+    __import__("ipdb").set_trace(context=15, cond=True)
     history_list = await get_history_info_list(client, info["cid"], offset_id, num_history, begin_time, end_time, filter_users)
     # parse the history contexts
     parsed = await parse_history_list(history_list)
@@ -195,11 +188,9 @@ async def ai_summary(
         await send2tg(client, message, texts=f"{num_history}条历史消息中未找到符合条件的消息", **kwargs)
         await modify_progress(del_status=True, **kwargs)
         return
-    num_tokens = count_tokens(SYSTEM_PROMPT + parsed["history"])
     msg = f"⏩开始时间: {parsed['begin_time']:%m-%d %H:%M:%S}\n"
     msg += f"⏯️结束时间: {parsed['end_time']:%m-%d %H:%M:%S}\n"
     msg += f"🔢消息条数: {parsed['num_message']}\n"
-    msg += f"🔠Token数: {num_tokens}"
     # send contexts as txt file
     with io.BytesIO(parsed["txt_format"].encode("utf-8")) as f:
         await client.send_document(to_int(message.chat.id), f, file_name=CONTEXT_FILENAME, caption=msg)
@@ -207,24 +198,14 @@ async def ai_summary(
         await modify_progress(del_status=True, **kwargs)
         return
     await modify_progress(text=f"🤖AI总结中...\n{msg}", force_update=True, **kwargs)
-    # Construct a message to call GPT
-    ai_msg = Message(id=0, chat=message.chat, text=Str(f"/ai {parsed['history']}"))
-    response = await gpt_response(
-        client,
-        ai_msg,
-        custom_model_id=summary_model_id,
-        custom_model_name=summary_model_name,
-        system_prompt=SYSTEM_PROMPT,
-        enable_gpt_tools=False,
-        enable_gemini_tools=False,
-        include_thoughts=False,
-        append_grounding=False,
-        silent=True,
+    ai_msg = Message(  # Construct a message for AI
+        id=rand_number(),
+        chat=message.chat,
+        text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {SYSTEM_PROMPT} {parsed['history']}"),
     )
-    if texts := response.get("texts"):
-        if summary_prefix is None:
-            model_name = response.get("model_name", "AI总结")
-            summary_prefix = f"🤖**{model_name}**:\n"
+    ai_res = await ai_text_generation(client, ai_msg, silent=True)
+    if texts := ai_res.get("texts"):
+        summary_prefix = summary_prefix or f"🤖**{ai_res['model_name']}**:\n"
         kwargs["reply_msg_id"] = -1  # DO NOT send as a reply message
         await send2tg(client, message, texts=f"{summary_prefix}⏩开始时间: {begin_time:%m-%d %H:%M:%S}\n⏯️结束时间: {end_time:%m-%d %H:%M:%S}\n{texts}", **kwargs)
         await modify_progress(del_status=True, **kwargs)
@@ -263,7 +244,7 @@ async def parse_history_list(info_list: list[dict]) -> dict:
     history = json.dumps(messages, ensure_ascii=False)
     """IMPORTANT: We need to remove `BOT_TIPS` in the history!
 
-    Because we need to call `gpt_response` function,
+    Because we need to call `ai_text_generation` function,
     it uses `BOT_TIPS` to check if the message is from GPT model.
 
     If the history contains `BOT_TIPS`, the context of this message will be `model` (not `user`)
@@ -345,12 +326,6 @@ async def daily_summary(client: Client):
         message = Message(
             id=rand_number(),
             chat=Chat(id=target_chat_id),
-            text=f"/summary #{duration}h cid={to_int(source_chat_id)}",  # type: ignore
-        )
-        await ai_summary(
-            client,
-            message,
-            summary_prefix=DAILY_SUMMARY_PREFIX,
-            target_chat=to_int(target_chat_id),
-            reply_msg_id=-1,
+            text=f"{PREFIX.AI_SUMMARY} #{duration}h cid={to_int(source_chat_id)}",  # type: ignore
         )
+        await ai_chat_summary(client, message, summary_prefix=DAILY_SUMMARY_PREFIX, target_chat=to_int(target_chat_id), reply_msg_id=-1)
src/ai/main.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from ai.images.models import get_image_model_configs
+from ai.images.openai_img import openai_image_generation
+from ai.images.post import http_post_image_generation
+from ai.texts.gemini import gemini_chat_completion
+from ai.texts.models import get_config_by_model_id, get_text_model_config
+from ai.texts.openai_chat import openai_chat_completions
+from ai.texts.openai_response import openai_responses_api
+from ai.texts.tool_call import get_tool_call_results
+from ai.utils import img_generation_docs
+from config import AI, PREFIX
+from messages.sender import send2tg
+from messages.utils import startswith_prefix
+
+
+async def ai_text_generation(client: Client, message: Message, *, silent: bool = False, **kwargs) -> dict:
+    texts = str(message.content).strip()
+    this_msg = message
+    prompt = texts.removeprefix(PREFIX.AI_TEXT_GENERATION).strip()
+    prompt = re.sub(r"^@([a-zA-Z0-9_\-\.]+)(\s+)?", "", prompt, flags=re.DOTALL).strip()
+    if not prompt and message.reply_to_message:
+        message = this_msg.reply_to_message
+    model_config = await get_text_model_config(this_msg)
+    if not model_config.get("model_id"):
+        return {}
+    silent = silent or model_config.get("silent", False)
+    params: dict = {"api_type": AI.TEXT_DEFAULT_PROVIDER} | model_config | kwargs | {"silent": silent}
+    if params["api_type"] == "gemini":
+        return await gemini_chat_completion(client, message, **params)
+    if params["api_type"] == "openai_responses":
+        return await openai_responses_api(client, message, **params)
+    if params["api_type"] == "openai_chat":
+        if params.get("openai_enable_tool_call", True):
+            tool_config = await get_config_by_model_id("tool_call_model", fallback_to_default=False)
+            if tool_config:
+                tool_params = params | tool_config
+                tool_results = await get_tool_call_results(client, message, **tool_params)
+                params |= tool_results
+        return await openai_chat_completions(client, message, **params)
+    return {}
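+
+# Dispatch sketch (hypothetical input): a message like "/ai @gpt4o hello"
+# (where "/ai" stands for PREFIX.AI_TEXT_GENERATION) resolves the "@gpt4o"
+# alias via get_text_model_config, then routes on api_type:
+#   "gemini"           -> gemini_chat_completion
+#   "openai_responses" -> openai_responses_api
+#   "openai_chat"      -> optional tool-call phase, then openai_chat_completions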
+
+
+async def ai_image_generation(client: Client, message: Message, **kwargs) -> dict:
+    if not startswith_prefix(message.content, PREFIX.AI_IMG_GENERATION):
+        return {}
+    texts = str(message.content).strip()
+    this_msg = message
+    prompt = texts.removeprefix(PREFIX.AI_IMG_GENERATION).strip()
+    prompt = re.sub(r"^@([a-zA-Z0-9_\-\.]+)(\s+)?", "", prompt, flags=re.DOTALL).strip()
+    if not prompt:
+        if not message.reply_to_message:
+            await send2tg(client, message, texts=await img_generation_docs(), **kwargs)
+            return {}
+        message = this_msg.reply_to_message
+    model_configs = await get_image_model_configs(this_msg)
+    if not model_configs:
+        return {}
+    for model_config in model_configs:  # entries are ordered fallbacks (see the sample config)
+        res = {}
+        if model_config.get("api_type") == "openai":
+            res = await openai_image_generation(client, message, **model_config)
+        elif model_config.get("api_type") == "post":
+            res = await http_post_image_generation(client, message, **model_config)
+        if res:
+            return res
+    return {}
src/ai/utils.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import ast
+import contextlib
+import json
+import re
+from datetime import datetime
+
+from google import genai
+from google.genai.types import HttpOptions
+from loguru import logger
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
+
+from config import AI, PREFIX, PROXY
+from database.kv import get_cf_kv
+from utils import nowdt, remove_consecutive_newlines, remove_dash, remove_pound, strings_list, zhcn
+
+EMOJI_TEXT_BOT = "🤖"
+EMOJI_IMG_BOT = "🌠"
+EMOJI_REASONING_BEGIN = "🤔"  # use emoji to separate model reasoning and content
+EMOJI_REASONING_END = "💡"
+BOT_TIPS = "(回复以继续)"  # noqa: RUF001
+
+
+async def text_generation_docs() -> str:
+    kv = await get_cf_kv(AI.TEXT_MODEL_CONFIG_KEY, cache_ttl=600, silent=True)
+    return kv.get("docs", f"🤖**AI对话**: `{PREFIX.AI_TEXT_GENERATION}` + 提示词")
+
+
+async def img_generation_docs() -> str:
+    kv = await get_cf_kv(AI.IMG_MODEL_CONFIG_KEY, cache_ttl=600, silent=True)
+    return kv.get("docs", f"🌠AI生图: `{PREFIX.AI_IMG_GENERATION}` + 提示词")
+
+
+def literal_eval(string: str | dict) -> dict:
+    if isinstance(string, dict):
+        return string
+    with contextlib.suppress(Exception):
+        string = re.sub(r"\btrue\b", "True", string)
+        string = re.sub(r"\bfalse\b", "False", string)
+        string = re.sub(r"\bnull\b", "None", string)
+        return ast.literal_eval(string)
+    return {}
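+
+# Examples (illustrative): JSON-style literals are coerced to Python ones
+# before ast.literal_eval, so both forms work:
+#   literal_eval('{"stream": true, "n": null}')  # -> {"stream": True, "n": None}
+#   literal_eval({"already": "a dict"})          # -> returned unchanged
+# Anything unparseable falls through to {}.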
+
+
+def trim_none(obj):  # recursive: accepts dicts, lists, or scalars
+    if isinstance(obj, dict):
+        return {k: trim_none(v) for k, v in obj.items() if v is not None}
+    if isinstance(obj, list):
+        return [trim_none(item) for item in obj if item is not None]
+    return obj
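+
+# Example: trim_none({"a": 1, "b": None, "c": [None, 2]}) -> {"a": 1, "c": [2]}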
+
+
+def prettify(data: dict) -> str:
+    with contextlib.suppress(Exception):
+        data = trim_none(data)
+        return json.dumps(data, ensure_ascii=False, indent=2)
+    return str(data)
+
+
+def clean_cmd_prefix(text: str) -> str:
+    for prefix in [PREFIX.AI_TEXT_GENERATION, PREFIX.AI_IMG_GENERATION]:
+        text = text.removeprefix(prefix).lstrip()
+    return re.sub(r"^@([a-zA-Z0-9_\-\.]+)(\s+)?", "", text, flags=re.DOTALL).strip()
+
+
+def clean_bot_tips(text: str) -> str:
+    # BOT_TIPS contains literal parentheses, so it must be escaped for the regex
+    return re.sub(rf"^{EMOJI_TEXT_BOT}(.*?){re.escape(BOT_TIPS)}", "", text, flags=re.DOTALL).strip()
+
+
+def clean_reasoning(text: str) -> str:
+    text = re.sub(rf"{EMOJI_REASONING_BEGIN}(.*?){EMOJI_REASONING_END}", "", text.strip(), flags=re.DOTALL).strip()
+    text = text.removeprefix(BLOCKQUOTE_EXPANDABLE_DELIM).lstrip()
+    return text.removeprefix(BLOCKQUOTE_EXPANDABLE_END_DELIM).lstrip()
+
+
+def clean_context(text: str) -> str:
+    """Remove bot prefix and reasoning content."""
+    if not text:
+        return ""
+    text = re.sub(r"^👤@.*?\/\/", "", text)  # remove markdown send_from_user
+    text = re.sub(r"^👤\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", text)  # remove html send_from_user
+    text = clean_cmd_prefix(text)
+    text = clean_bot_tips(text)
+    return clean_reasoning(text)
+
+
+def clean_source_marks(text: str) -> str:
+    """Remove [username], [message], ... marks.
+
+    Should align with the tags in `contexts.py`
+    """
+    if not text:
+        return text
+    clean_text = ""
+    for line in text.split("\n"):
+        if line.strip().startswith(("[username]:", "[filename]:", "[fileowner]:")):
+            continue
+        if line.strip() in ["[message]:", "[file content]:"]:
+            continue
+        clean_text += line + "\n"
+    return clean_text.removesuffix("\n")  # remove the last newline
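+
+# Example (illustrative): tag lines injected by contexts.py are dropped:
+#   clean_source_marks("[username]: alice\n[message]:\nhello")  # -> "hello"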
+
+
+def split_reasoning(text: str) -> tuple[str, str]:
+    """Split reasoning from text.
+
+    Args:
+        text: LLM response
+
+    Returns:
+        (reasoning, content)
+    """
+    text = clean_cmd_prefix(text)
+    text = clean_bot_tips(text)
+    content = clean_reasoning(text)
+    reasoning = ""
+    if matched := re.search(rf"{EMOJI_REASONING_BEGIN}(.*?){EMOJI_REASONING_END}", text, flags=re.DOTALL):
+        reasoning = EMOJI_REASONING_BEGIN + matched.group(1) + EMOJI_REASONING_END
+    return reasoning.strip(), content.strip()
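+
+# Example (illustrative, using the reasoning markers defined above):
+#   split_reasoning("🤔thinking...💡\nfinal answer")
+#   # -> ("🤔thinking...💡", "final answer")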
+
+
+def beautify_llm_response(text: str, newline_level: int = 3) -> str:
+    """Beautify LLM response.
+
+    Args:
+        text: LLM response
+        newline_level: cap runs of consecutive newlines at this level
+
+    Returns:
+        beautified LLM response
+    """
+    if not text:
+        return text
+    clean_text = clean_source_marks(text)
+    clean_text = remove_pound(clean_text)
+    clean_text = remove_dash(clean_text)
+    clean_text = zhcn(clean_text)
+    return remove_consecutive_newlines(clean_text, newline_level)
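+
+# Sketch of the pipeline (helper semantics assumed from their names in utils):
+# source-tag lines are dropped, heading/dash markup stripped by remove_pound()
+# and remove_dash(), text normalized via zhcn(), and blank-line runs capped at
+# `newline_level` by remove_consecutive_newlines().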
+
+
+async def clean_gemini_files():
+    """Clean Gemini files.
+
+    Gemini allows only 20 GB of data.
+    """
+    if AI.GEMINI_FILES_TTL >= 48 * 3600:
+        return
+    now = nowdt()
+    for api_key in strings_list(AI.GEMINI_API_KEYS):
+        app = genai.Client(api_key=api_key, http_options=HttpOptions(async_client_args={"proxy": PROXY.GOOGLE}))
+        for f in await app.aio.files.list():
+            if isinstance(f.update_time, datetime) and isinstance(f.name, str):
+                delta = now - f.update_time
+                if delta.total_seconds() > AI.GEMINI_FILES_TTL:
+                    logger.debug(f"Delete Gemini file: {f.name}")
+                    await app.aio.files.delete(name=f.name)
src/asr/cloudflare.py
@@ -12,7 +12,7 @@ from loguru import logger
 
 from asr.groq import merge_transcripts
 from asr.utils import audio_chunk_to_bytes, convert_single_channel, downsampe_audio, get_file_bytes, load_audio
-from config import ASR
+from config import ASR, PROXY
 from networking import hx_req
 from utils import seconds_to_time, strings_list, zhcn
 
@@ -80,7 +80,7 @@ async def cloudflare_single_file(
                 "POST",
                 headers=headers,
                 json_data=payload,
-                proxy=ASR.CLOUDFLARE_PROXY,
+                proxy=PROXY.CLOUDFLARE,
                 check_kv={"success": True},
                 check_keys=["result"],
             )
src/asr/gemini.py
@@ -7,15 +7,14 @@ from pathlib import Path
 
 from glom import glom
 from google import genai
-from google.genai.types import File, GenerateContentConfig, HttpOptions, ThinkingConfig, UploadFileConfig
+from google.genai.types import File, GenerateContentConfig, HttpOptions, UploadFileConfig
 from loguru import logger
 from pydantic import BaseModel, Field
-from pyrogram.types import Message
 
+from ai.utils import literal_eval
 from asr.groq import merge_transcripts
 from asr.utils import GEMINI_AUDIO_EXT, audio_chunk_to_path, audio_duration, convert_single_channel, downsampe_audio, load_audio
-from config import ASR, DOWNLOAD_DIR, GEMINI
-from llm.hooks import hook_gemini_httpoptions
+from config import AI, ASR, DOWNLOAD_DIR, PROXY
 from utils import guess_mime, rand_string, seconds_to_time, strings_list, zhcn
 
 
@@ -25,14 +24,7 @@ class Transcription(BaseModel):
     end: int = Field(description="end time in seconds of the sentence in the audio")
 
 
-async def gemini_asr(
-    message: Message,
-    path: str | Path,
-    model_id: str = "",
-    prompt: str = "请转录这段音频",
-    *,
-    delete_gemini_file: bool = True,
-) -> dict:
+async def gemini_asr(path: str | Path, prompt: str = "请转录这段音频", *, delete_gemini_file: bool = True) -> dict:
     """Gemini stream ASR.
 
     https://ai.google.dev/gemini-api/docs/audio
@@ -47,14 +39,12 @@ async def gemini_asr(
     audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
     duration = audio_duration(audio_path)
     if duration < ASR.GEMINI_CHUNK_SECONDS:
-        return await gemini_single_file(message, audio_path, model_id=model_id, prompt=prompt, delete_gemini_file=delete_gemini_file)
-    return await gemini_file_chunks(message, audio_path, model_id=model_id, prompt=prompt, delete_gemini_file=delete_gemini_file)
+        return await gemini_single_file(audio_path, prompt=prompt, delete_gemini_file=delete_gemini_file)
+    return await gemini_file_chunks(audio_path, prompt=prompt, delete_gemini_file=delete_gemini_file)
 
 
 async def gemini_single_file(
-    message: Message,
     path: str | Path,
-    model_id: str = "",
     prompt: str = "",
     *,
     start_seconds: int = 0,
@@ -72,25 +62,25 @@ async def gemini_single_file(
     if not path.is_file():
         return {"texts": "", "raw_texts": "", "segments": [], "error": "File not found."}
     res = {}
-    if not model_id:
-        model_id = GEMINI.ASR_MODEL
-    for api_key in strings_list(GEMINI.API_KEY, shuffle=True):
-        logger.debug(f"ASR via {model_id}: {path.as_posix()} , proxy={GEMINI.PROXY}")
-        http_options = HttpOptions(base_url=GEMINI.BASE_URL, async_client_args={"proxy": GEMINI.PROXY})
-        http_options = hook_gemini_httpoptions(http_options, message)
-        app = genai.Client(api_key=api_key, http_options=http_options)
+    for api_key in strings_list(AI.GEMINI_API_KEYS, shuffle=True):
+        logger.debug(f"ASR via {ASR.GEMINI_MODEL}: {path.as_posix()} , proxy={PROXY.GOOGLE}")
+        app = genai.Client(
+            api_key=api_key,
+            http_options=HttpOptions(
+                base_url=AI.GEMINI_BASE_URL,
+                headers=literal_eval(AI.GEMINI_DEFAULT_HEADERS),
+                async_client_args={"proxy": PROXY.GOOGLE},
+            ),
+        )
         uploaded_audio = File()
         try:
             uploaded_audio = await app.aio.files.upload(file=path, config=UploadFileConfig(mime_type=guess_mime(path)))
             genconfig = {}
-            with contextlib.suppress(Exception):
-                genconfig = json.loads(GEMINI.ASR_CONFIG)
+            if ASR.GEMINI_CONFIG:
+                genconfig |= literal_eval(ASR.GEMINI_CONFIG)
             genconfig |= {"response_mime_type": "application/json", "response_schema": list[Transcription]}
-            if GEMINI.ASR_THINKING_BUDGET is not None:
-                thinking_budget = min(round(float(GEMINI.ASR_THINKING_BUDGET)), GEMINI.MAX_THINKING_BUDGET)
-                genconfig |= {"thinking_config": ThinkingConfig(include_thoughts=False, thinking_budget=thinking_budget)}
             contents = [prompt, uploaded_audio] if prompt else [uploaded_audio]
-            params = {"model": model_id, "contents": contents, "config": GenerateContentConfig(**genconfig)}
+            params = {"model": ASR.GEMINI_MODEL, "contents": contents, "config": GenerateContentConfig(**genconfig)}
             answers = ""  # all model responses
             async for chunk in await app.aio.models.generate_content_stream(**params):
                 text = glom(chunk.model_dump(), "candidates.0.content.parts.0.text", default="") or ""
@@ -128,11 +118,9 @@ async def gemini_single_file(
 
 
 async def gemini_file_chunks(
-    message: Message,
     path: str | Path,
     chunk_seconds: float = 600,
     overlap_seconds: float = ASR.GEMINI_OVERLAP_SECONDS,
-    model_id: str = "",
     prompt: str = "",
     *,
     delete_gemini_file: bool = True,
@@ -173,9 +161,7 @@ async def gemini_file_chunks(
         await asyncio.gather(*tasks)  # convert chunks to paths
         tasks = [
             gemini_single_file(
-                message,
                 audio_path,
-                model_id=model_id,
                 prompt=prompt,
                 start_seconds=offset,
                 delete_local_file=False,
src/asr/groq.py
@@ -10,7 +10,7 @@ from glom import glom
 from loguru import logger
 
 from asr.utils import audio_chunk_to_bytes, convert_single_channel, downsampe_audio, get_file_bytes, load_audio
-from config import ASR
+from config import ASR, PROXY
 from networking import hx_req
 from utils import guess_mime, seconds_to_time, strings_list, zhcn
 
@@ -72,7 +72,7 @@ async def groq_single_file(
         files={"file": (file_name, io.BytesIO(audio_bytes), mime)},
         data=data,
         timeout=600,
-        proxy=ASR.GROQ_PROXY,
+        proxy=PROXY.GROQ,
         check_kv={"task": "transcribe"},
         check_keys=["segments"],
     )
src/asr/tecent.py
@@ -15,7 +15,7 @@ from loguru import logger
 
 from asr.groq import merge_transcripts
 from asr.utils import audio_chunk_to_bytes, audio_duration, convert_single_channel, downsampe_audio, get_file_bytes, is_english_word, load_audio
-from config import ASR, FILE_SERVER
+from config import ASR, FILE_SERVER, PROXY
 from database.alist import delete_alist, upload_alist
 from database.uguu import upload_uguu
 from networking import hx_req
@@ -137,7 +137,7 @@ async def tencent_single_asr(path_or_bytes: Path | bytes, language: str, *, offs
         headers=headers,
         content_data=payload.encode("utf-8"),
         timeout=60,
-        proxy=ASR.TENCENT_PROXY,
+        proxy=PROXY.TENCENT,
         check_keys=["Response.WordList"],
     )
     if res.get("hx_error"):
@@ -267,7 +267,7 @@ async def tencent_flash_asr(path: str | Path, engine: str, voice_format: str) ->
             headers=headers,
             content_data=await f.read(),
             timeout=60,
-            proxy=ASR.TENCENT_PROXY,
+            proxy=PROXY.TENCENT,
             check_kv={"code": 0},
             check_keys=["flash_result.0.sentence_list.0.word_list"],
         )
@@ -308,7 +308,7 @@ async def tencent_async_asr(path: str | Path, engine: str) -> dict:
         headers=headers,
         content_data=payload.encode("utf-8"),
         timeout=600,
-        proxy=ASR.TENCENT_PROXY,
+        proxy=PROXY.TENCENT,
         check_keys=["Response.Data.TaskId"],
     )
     if resp.get("hx_error"):
@@ -332,7 +332,7 @@ async def tencent_query_asr(task_id: int, file_name: str, query_times: int = 0)
         headers=headers,
         content_data=payload.encode("utf-8"),
         timeout=600,
-        proxy=ASR.TENCENT_PROXY,
+        proxy=PROXY.TENCENT,
         check_keys=["Response.Data.StatusStr"],
     )
     if result.get("hx_error"):
src/asr/utils.py
@@ -15,7 +15,7 @@ from loguru import logger
 from numpy import ndarray
 from soundfile import LibsndfileError
 
-from config import ASR, GEMINI
+from config import AI, ASR
 from multimedia import convert_to_audio
 from utils import strings_list
 
@@ -39,7 +39,7 @@ def auto_choose_asr_engine(duration: float, engine: str) -> str:
             enabled_engines.append("tencent")
         if all([ASR.CLOUDFLARE_MODEL, ASR.CLOUDFLARE_KEYS, ASR.CLOUDFLARE_CHUNK_SECONDS]):
             enabled_engines.append("cloudflare")
-        if all([GEMINI.ASR_MODEL, GEMINI.API_KEY, GEMINI.BASE_URL, ASR.GEMINI_CHUNK_SECONDS]):
+        if all([ASR.GEMINI_MODEL, AI.GEMINI_API_KEYS, AI.GEMINI_BASE_URL, ASR.GEMINI_CHUNK_SECONDS]):
             enabled_engines.append("gemini")
         if all([ASR.GROQ_MODELS, ASR.GROQ_KEYS, ASR.GROQ_MAX_BYTES, ASR.GROQ_CHUNK_SECONDS]):
             enabled_engines.append("groq")
src/asr/voice_recognition.py
@@ -7,7 +7,7 @@ from glom import glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.enums import ParseMode
-from pyrogram.types import Chat, Message
+from pyrogram.types import Message
 
 from asr.ali import ali_asr
 from asr.cloudflare import cloudflare_asr
@@ -188,6 +188,7 @@ async def voice_to_text(
 async def asr_file(
     path: str | Path,
     engine: str = "",
+    asr_prompt: str = "请转录这段音频",
     *,
     tencent_language: str = "16k_zh-PY",
     delete_local_file: bool = True,
@@ -213,13 +214,7 @@ async def asr_file(
         elif engine == "deepgram":
             res = await deepgram_asr(path)
         elif engine == "gemini":
-            res = await gemini_asr(
-                message=kwargs.get("message", Message(id=0, chat=Chat(id=0))),
-                path=path,
-                model_id=kwargs.get("gemini_asr_model_id", ""),
-                prompt=kwargs.get("gemini_asr_prompt", ""),
-                delete_gemini_file=delete_gemini_file,
-            )
+            res = await gemini_asr(path=path, prompt=asr_prompt, delete_gemini_file=delete_gemini_file)
         elif engine == "cloudflare":
             res = await cloudflare_asr(path, duration, model=kwargs.get("cf_asr_model"), prompt=kwargs.get("cf_asr_prompt"))
         elif engine == "groq":
src/danmu/entrypoint.py
@@ -14,14 +14,13 @@ from danmu.r2 import query_r2
 from danmu.server import query_server
 from danmu.turso import query_turso
 from danmu.utils import file_bytes, merge_txt_files, to_usd
-from llm.utils import convert_html
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
 from messages.utils import blockquote, equal_prefix, smart_split, startswith_prefix
 from others.emoji import CURRENCY
 from publish import publish_telegraph
-from utils import nowdt, number
+from utils import convert_html, nowdt, number
 
 HELP = f"""📖**查询直播合订本**
 `{PREFIX.DANMU}` 使用说明:
src/danmu/server.py
@@ -8,7 +8,7 @@ from zoneinfo import ZoneInfo
 import anyio
 from loguru import logger
 
-from config import DANMU, DOWNLOAD_DIR, TZ
+from config import DANMU, DOWNLOAD_DIR, PROXY, TZ
 from danmu.utils import get_bearer_token, live_date
 from messages.progress import modify_progress
 from networking import hx_req
@@ -37,7 +37,7 @@ async def query_server(dates: list[str], user: str, keyword: str, caption: str,
             payload |= {"message": keyword} if qtype == "弹幕" else {"content": keyword}
         payload["page"] = 1
         logger.debug(f"Query {qtype}: {payload}")
-        resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=DANMU.PROXY, check_kv={"code": 0}, silent=True)
+        resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=PROXY.DANMU, check_kv={"code": 0}, silent=True)
         queried_dates.append(date)
         parsed = await parse_from_server(resp.get("data", []), user, keyword, super_chats, qtype)
         count = parsed.get("count", 0)
@@ -49,7 +49,7 @@ async def query_server(dates: list[str], user: str, keyword: str, caption: str,
         while len(resp.get("data", [])) == payload["limit"] and parsed.get("count", 0):
             payload["page"] += 1
             logger.debug(f"Query {qtype}: {payload}")
-            resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=DANMU.PROXY, check_kv={"code": 0}, silent=True)
+            resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=PROXY.DANMU, check_kv={"code": 0}, silent=True)
             parsed = await parse_from_server(resp.get("data", []), user, keyword, super_chats, qtype)
             total_count += parsed.get("count", 0)
             texts += parsed.get("texts", "")
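
The query loop above pages while the server keeps returning full pages: `len(resp["data"]) == limit` means more rows may follow, and a short page terminates the loop. A standalone sketch of that pattern, with a hypothetical `fetch_page` standing in for the `hx_req` POST:

```python
# Full-page pagination sketch: a page shorter than `limit` is the last one.
# fetch_page() is a hypothetical stand-in for the hx_req POST above.
async def fetch_all(fetch_page, limit: int = 100) -> list:
    rows, page = [], 1
    while True:
        batch = await fetch_page(page=page, limit=limit)
        rows.extend(batch)
        if len(batch) < limit:
            break
        page += 1
    return rows
```
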
src/danmu/sync.py
@@ -8,7 +8,7 @@ from zoneinfo import ZoneInfo
 from glom import flatten, glom
 from loguru import logger
 
-from config import DANMU, TZ, cutter
+from config import DANMU, PROXY, TZ, cutter
 from danmu.utils import TURSO_KWARGS, get_bearer_token, merge_json, simplify_json
 from database.r2 import get_cf_r2, list_cf_r2, set_cf_r2
 from database.turso import insert_statement, turso_create_table, turso_exec, turso_parse_resp
@@ -62,7 +62,7 @@ async def sync_server_to_turso(qtype: str) -> None:
         headers = {"Authorization": f"Bearer {await get_bearer_token()}", "X-schema": year}
         params = {"liveDate": year} if qtype == "弹幕" else {"srtCount": 1, "liveDate": year}
         api = f"{DANMU.BASE_URL}/liveInfo/queryListBySelector"
-        liveinfo_list: list[dict] = await hx_req(api, headers=headers, proxy=DANMU.PROXY, params=params, silent=True)  # type: ignore
+        liveinfo_list: list[dict] = await hx_req(api, headers=headers, proxy=PROXY.DANMU, params=params, silent=True)  # type: ignore
         if glom(liveinfo_list, "hx_error", default=""):  # API server is down
             return
         for liveinfo in sorted(liveinfo_list, key=lambda x: x["liveDate"]):
@@ -71,7 +71,7 @@ async def sync_server_to_turso(qtype: str) -> None:
                 continue
             results = []
             payload = {"page": 1, "limit": DANMU.NUM_PER_QUERY, "liveDate": live_date}
-            resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=DANMU.PROXY, check_kv={"code": 0}, silent=True)
+            resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=PROXY.DANMU, check_kv={"code": 0}, silent=True)
             if resp.get("count", 0) == 0:
                 continue
             logger.trace(f"Query {qtype} date: {live_date} - {resp['count']} results")
@@ -85,7 +85,7 @@ async def sync_server_to_turso(qtype: str) -> None:
                     "POST",
                     headers=headers,
                     data={"page": page, "limit": DANMU.NUM_PER_QUERY, "liveDate": live_date},
-                    proxy=DANMU.PROXY,
+                    proxy=PROXY.DANMU,
                     check_kv={"code": 0},
                     silent=True,
                 )
@@ -214,7 +214,7 @@ async def sync_server_to_r2(qtype: str) -> None:
                 logger.trace(f"Query {qtype} date: {date}")
                 headers = {"Authorization": f"Bearer {await get_bearer_token()}", "X-schema": date[:4]}
                 payload = {"page": 1, "limit": DANMU.NUM_PER_QUERY, "liveDate": date}
-                resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=DANMU.PROXY, check_kv={"code": 0}, silent=True)
+                resp = await hx_req(api_url, "POST", headers=headers, data=payload, proxy=PROXY.DANMU, check_kv={"code": 0}, silent=True)
                 if resp.get("count", 0) == 0:
                     continue
                 logger.trace(f"Query {qtype} date: {date} - {resp['count']} results")
@@ -228,7 +228,7 @@ async def sync_server_to_r2(qtype: str) -> None:
                         "POST",
                         headers=headers,
                         data={"page": page, "limit": DANMU.NUM_PER_QUERY, "liveDate": date},
-                        proxy=DANMU.PROXY,
+                        proxy=PROXY.DANMU,
                         check_kv={"code": 0},
                         silent=True,
                     )
@@ -259,7 +259,7 @@ async def sync_server_to_r2(qtype: str) -> None:
         headers = {"Authorization": f"Bearer {await get_bearer_token()}", "X-schema": year}
         params = {"liveDate": year} if qtype == "弹幕" else {"srtCount": 1, "liveDate": year}
         api = f"{DANMU.BASE_URL}/liveInfo/queryListBySelector"
-        resp = await hx_req(api, headers=headers, proxy=DANMU.PROXY, params=params, silent=True)
+        resp = await hx_req(api, headers=headers, proxy=PROXY.DANMU, params=params, silent=True)
         new_dates_map = defaultdict(list)
         # some live dates on server are "2025-04-14_01", "2025-04-14_02", ...
         # norm dates to YYYY-MM-DD
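
As the comment notes, server-side live dates can carry `_01`/`_02` suffixes for multi-part streams; normalization simply truncates to the first ten characters. A worked example:

```python
# Truncating to 10 characters maps suffixed live dates onto plain YYYY-MM-DD.
dates = ["2025-04-14_01", "2025-04-14_02", "2025-04-15"]
assert sorted({d[:10] for d in dates}) == ["2025-04-14", "2025-04-15"]
```
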
src/danmu/utils.py
@@ -193,7 +193,7 @@ async def get_bearer_token() -> str:
         api_url,
         "POST",
         data={"userName": DANMU.AUTH_USER, "password": DANMU.AUTH_PASS},
-        proxy=DANMU.PROXY,
+        proxy=PROXY.DANMU,
         check_kv={"code": 200},
         silent=True,
     )
@@ -205,7 +205,7 @@ async def get_live_info(year: str | int) -> dict:
     params = {"liveDate": year}
     api = f"{DANMU.BASE_URL}/liveInfo/queryListBySelector"
     headers = {"Authorization": f"Bearer {await get_bearer_token()}", "X-schema": year}
-    resp: list[dict] = await hx_req(api, headers=headers, proxy=DANMU.PROXY, params=params, silent=True)  # type: ignore
+    resp: list[dict] = await hx_req(api, headers=headers, proxy=PROXY.DANMU, params=params, silent=True)  # type: ignore
     if glom(resp, "hx_error", default=""):  # API server is down
         return {}
     dates = {x["liveDate"][:10] for x in resp}
src/database/kv.py
@@ -5,7 +5,7 @@ from urllib.parse import quote_plus, unquote_plus
 from httpx import AsyncClient, AsyncHTTPTransport
 from loguru import logger
 
-from config import DB
+from config import DB, cache
 from networking import hx_req
 
 
@@ -17,11 +17,14 @@ async def get_cf_kv(
     *,
     enabled: bool = DB.CF_KV_ENABLED,
     silent: bool = False,
+    cache_ttl: int = 0,
 ) -> dict:
     """Get from Cloudflare KV."""
     if not all([enabled, account_id, namespace_id, api_token]):
         return {}
     key = quote_plus(unquote_plus(key))
+    if kv := cache.get(f"CFKV-{key}"):
+        return kv
     api = f"https://api.cloudflare.com/client/v4/accounts/{account_id}/storage/kv/namespaces/{namespace_id}/values/{key}"
     headers = {"authorization": f"Bearer {api_token}", "content-type": "application/json"}
     async with AsyncClient(http2=True, follow_redirects=True, transport=AsyncHTTPTransport(retries=3, http2=True)) as hx:
@@ -34,6 +37,8 @@ async def get_cf_kv(
             if data := resp.json():
                 if not silent:
                     logger.success(f"GET CF-KV for {key}: {data}")
+                if cache_ttl:
+                    cache.set(f"CFKV-{key}", data, ttl=cache_ttl)
                 return data
         except Exception as e:
             logger.warning(f"GET CF-KV failed for {key}: {e}")
src/database/r2.py
@@ -191,7 +191,7 @@ async def del_cf_r2(
             logger.warning(f"DEL CF-R2 failed for key={key}: {e}")
 
 
-async def key_exist_cf_r2(
+async def head_cf_r2(
     key: str,
     bucket_name: str = DB.CF_R2_BUCKET_NAME,
     account_id: str = DB.CF_ACCOUNT_ID,
@@ -200,10 +200,10 @@ async def key_exist_cf_r2(
     *,
     enabled: bool = DB.CF_R2_ENABLED,
     silent: bool = False,
-) -> bool:
-    """Check if key exists in Cloudflare R2."""
+) -> dict:
+    """Head Cloudflare R2."""
     if not all([enabled, bucket_name, account_id, aws_access_key_id, aws_secret_access_key]):
-        return False
+        return {}
     async with Session().client(
         service_name="s3",
         endpoint_url=f"https://{account_id}.r2.cloudflarestorage.com",
@@ -212,8 +212,8 @@ async def key_exist_cf_r2(
         region_name="auto",
     ) as s3:  # type: ignore
         try:
-            await s3.head_object(Bucket=bucket_name, Key=key)
+            return await s3.head_object(Bucket=bucket_name, Key=key)
         except Exception:
             if not silent:
                 logger.warning(f"`{key}` is not exist in CF-R2")
-    return False
+    return {}
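
Renaming `key_exist_cf_r2` to `head_cf_r2` and returning the raw `head_object` response keeps the old boolean check one truthiness test away while also exposing object metadata. Migration sketch (the key is an example; `ContentLength`/`ETag` are standard S3 HeadObject response fields):

```python
meta = await head_cf_r2("danmu/2025-04-14.json")  # example key
exists = bool(meta)                # old key_exist_cf_r2 behaviour
size = meta.get("ContentLength")   # object size in bytes, when present
etag = meta.get("ETag")
```
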
src/history/query.py
@@ -15,14 +15,13 @@ from database.turso import turso_exec, turso_parse_resp
 from history.d1 import get_d1_chatinfo, save_chatinfo_to_d1
 from history.turso import get_turso_chatinfo, save_chatinfo_to_turso
 from history.utils import TURSO_KWARGS, filter_response, get_chat, get_user_from_chat, is_admin, keyword_query, list_chat_ids
-from llm.utils import convert_html
 from messages.parser import parse_chat, parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
 from messages.utils import blockquote, equal_prefix, smart_split, startswith_prefix
 from others.emoji import MTYPE_EMOJI
 from publish import publish_telegraph
-from utils import myself, nowstr, slim_cid, strings_list, to_int
+from utils import convert_html, myself, nowstr, slim_cid, strings_list, to_int
 
 HELP = f"""🗣**查询当前对话聊天记录**
 `/hist` 使用说明:
src/llm/ali/text2img.py
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import asyncio
-import json
-from pathlib import Path
-from random import randint
-
-from glom import glom
-from httpx import AsyncClient
-from loguru import logger
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import TEXT2IMG
-from llm.utils import parse_as_dict
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from networking import download_file, hx_req
-from utils import strings_list
-
-
-async def ali_text2img(client: Client, message: Message, model_id: str, prompt: str, *, silent: bool = False, **kwargs) -> dict:
-    """Ali text to image.
-
-    https://help.aliyun.com/zh/model-studio/flux-api-reference
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        prompt (str): Prompt. Defaults to None.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-
-    Return:
-        {"error": str}
-    """
-    if not prompt:
-        if message.reply_to_message:
-            prompt = message.reply_to_message.content
-        else:
-            await message.reply(text="请输入图片描述。", quote=True)
-            return {}
-    model_name = model_id.split("/")[-1].title()
-    if not silent and kwargs.get("show_progress"):
-        kwargs["progress"] = (await send2tg(client, message, texts=f"🌠**{model_name}**:\n{prompt}", **kwargs))[0]
-    error = ""
-    succ = False
-    parsed = parse_as_dict(prompt, need_prefix="config=")
-    payload = {
-        "model": model_id,
-        "input": {"prompt": glom(parsed, "input.prompt", default=prompt)},
-        "parameters": {
-            "size": glom(parsed, "parameters.size", default="1024*1024"),
-            "steps": glom(parsed, "parameters.steps", default=50),
-            "seed": glom(parsed, "parameters.seed", default=randint(0, 2147483647)),
-        },
-    }
-    if "stable-diffusion" in model_id:
-        payload |= {"parameters": {"n": glom(parsed, "parameters.n", default=4)}}
-    for api_key in strings_list(TEXT2IMG.ALI_API_KEY, shuffle=True):
-        headers = {
-            "X-DashScope-Async": "enable",
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json",
-        }
-        api_url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis"
-        httpx_client = AsyncClient(proxy=TEXT2IMG.ALI_PROXY, headers=headers, timeout=10)
-        try:
-            response = await httpx_client.post(api_url, json=payload)
-            resp = response.json()
-            if resp.get("message"):
-                error = resp["message"]
-                logger.error(error)
-                continue
-            finished = await wait_for_response(resp["output"]["task_id"], api_key)
-            if images := finished.get("images"):
-                media = [{"photo": img} for img in images]
-                await send2tg(client, message, texts=json.dumps(payload, ensure_ascii=False, indent=2), media=media, **kwargs)
-                succ = True
-                break
-            if finished.get("error"):
-                error = finished["error"]
-                logger.error(error)
-                continue
-        except Exception as e:
-            logger.error(e)
-    if error and not succ:
-        await modify_progress(text=f"❌{error}", force_update=True, **kwargs)
-    else:
-        await modify_progress(del_status=True, **kwargs)
-    return {"error": error} if error else {}
-
-
-async def wait_for_response(task_id: str, api_key: str) -> dict:
-    api = f"https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}"
-    headers = {"Authorization": f"Bearer {api_key}"}
-    resp = await hx_req(api, headers=headers, silent=True, proxy=TEXT2IMG.ALI_PROXY, check_keys=["output.task_status"])
-    task_status = resp["output"]["task_status"]
-    if task_status == "FAILED":
-        error = glom(resp, "output.message", default="")
-        return {"error": error}
-    if task_status == "SUCCESS":
-        resp = await hx_req(api, headers=headers, silent=True, proxy=TEXT2IMG.ALI_PROXY, check_keys=["output.result"])
-        return resp["output"]["result"]
-    while task_status in ["PENDING", "RUNNING"]:
-        await asyncio.sleep(1)
-        logger.trace(f"Waiting for Ali Text2IMG, TaskID: {task_id}")
-        resp = await hx_req(api, headers=headers, silent=True, proxy=TEXT2IMG.ALI_PROXY, check_keys=["output.task_status"])
-        task_status = resp["output"]["task_status"]
-    if task_status == "SUCCEEDED":
-        img_urls = glom(resp, "output.results.*.url", default=[])
-        tasks = [download_file(url, proxy=TEXT2IMG.ALI_PROXY) for url in img_urls]
-        paths = await asyncio.gather(*tasks)
-        if all(Path(path).is_file() for path in paths):
-            return {"images": paths}
-    return {}
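
The deleted `wait_for_response` implemented DashScope's async-task flow: submit a job, then poll `/api/v1/tasks/{task_id}` until the status leaves `PENDING`/`RUNNING`. A self-contained sketch of that polling loop, with a hypothetical `fetch_status` coroutine in place of `hx_req`:

```python
import asyncio


async def poll_task(task_id: str, fetch_status) -> str:
    """Poll a DashScope-style task until it settles.

    fetch_status is a hypothetical coroutine returning the task_status string.
    """
    status = await fetch_status(task_id)
    while status in ("PENDING", "RUNNING"):
        await asyncio.sleep(1)
        status = await fetch_status(task_id)
    return status  # typically "SUCCEEDED" or "FAILED"
```
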
src/llm/ali/zimage.py
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import base64
-import json
-from pathlib import Path
-
-import anyio
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import DOWNLOAD_DIR, TEXT2IMG
-from llm.utils import parse_as_dict
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from networking import hx_req
-from utils import rand_string
-
-
-async def zimage_text2img(client: Client, message: Message, prompt: str, *, silent: bool = False, **kwargs):
-    """Z-Image text to image.
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        prompt (str): Prompt. Defaults to None.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-    """
-    if not prompt:
-        if message.reply_to_message:
-            prompt = message.reply_to_message.content
-        else:
-            await message.reply(text="请输入图片描述。", quote=True)
-            return
-    if not silent and kwargs.get("show_progress"):
-        kwargs["progress"] = (await send2tg(client, message, texts=f"🌠**Z-Image**:\n{prompt}", **kwargs))[0]
-    resp = await hx_req(
-        TEXT2IMG.ZIMAGE_API_URL,
-        "POST",
-        headers={"Content-Type": "application/json"},
-        json_data=parse_as_dict(prompt, need_prefix="config=") or {"prompt": prompt},
-        proxy=TEXT2IMG.ZIMAGE_PROXY,
-        check_kv={"mime_type": "image/png"},
-        timeout=600,
-        silent=True,
-    )
-    if b64_json := resp.get("b64_json"):
-        image_bytes = base64.b64decode(b64_json)
-        save_path = Path(DOWNLOAD_DIR) / f"{rand_string(10)}.png"
-        async with await anyio.open_file(save_path, "wb") as f:
-            await f.write(image_bytes)
-        media = [{"photo": save_path.as_posix()}]
-        await send2tg(client, message, texts="🌠**Z-Image**:\n" + json.dumps(resp["params"], ensure_ascii=False, indent=2), media=media, **kwargs)
-    await modify_progress(del_status=True, **kwargs)
src/llm/cloudflare/text2img.py
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import base64
-import contextlib
-import json
-from pathlib import Path
-
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import DOWNLOAD_DIR, TEXT2IMG
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from networking import hx_req
-from utils import rand_string, strings_list
-
-
-async def cloudflare_text2img(client: Client, message: Message, model_id: str, prompt: str, *, silent: bool = False, **kwargs) -> dict:
-    """Cloudflare text to image.
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        prompt (str): Prompt. Defaults to None.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-    """
-    if not prompt:
-        if message.reply_to_message:
-            prompt = message.reply_to_message.content
-        else:
-            await message.reply(text="请输入图片描述。", quote=True)
-            return {}
-
-    model_name = model_id.split("/")[-1].title()
-    if not silent and kwargs.get("show_progress"):
-        kwargs["progress"] = (await send2tg(client, message, texts=f"🌠**{model_name}**:\n{prompt}", **kwargs))[0]
-    for api_key in strings_list(TEXT2IMG.CF_API_KEY, shuffle=True):
-        account_id, token = api_key.split(":")
-        resp = await hx_req(
-            f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_id}",
-            method="POST",
-            headers={"Authorization": f"Bearer {token}"},
-            json_data={"prompt": prompt},
-            timeout=300,
-            proxy=TEXT2IMG.CF_PROXY,
-            rformat="content",
-        )
-        if error := resp.get("hx_raw"):
-            await modify_progress(text="❌生成失败\n" + json.dumps(error, ensure_ascii=False, indent=2), force_update=True, **kwargs)
-            continue
-        path = save_img(resp["content"])
-        if path.is_file():
-            await send2tg(client, message, texts=f"🌠**{model_name}**:\n{prompt}", media=[{"photo": path}], **kwargs)
-            await modify_progress(del_status=True, **kwargs)
-            break
-    return {}
-
-
-def save_img(data: bytes) -> Path:
-    """Save image to file."""
-    path = Path(DOWNLOAD_DIR) / f"{rand_string(10)}.png"
-    # flux response
-    with contextlib.suppress(Exception):
-        json_data = json.loads(data.decode())
-        if json_data.get("success"):
-            with open(path, "wb") as f:
-                f.write(base64.b64decode(json_data["result"]["image"]))
-            return path
-
-    # stable diffusion response
-    with contextlib.suppress(Exception), open(path, "wb") as f:
-        f.write(data)
-    return path
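
The deleted `save_img` handled two Workers AI response shapes: flux models return a JSON envelope with a base64 `result.image` field, while stable-diffusion models return raw image bytes. A minimal decoder sketch of that distinction:

```python
import base64
import json


def decode_workers_ai_image(data: bytes) -> bytes:
    """Return raw image bytes from either Workers AI response shape."""
    try:  # flux: JSON envelope carrying a base64-encoded image
        payload = json.loads(data.decode())
        if isinstance(payload, dict) and payload.get("success"):
            return base64.b64decode(payload["result"]["image"])
    except (UnicodeDecodeError, json.JSONDecodeError, KeyError):
        pass
    return data  # stable diffusion: the body is already the image
```
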
src/llm/doubao/text2img.py
@@ -1,85 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import json
-from pathlib import Path
-from random import randint
-
-from glom import glom
-from loguru import logger
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import TEXT2IMG
-from llm.contexts import base64_media
-from llm.utils import parse_as_dict
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from networking import download_file, hx_req
-from utils import strings_list
-
-
-async def doubao_genimg(client: Client, message: Message, model_id: str, prompt: str, *, silent: bool = False, **kwargs) -> dict:
-    """Doubao image generation.
-
-    https://www.volcengine.com/docs/82379/1541523
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        prompt (str): Prompt. Defaults to None.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-
-    Return:
-        {"error": str}
-    """
-    if not prompt:
-        await message.reply(text="缺少提示词", quote=True)
-        return {}
-    model_name = model_id.split("/")[-1].title()
-    if not silent and kwargs.get("show_progress"):
-        kwargs["progress"] = (await send2tg(client, message, texts=f"🌠**{model_name}**:\n{prompt}", **kwargs))[0]
-    error = ""
-    succ = False
-    parsed = parse_as_dict(prompt, need_prefix="config=")
-    config = {
-        "model": model_id,
-        "prompt": parsed.get("prompt", prompt),
-        "size": parsed.get("size", "4K"),
-        "watermark": parsed.get("watermark", False),
-        "seed": parsed.get("seed", randint(0, 2147483647)),
-    }
-    images = await get_ctx_images(client, message)
-    payload = config | {"image": images} if images else config
-    for api_key in strings_list(TEXT2IMG.DOUBAO_API_KEY, shuffle=True):
-        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-        api_url = "https://ark.cn-beijing.volces.com/api/v3/images/generations"
-        resp = await hx_req(api_url, "POST", json_data=payload, headers=headers, proxy=TEXT2IMG.DOUBAO_PROXY, max_retry=0)
-        if url := glom(resp, "data.0.url", default=""):
-            img_path = await download_file(url, proxy=TEXT2IMG.DOUBAO_PROXY)
-            if Path(img_path).is_file():
-                caption = f"🌠**{model_name}**:\n{json.dumps(config, ensure_ascii=False, indent=2)}\n[下载原图]({url}) (24h内有效)"
-                await send2tg(client, message, texts=caption, media=[{"photo": img_path}], **kwargs)
-                succ = True
-                break
-        elif error := resp.get("hx_raw"):
-            await modify_progress(text=f"❌生成失败\n{json.dumps(error, ensure_ascii=False, indent=2)}", force_update=True, **kwargs)
-            logger.error(error)
-            continue
-    if succ:
-        await modify_progress(del_status=True, **kwargs)
-    return {"error": error} if error else {}
-
-
-async def get_ctx_images(client: Client, message: Message) -> str | list[str]:
-    """Get image contexts from message."""
-    messages = []
-    if reply_msg := message.reply_to_message:
-        messages.extend(await client.get_media_group(reply_msg.chat.id, reply_msg.id) if reply_msg.media_group_id else [reply_msg])
-    messages.extend(await client.get_media_group(message.chat.id, message.id) if message.media_group_id else [message])
-    images = []
-    for msg in messages:
-        if not msg.photo:
-            continue
-        info = await base64_media(client, msg)
-        images.append(f"data:image/{info['ext']};base64,{info['base64']}")
-    return images[0] if len(images) == 1 else images
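
The deleted `get_ctx_images` passed reference images to the Ark endpoint as base64 data URLs, sending a bare string for one image and a list for several. Shape sketch (the payload below is an illustrative, truncated example):

```python
# A single reference image is sent as a bare data URL, multiple as a list.
b64 = "iVBORw0KGgoAAAANSUhEUg"  # example base64 payload, truncated
images = [f"data:image/png;base64,{b64}"]
image_param = images[0] if len(images) == 1 else images
```
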
src/llm/gemini/chat.py
@@ -1,236 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import contextlib
-import json
-
-from google import genai
-from google.genai import types
-from loguru import logger
-from pyrogram.client import Client
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
-from pyrogram.types import Message, ReplyParameters
-
-from config import GEMINI, GPT, TEXT_LENGTH
-from llm.contexts import get_conversation_contexts, get_conversations
-from llm.gemini.utils import add_grounding_results, gemini_logging, parse_response
-from llm.hooks import hook_gemini_httpoptions
-from llm.utils import BOT_TIPS, REASONING_BEGIN, REASONING_END, beautify_llm_response, clean_cmd_prefix, shuffle_keys
-from messages.parser import parse_msg
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from messages.utils import blockquote, count_without_entities, smart_split
-
-
-async def gemini_chat_completion(
-    client: Client,
-    message: Message,
-    *,
-    model_id: str = GEMINI.TEXT_MODEL,
-    model_name: str = GEMINI.TEXT_MODEL_NAME,
-    enable_tools: bool = True,
-    append_grounding: bool = True,
-    disable_thinking: bool = False,
-    include_thoughts: bool = True,
-    system_prompt: str | None = None,
-    silent: bool = False,
-    **kwargs,
-) -> dict:
-    r"""Get Gemini response.
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        enable_tools (bool, optional): Whether to enable tools. Defaults to True.
-        append_grounding (bool, optional): Whether to append grounding to the response. Defaults to True.
-        disable_thinking (bool, optional): Whether to disable thinking. Defaults to False.
-        include_thoughts (bool, optional): Whether to include thoughts. Defaults to True.
-        system_prompt (str | None, optional): System prompt. Defaults to None.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-    """
-    info = parse_msg(message, silent=True, use_cache=False)
-    tools = [types.Tool(url_context=types.UrlContext())]
-    if not model_id.startswith("gemini-3"):  # google search tool is paid for gemini-3 models
-        tools.append(types.Tool(google_search=types.GoogleSearch()))
-
-    # parse config from environment variable
-    genconfig = {}
-    with contextlib.suppress(Exception):
-        extra_config_str = GEMINI.TEXT_CONFIG
-        genconfig = json.loads(extra_config_str)
-    try:
-        real_prompt = clean_cmd_prefix(info["text"], model_id) or clean_cmd_prefix(info["reply_text"], model_id)
-        msg = f"🤖**{model_name}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{real_prompt}”"[:TEXT_LENGTH]
-        if not silent and kwargs.get("show_progress"):
-            kwargs["progress"] = (await send2tg(client, message, texts=msg, **kwargs))[0]
-        genconfig |= {"response_modalities": ["TEXT"], "media_resolution": types.MediaResolution.MEDIA_RESOLUTION_HIGH}
-        if enable_tools:
-            genconfig |= {"tools": tools}
-        if system_prompt is not None:
-            genconfig |= {"system_instruction": system_prompt}
-        elif GEMINI.PREFER_LANG:
-            genconfig |= {"system_instruction": f"请优先使用{GEMINI.PREFER_LANG}思考和回复"}
-
-        if GEMINI.TEXT_THINKING_BUDGET is not None and not disable_thinking:
-            thinking_budget = min(round(float(GEMINI.TEXT_THINKING_BUDGET)), GEMINI.MAX_THINKING_BUDGET)
-            genconfig |= {"thinking_config": types.ThinkingConfig(include_thoughts=include_thoughts, thinking_budget=thinking_budget)}
-        if model_id.startswith("gemini-3") and not disable_thinking:
-            genconfig |= {"thinking_config": types.ThinkingConfig(include_thoughts=include_thoughts, thinking_level=types.ThinkingLevel.HIGH)}
-        params = {"model": model_id, "conversations": get_conversations(message), "config": types.GenerateContentConfig(**genconfig)}
-        logger.trace(params)
-        return await gemini_stream(client, message, model_name, params, append_grounding=append_grounding, silent=silent, **kwargs)
-    except Exception as e:
-        logger.error(e)
-    return {}
-
-
-async def gemini_stream(
-    client: Client,
-    message: Message,
-    model_name: str,
-    params: dict,
-    prefix: str | None = None,
-    retry: int = 0,
-    max_retry: int | None = None,
-    last_error: str = "",
-    *,
-    silent: bool = False,
-    append_grounding: bool = True,
-    single_thinking_msg: bool = True,
-    remove_thinking: bool = True,
-    **kwargs,
-) -> dict:
-    """Gemini stream response.
-
-    Args:
-        single_thinking_msg (bool, optional): Only use one message for displaying thinking.
-        remove_thinking (bool, optional): Remove thinking parts once finished.
-
-    Returns:
-        dict: {"texts": str, "thoughts": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
-    """
-    if prefix is None:
-        prefix = f"🤖**{model_name}**:{BOT_TIPS}\n"
-    answers = ""  # all model responses
-    thoughts = ""  # all model thoughts
-    runtime_texts = ""  # for a single telegram message
-    init_status_msg = None if silent else kwargs.get("progress")
-    status_msg = init_status_msg
-    status_mid = status_msg.id if isinstance(status_msg, Message) else message.id
-    if not kwargs.get("gemini_api_keys"):
-        kwargs["gemini_api_keys"] = shuffle_keys(GEMINI.API_KEY)
-    api_keys = [x.strip() for x in kwargs["gemini_api_keys"].split(",") if x.strip()]
-    max_retry = len(api_keys) - 1 if max_retry is None else max_retry
-    resp = {}
-    sent_messages = []
-    try:
-        if retry > min(len(api_keys) - 1, max_retry):
-            logger.error(f"[Gemini] Failed after {retry} retries")
-            await modify_progress(message=init_status_msg, text=last_error, force_update=True)
-            return {"error": last_error}
-        api_key = kwargs.get("gemini_api_key", api_keys[retry])
-        http_options = types.HttpOptions(base_url=GEMINI.BASE_URL, async_client_args={"proxy": GEMINI.PROXY})
-        http_options = hook_gemini_httpoptions(http_options, message)
-        app = genai.Client(api_key=api_key, http_options=http_options)
-        # Construct the request params
-        if "conversations" in params:  # convert conversations to contents
-            params["contents"] = await get_conversation_contexts(client, params["conversations"], model_id=params["model"], ctx_format="gemini", app=app)
-        gemini_logging(params["contents"])
-        is_reasoning = False
-        is_reasoning_conversation = None  # to indicate whether it is a reasoning conversation
-        genai_params = {"model": params["model"], "contents": params["contents"], "config": params["config"]}
-        length = 0
-        async for chunk in await app.aio.models.generate_content_stream(**genai_params):
-            resp = parse_response(chunk.model_dump())
-            answer = resp.get("texts", "")
-            thinking = resp.get("thinking", "")
-            if is_reasoning_conversation is None and thinking:
-                is_reasoning_conversation = True
-
-            if thinking and not is_reasoning:  # First time receiving reasoning content
-                is_reasoning = True
-                runtime_texts += f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{thinking.lstrip()}"
-            elif thinking and is_reasoning:  # Receiving reasoning content and is reasoning
-                runtime_texts += thinking
-            elif is_reasoning_conversation is True and is_reasoning:  # Receiving response, close reasoning flag
-                is_reasoning = False
-                runtime_texts = answer.lstrip() if remove_thinking else f"{runtime_texts.rstrip()}{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}\n" + answer.lstrip()
-            else:
-                runtime_texts += answer
-
-            thoughts += thinking
-            answers += answer
-            runtime_texts = beautify_llm_response(runtime_texts)
-            length = await count_without_entities(prefix + runtime_texts)
-            if length <= TEXT_LENGTH:
-                if len(runtime_texts.removeprefix(prefix)) > 10:  # start response if answer is not empty
-                    await modify_progress(message=status_msg, text=prefix + runtime_texts, detail_progress=True)
-            else:  # answers is too long, split it into multiple messages
-                parts = await smart_split(prefix + runtime_texts)
-                if len(parts) == 1:
-                    continue
-                if is_reasoning and single_thinking_msg:
-                    runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{parts[-1].lstrip()}"  # remove previous thinking
-                    await modify_progress(message=status_msg, text=parts[0], force_update=True)  # force send the first part
-                else:
-                    await modify_progress(message=status_msg, text=blockquote(parts[0]), force_update=True)  # force send the first part
-                    runtime_texts = parts[-1]  # keep the last part
-                    if is_reasoning:
-                        runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{runtime_texts.lstrip()}"
-                    if not silent:
-                        status_msg = await client.send_message(message.chat.id, text=prefix + runtime_texts, reply_parameters=ReplyParameters(message_id=status_mid))  # the new message
-                        sent_messages.append(status_msg)
-                        status_mid = status_msg.id
-
-        await app.aio.aclose()
-        # all chunks are processed
-        if is_reasoning or not answers.strip():  # empty response
-            return await gemini_stream(
-                client,
-                message,
-                model_name,
-                params,
-                prefix=prefix,
-                retry=retry + 1,
-                last_error=last_error,
-                silent=silent,
-                append_grounding=append_grounding,
-                **kwargs,
-            )
-        if append_grounding:  # add grounding to the response
-            answers = await add_grounding_results(answers, resp["grounding_chunks"], resp["grounding_supports"])
-            runtime_texts = await add_grounding_results(runtime_texts, resp["grounding_chunks"], resp["grounding_supports"])
-        final_thoughts = "" if remove_thinking else thoughts
-        if await count_without_entities(prefix + final_thoughts + answers) <= TEXT_LENGTH - 10:  # short answer in single msg
-            if length > GPT.COLLAPSE_LENGTH:  # collapse the response if the answer is too long
-                quoted = REASONING_BEGIN + final_thoughts.strip() + REASONING_END + "\n\n" + answers.strip() if final_thoughts.strip() else answers.strip()
-                await modify_progress(message=status_msg, text=f"{prefix}{blockquote(quoted)}", force_update=True)
-            else:
-                quoted = blockquote(REASONING_BEGIN + final_thoughts.strip() + REASONING_END) + "\n" if final_thoughts.strip() else ""
-                await modify_progress(message=status_msg, text=f"{prefix}{quoted}{answers}", force_update=True)
-        # total length is too long, answers are splitted into multiple messages
-        elif length > GPT.COLLAPSE_LENGTH:
-            await modify_progress(message=status_msg, text=prefix + blockquote(runtime_texts), force_update=True)
-        else:
-            await modify_progress(message=status_msg, text=prefix + runtime_texts, force_update=True)
-
-    except Exception as e:
-        error = str(e)
-        if "resp" in locals():
-            error += f"\n{resp}"
-        logger.error(error)
-        with contextlib.suppress(Exception):
-            await modify_progress(message=init_status_msg, text=error, force_update=True)
-            [await modify_progress(msg, del_status=True) for msg in sent_messages]
-        return await gemini_stream(
-            client,
-            message,
-            model_name,
-            params,
-            prefix=prefix,
-            retry=retry + 1,
-            last_error=error,
-            silent=silent,
-            append_grounding=append_grounding,
-            **kwargs,
-        )
-    return {"texts": answers, "thoughts": thoughts, "prefix": prefix, "model_name": model_name, "sent_messages": sent_messages}
src/llm/gemini/text2img.py
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from glom import flatten, glom
-from google import genai
-from google.genai.types import ContentListUnion, GenerateContentConfig, HttpOptions, Part
-from loguru import logger
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import DOWNLOAD_DIR, PREFIX, TEXT2IMG
-from llm.contexts import get_conversations
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from messages.utils import remove_prefix
-from utils import rand_number, strings_list
-
-if TYPE_CHECKING:
-    from io import BytesIO
-
-
-async def gemini_text2img(
-    client: Client,
-    message: Message,
-    model_id: str,
-    prompt: str,
-    *,
-    silent: bool = False,
-    **kwargs,
-) -> dict:
-    """Gemini text to image.
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-
-    Returns:
-        dict: {"texts": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
-    """
-    model_name = model_id.split("/")[-1].title()
-    if not silent and kwargs.get("show_progress"):
-        kwargs["progress"] = (await send2tg(client, message, texts=f"🍌**{model_name}**:\n{prompt}", **kwargs))[0]
-
-    for api_key in strings_list(TEXT2IMG.GEMINI_API_KEY, shuffle=True):
-        try:
-            http_options = HttpOptions(base_url=TEXT2IMG.GEMINI_BASE_URL, async_client_args={"proxy": TEXT2IMG.GEMINI_PROXY})
-            app = genai.Client(api_key=api_key, http_options=http_options)
-            contents = await gen_prompts(client, message)
-            logger.trace(contents)
-            response = await app.aio.models.generate_content(
-                model=model_id,
-                contents=contents,
-                config=GenerateContentConfig(response_modalities=["IMAGE"]),
-            )
-            logger.trace(response)
-            await app.aio.aclose()
-            caption = ""
-            media = []
-            finish_reason = glom(response, "candidates.0.finish_reason.name", default="STOP")
-            if finish_reason != "STOP":
-                await modify_progress(text=f"❌生成失败: {finish_reason}", force_update=True, **kwargs)
-                continue
-            for part in flatten(glom(response, "candidates.*.content.parts", default=[])):
-                if part.text:
-                    caption += part.text
-                elif image := part.as_image():
-                    ext = part.inline_data.mime_type.split("/")[-1]
-                    save_path = Path(DOWNLOAD_DIR) / f"{rand_number()}.{ext}"
-                    image.save(save_path)
-                    media.append({"photo": save_path})
-            logger.success(f"🍌{model_name}: {caption}")
-            if media:
-                sent_message = await send2tg(client, message, caption_above=True, texts=f"🍌**{model_name}**:", media=media, **kwargs)
-                await modify_progress(del_status=True, **kwargs)
-                return {
-                    "prefix": f"🍌**{model_name}**:",
-                    "model_name": model_name,
-                    "texts": caption,
-                    "sent_message": sent_message,
-                }
-        except Exception as e:
-            logger.error(e)
-            await modify_progress(text=str(e), force_update=True, **kwargs)
-    return {}
-
-
-async def gen_prompts(client: Client, message: Message) -> ContentListUnion:
-    """Generate prompts."""
-    prompts = []
-    for msg in get_conversations(message):  # old to new
-        messages = await client.get_media_group(msg.chat.id, msg.id) if msg.media_group_id else [msg]
-        role = "model" if any(m.content.startswith(f"🍌{TEXT2IMG.GEMINI_MODEL.title()}") for m in messages) else "user"
-        parts = []
-        for m in messages:
-            try:
-                if m.photo:
-                    buffer: BytesIO = await m.download(in_memory=True)  # type: ignore
-                    ext = Path(buffer.name).suffix.removeprefix(".").replace("jpg", "jpeg")
-                    parts.append(Part.from_bytes(data=buffer.getvalue(), mime_type=f"image/{ext}"))
-                if role == "user" and m.content:
-                    text = remove_prefix(m.content, PREFIX.GENIMG)
-                    text = remove_prefix(text, "@gemini")
-                    parts.append(Part.from_text(text=text))
-            except Exception as e:
-                logger.error(e)
-        prompts.append({"role": role, "parts": parts})
-    return prompts
src/llm/gemini/utils.py
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import asyncio
-import contextlib
-from io import BytesIO
-from pathlib import Path
-
-from glom import glom
-from google.genai import types
-from loguru import logger
-from PIL import Image
-
-from config import DOWNLOAD_DIR
-from llm.utils import beautify_llm_response, clean_source_marks
-from networking import flatten_rediercts
-from utils import number_to_emoji, rand_string
-
-
-def parse_response(data: dict) -> dict:
-    """Parse gemini response, includes texts, image and websearch."""
-    parts = glom(data, "candidates.0.content.parts", default=[]) or []
-    gemini_logging(parts)
-    grounding_chunks = glom(data, "candidates.0.grounding_metadata.grounding_chunks", default=[]) or []
-    grounding_supports = glom(data, "candidates.0.grounding_metadata.grounding_supports", default=[]) or []
-    texts = ""
-    thinking = ""
-    media = []
-    for item in parts:
-        if item.get("text") is not None:
-            if item.get("thought"):
-                thinking += item["text"]
-            else:
-                texts += item["text"]
-        if item.get("inline_data") is not None:
-            image = Image.open(BytesIO(item["inline_data"]["data"]))
-            mime = item["inline_data"]["mime_type"]
-            ext = mime.split("/")[-1]
-            save_path = Path(DOWNLOAD_DIR) / f"{rand_string()}.{ext}"
-            image.save(save_path)
-            media.append({"photo": save_path})
-    return {
-        "texts": beautify_llm_response(texts, newline_level=2),
-        "thinking": beautify_llm_response(thinking, newline_level=2),
-        "media": media,
-        "grounding_chunks": grounding_chunks,
-        "grounding_supports": grounding_supports,
-    }
-
-
-async def add_grounding_results(answers: str, grounding_chunks: list[dict], grounding_supports: list[dict]) -> str:
-    urls = [glom(chunk, "web.uri", default="https://www.google.com") for chunk in grounding_chunks]
-    tasks = [flatten_rediercts(url) for url in urls]
-    try:
-        flatten_urls = await asyncio.gather(*tasks)
-        index2url = flatten_urls
-    except Exception as e:
-        logger.warning(e)
-        index2url = urls
-    logger.trace(f"Grounding URLs: {index2url}")
-    for support in grounding_supports:
-        indices: list[int] = support.get("grounding_chunk_indices", [])
-        logger.trace(f"Add grounding indices: {indices}")
-        indices_with_url = " ".join([f"[[{idx + 1}]]({glom(index2url, str(idx), default='https://www.google.com')})" for idx in indices])
-        if segment := glom(support, "segment.text", default=""):
-            answers = answers.replace(segment, f"{segment}{indices_with_url}", 1)
-    for idx, grounding in enumerate(grounding_chunks):
-        if idx > 9:
-            break
-        title = glom(grounding, "web.title", default="Web")
-        url = glom(index2url, str(idx), default="https://www.google.com")
-        if url in answers:
-            answers += f"\n{number_to_emoji(idx + 1)}[{title}]({url})"
-    return answers
-
-
-def gemini_logging(contexts: list):
-    """Print logs of gemini contexts."""
-    msg = ""
-    with contextlib.suppress(Exception):
-        for item in contexts:
-            if isinstance(item, str):
-                msg += f"{item}\n"
-                continue
-            if isinstance(item, types.File):
-                msg += f"[{item.mime_type}]: {item.name}\n"
-                continue
-            if not isinstance(item, dict):
-                continue
-            role = item.get("role", "").upper() or "MODEL"
-
-            # Request
-            for part in item.get("parts", []):
-                if part.inline_data:
-                    msg += f"[{role}]: Blob_Data  "
-                if part.text:
-                    msg += f"[{role}]: {part.text}  "
-            # Response
-            if item.get("text", ""):
-                msg += f"[{role}]: {item['text']}  "
-            if item.get("inline_data", ""):
-                msg += f"[{role}]: Blob_Data  "
-
-    logger.debug(f"{msg!r}")
-
-
-def openai_context_to_gemini(context: dict, *, keep_marks: bool = True) -> types.ContentUnionDict:
-    r"""(Deprecated) Convert OpenAI context to Gemini format.
-
-    Not needed anymore.
-
-    Args:
-        context (dict): {
-                "role": role,  # assistant or user
-                "content": [
-                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,encoding"}},
-                        {"type": "text", "text": "[username]: Bob\n[filename]: sample.txt\n[file content]:\nhello"}
-                    ]
-                }
-
-    Returns:
-        dict: {
-            "role": role,  # model or user
-            "parts: [
-                {"inlineData": {"mimeType": "image/jpeg", "data": "base64-encoded string"}},
-                {"text": "hello"}
-            ]
-        }
-    """
-    parts: list[types.Part] = []
-    role = "model" if context["role"] == "assistant" else "user"
-    for item in context["content"]:
-        if item["type"] == "text":
-            if keep_marks:
-                parts.append(types.Part.from_text(text=item["text"]))
-            else:
-                parts.append(types.Part.from_text(text=clean_source_marks(item["text"])))
-        elif item["type"] == "image_url":
-            data = item["image_url"]["url"].split(";base64,")
-            mime = data[0].removeprefix("data:")
-            parts.append(types.Part.from_bytes(mime_type=mime, data=data[1]))
-    return {"role": role, "parts": parts}  # type: ignore
src/llm/contexts.py
@@ -1,202 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import asyncio
-import base64
-import contextlib
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from google import genai
-from google.genai.types import FileState, Part, UploadFileConfig
-from loguru import logger
-from openai import AsyncOpenAI
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from asr.utils import GEMINI_AUDIO_EXT, downsampe_audio
-from config import DOWNLOAD_DIR, GPT
-from llm.utils import BOT_TIPS, clean_context, convert_md
-from messages.parser import parse_msg
-from utils import read_text
-
-if TYPE_CHECKING:
-    from io import BytesIO
-
-
-def get_conversations(message: Message) -> list[Message]:
-    """Get all conversation messages from old to new."""
-    messages = [message]
-    while message.reply_to_message:
-        message = message.reply_to_message
-        messages.append(message)
-    messages.reverse()  # old to new
-    return messages
-
-
-async def get_conversation_contexts(
-    client: Client,
-    conversations: list[Message],
-    model_id: str = "",
-    ctx_format: str = "openai",
-    app: genai.Client | AsyncOpenAI | None = None,
-) -> list[dict]:
-    """Generate contexts for GPT conversation.
-
-    From old to new messages.
-    """
-    # parse context for each message
-    if ctx_format.lower() == "openai":
-        contexts = [await single_gpt_context(client, message, model_id) for message in conversations]
-        contexts = [x for x in contexts if x.get("content")]
-    else:
-        contexts = [await single_gemini_context(client, message, app, model_id) for message in conversations]  # type: ignore
-        contexts = [x for x in contexts if x.get("parts")]
-
-    return contexts[: int(GPT.HISTORY_CONTEXT)]
-
-
-async def single_gpt_context(client: Client, message: Message, model_id: str = "") -> dict:
-    """Generate GPT contexts for a single message (Without considering reply message).
-
-    Returns:
-    {
-        "role": "user or assistant",
-        "content": [
-            {'type': 'text', 'text': 'caption this img'},
-            {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,base64_image'}},
-            {'type': 'image_url', 'image_url': {'url': 'https://server.com/dir/image.jpg'}},
-        ],
-    }
-    """
-    info = parse_msg(message, silent=True, use_cache=False)
-    role = "assistant" if BOT_TIPS in info["text"] else "user"
-
-    if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document", "web_page"]:
-        return {}
-
-    extra_txt_extensions = [".sh", ".json", ".xml"]  # treat these as txt file
-    extra_markdown_extensions = [".pdf", ".html", ".docx", ".pptx", ".xls", ".xlsx"]  # convert to markdown
-
-    messages = await client.get_media_group(message.chat.id, message.id) if message.media_group_id else [message]
-    contexts = []
-    for msg in messages:
-        info = parse_msg(msg, silent=True, use_cache=False)
-        sender = info["fwd_full_name"] or info["full_name"]
-        media_path = DOWNLOAD_DIR + "/" + info["file_name"]
-        try:
-            if info["mtype"] == "photo":
-                res = await base64_media(client, msg)
-                contexts.append({"type": "image_url", "image_url": {"url": f"data:image/{res['ext']};base64,{res['base64']}"}})
-            elif info["mtype"] == "document":
-                if info["mime_type"].startswith("text/") or Path(info["file_name"]).suffix in extra_txt_extensions:
-                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
-                    contexts.append(
-                        {
-                            "type": "text",
-                            "text": f"[filename]: {info['file_name']}\n[file content]:\n{read_text(fpath).strip()}",
-                        }
-                    )
-                if Path(info["file_name"]).suffix in extra_markdown_extensions:
-                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
-                    text = convert_md(fpath)
-                    Path(fpath).unlink(missing_ok=True)
-                    contexts.append(
-                        {
-                            "type": "text",
-                            "text": f"[filename]: {info['file_name']}\n[file content]:\n{text.strip()}",
-                        }
-                    )
-            # user message has entity urls, use full html
-            clean_texts = clean_context(info["html"], model_id) if role == "user" and info["entity_urls"] else clean_context(info["text"], model_id)
-            if not clean_texts:
-                continue
-            texts = f"[username]: {sender}\n[message]:\n{clean_texts}" if role == "user" and sender else clean_texts
-            contexts.append({"type": "text", "text": texts})
-        except Exception as e:
-            logger.warning(f"Download media from message failed: {e}")
-            continue
-    return {"role": role, "content": contexts} if contexts else {}
-
-
-async def single_gemini_context(client: Client, message: Message, app: genai.Client, model_id: str = "") -> dict:
-    """Generate Gemini contexts for a single message (Without considering reply message).
-
-    Returns:
-    {
-        "role": role,  # model or user
-            "parts: [
-                {"inlineData": {"mimeType": "image/jpeg", "data": "base64-encoded string"}},
-                {"text": "hello"}
-        ],
-    }
-    """
-    info = parse_msg(message, silent=True, use_cache=False)
-    role = "model" if BOT_TIPS in info["text"] else "user"
-    if info["mtype"] not in ["text", "photo", "audio", "voice", "video", "document", "web_page"]:
-        return {}
-    # gemini has built-in support for these extensions
-    gemini_extensions = [".pdf", ".html", ".css", ".csv", ".xml", ".rtf", ".mp3", ".wav", ".ogg", ".aac", ".flac", ".jpg", ".jpeg", ".webp", ".png", ".heic", ".heif"]
-    # gemini has built-in support for these mime types
-    gemini_mime_types = ["application/pdf", "application/x-javascript", "audio/ogg", "audio/mp4", "image/jpeg", "image/png", "image/webp", "image/heic", "image/heif"]
-    txt_extensions = [".txt", ".js", ".py", ".md", ".sh", ".json"]  # treat these as txt file
-    extra_markdown_extensions = [".docx", ".pptx", ".xls", ".xlsx", ".epub"]  # convert to markdown
-
-    messages = await client.get_media_group(message.chat.id, message.id) if message.media_group_id else [message]
-    parts = []
-    for msg in messages:
-        info = parse_msg(msg, silent=True, use_cache=False)
-        sender = info["fwd_full_name"] or info["full_name"]
-        media_path = DOWNLOAD_DIR + "/" + info["file_name"]
-        try:
-            if info["mtype"] in ["video", "photo", "audio", "voice"] or info["mime_type"] in gemini_mime_types or any(info["file_name"].endswith(ext) for ext in gemini_extensions):
-                fpath: str = await client.download_media(msg, media_path)  # type: ignore
-                if info["mtype"] in ["audio", "voice"] and Path(fpath).suffix not in GEMINI_AUDIO_EXT:
-                    audio_path = await downsampe_audio(fpath)
-                    fpath = audio_path.as_posix()
-                upload = await app.aio.files.upload(file=fpath, config=UploadFileConfig(display_name=info["file_name"] or f"send from {sender}"))
-                while upload.state == FileState.PROCESSING:
-                    logger.trace("Waiting for upload to complete...")
-                    await asyncio.sleep(1)
-                    upload = await app.aio.files.get(name=upload.name)  # type: ignore
-                if upload.state == FileState.ACTIVE and upload.uri:
-                    parts.append(Part.from_uri(file_uri=upload.uri, mime_type=upload.mime_type))
-                Path(fpath).unlink(missing_ok=True)
-            elif info["mtype"] == "document":
-                if info["mime_type"].startswith("text/") or Path(info["file_name"]).suffix in txt_extensions:
-                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
-                    parts.append(Part.from_text(text=f"[filename]: {info['file_name']}\n[file content]:\n{read_text(fpath).strip()}"))
-                if Path(info["file_name"]).suffix in extra_markdown_extensions:
-                    fpath: str = await client.download_media(msg, media_path)  # type: ignore
-                    text = convert_md(fpath)
-                    Path(fpath).unlink(missing_ok=True)
-                    parts.append(Part.from_text(text=f"[filename]: {info['file_name']}\n[file content]:\n{text.strip()}"))
-            # user message has entity urls, use full html
-            clean_texts = clean_context(info["html"], model_id) if role == "user" and info["entity_urls"] else clean_context(info["text"], model_id)
-            if not clean_texts:
-                continue
-            texts = f"[username]: {sender}\n[message]:\n{clean_texts}" if role == "user" and sender else clean_texts
-            parts.append(Part.from_text(text=texts))
-        except Exception as e:
-            logger.warning(f"Download media from message failed: {e}")
-            continue
-    return {"role": role, "parts": parts} if parts else {}
-
-
-async def base64_media(client: Client, message: Message) -> dict:
-    data: BytesIO = await client.download_media(message, in_memory=True)  # type: ignore
-    logger.debug(f"Downloaded message media: {data.name}")
-
-    ext = Path(data.name).suffix.removeprefix(".").replace("jpg", "jpeg")
-
-    # image, video
-    b64_encoding = base64.b64encode(data.getvalue()).decode("utf-8")
-
-    # text document
-    value = ""
-    with contextlib.suppress(Exception):
-        value = data.getvalue().decode("utf-8")
-    return {
-        "ext": ext,
-        "base64": b64_encoding,
-        "value": value,
-    }
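-
-# A hedged usage sketch (names illustrative, not part of the original module):
-# the returned dict plugs into an OpenAI-style image content part, e.g.
-#   media = await base64_media(client, message)
-#   part = {"type": "image_url",
-#           "image_url": {"url": f"data:image/{media['ext']};base64,{media['base64']}"}}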
src/llm/gpt.py
@@ -1,159 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import re
-
-from loguru import logger
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import GEMINI, GPT, PREFIX, TEXT_LENGTH, cache
-from llm.contexts import get_conversation_contexts, get_conversations
-from llm.gemini.chat import gemini_chat_completion
-from llm.models import get_gpt_config, get_model_id
-from llm.response_stream import send_to_gpt_stream
-from llm.text2img import TEXT2IMG_HELP, text2img
-from llm.tools import merge_tools_response
-from llm.utils import BOT_TIPS, clean_cmd_prefix, image_emoji, llm_cleanup_files
-from messages.parser import parse_msg
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from messages.utils import count_without_entities, equal_prefix
-from utils import strings_list
-
-HELP = f"""🤖**GPT对话**
-`{PREFIX.GPT}` 后接提示词即可与GPT对话
-以 `{PREFIX.GPT}` 回复消息可将其加入上下文
-暂不支持视频, 可先用`{PREFIX.ASR}`命令转为文字后再调用`{PREFIX.GPT}`
-
-⚙️模型配置: 默认使用 **{GPT.DEFAULT_PROVIDER.lower()}** 模型
-
-🔄使用以下命令强制切换模型:
-`/gpt`: **{GPT.OPENAI_MODEL_NAME}** {image_emoji(GPT.OPENAI_ACCEPT_IMAGE)}
-`/gemini`: **{GEMINI.TEXT_MODEL_NAME}** 🎬🏞🎧
-`/ds`: **{GPT.DEEPSEEK_MODEL_NAME}** {image_emoji(GPT.DEEPSEEK_ACCEPT_IMAGE)}
-`/qwen`: **{GPT.QWEN_MODEL_NAME}** {image_emoji(GPT.QWEN_ACCEPT_IMAGE)}
-`/doubao`: **{GPT.DOUBAO_MODEL_NAME}** {image_emoji(GPT.DOUBAO_ACCEPT_IMAGE)}
-`/grok`: **{GPT.GROK_MODEL_NAME}** {image_emoji(GPT.GROK_ACCEPT_IMAGE)}
-`/kimi`: **{GPT.KIMI_MODEL_NAME}** {image_emoji(GPT.KIMI_ACCEPT_IMAGE)}
-
-⚠️注意:
-若对话历史包含图片, 但模型不支持图片 (无🏞图标), 会自动切换为 **{GPT.OMNI_PROVIDER.lower()}** 模型
-若对话历史包含视频/音频, 但模型不支持视频/音频 (无🎬/🎧图标), 会自动切换为 **{GEMINI.TEXT_MODEL_NAME}** 模型
-"""
-
-
-async def gpt_response(
-    client: Client,
-    message: Message,
-    *,
-    custom_model_id: str = "",
-    custom_model_name: str = "",
-    enable_gpt_tools: bool = True,
-    enable_gemini_tools: bool = True,
-    silent: bool = False,
-    **kwargs,
-) -> dict:
-    """Get GPT response from Various API.
-
-    `/ai text`: get response from LLM
-    `/ai @gemini-2.5-flash text`: get response from gemini-2.5-flash (custom model id)
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        custom_model_id (str, optional): Custom model id.
-        custom_model_name (str, optional): Custom model name.
-        enable_gpt_tools (bool, optional): Whether to enable GPT tools. Defaults to True.
-        enable_gemini_tools (bool, optional): Whether to enable Gemini tools. Defaults to True.
-        silent (bool, optional): Whether to disable progressing. Defaults to False.
-
-    Returns:
-        dict: {"texts": str, "thoughts": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
-    """
-    info = parse_msg(message, silent=True, use_cache=False)
-    # send help docs if the message equals the GPT prefix (e.g. "/ai")
-    if info["mtype"] == "text" and equal_prefix(info["text"], prefix=PREFIX.GPT):
-        if not message.reply_to_message:  # without reply
-            await send2tg(client, message, texts=HELP, **kwargs)
-            return {}
-        # with reply, change some information
-        info["uid"] = info["reply_uid"]
-        info["full_name"] = info["reply_full_name"]
-    if info["mtype"] == "text" and equal_prefix(info["text"], prefix=PREFIX.GENIMG) and not message.reply_to_message:
-        await send2tg(client, message, texts=TEXT2IMG_HELP, **kwargs)
-        return {}
-    model_id, is_custom_id, resp_modality = get_model_id(info, message)
-    if not model_id:
-        return {}
-    if is_custom_id:
-        custom_model_id = model_id
-    # cache media_group message, only process once
-    if media_group_id := message.media_group_id:
-        if cache.get(f"gpt-{info['cid']}-{media_group_id}"):
-            return {}
-        cache.set(f"gpt-{info['cid']}-{media_group_id}", "1", ttl=120)
-    kwargs["message_info"] = info  # save trigger message info
-    if resp_modality == "image":
-        return await text2img(client, message, enable_tools=enable_gemini_tools, **kwargs)
-
-    # handle custom model_id here
-    if matched := re.match(r"^/ai @([a-zA-Z0-9_\-\.]+)(\s+)?", info["text"]):  # match /ai @custom_model_id
-        custom_model_id = matched.group(1).strip()
-        logger.warning(f"Custom model id: {custom_model_id}")
-    allowed_model_ids = [x.lower() for x in strings_list(GEMINI.ALLOWED_CUSTOM_MODEL_IDS) + strings_list(GPT.ALLOWED_CUSTOM_MODEL_IDS)]
-    if custom_model_id and custom_model_id.lower() not in allowed_model_ids:
-        await send2tg(client, message, texts=f"⚠️不支持自定义模型: {custom_model_id}\n\n⚙️支持自定义模型列表:\n{'\n'.join(allowed_model_ids)}", **kwargs)
-        return {}
-    if custom_model_id.lower() in [x.lower() for x in strings_list(GEMINI.ALLOWED_CUSTOM_MODEL_IDS)]:
-        return await gemini_chat_completion(
-            client,
-            message,
-            model_id=custom_model_id,
-            model_name=custom_model_name or custom_model_id,
-            enable_tools=enable_gemini_tools,
-            silent=silent,
-            **kwargs,
-        )
-    if model_id == GEMINI.TEXT_MODEL and not custom_model_id:
-        return await gemini_chat_completion(client, message, enable_tools=enable_gemini_tools, silent=silent, **kwargs)
-
-    # GPT models
-    if custom_model_id:
-        model_id = custom_model_id
-    config = get_gpt_config(model_id)
-    config["friendly_name"] = custom_model_name or custom_model_id or config["friendly_name"]
-    conversations = get_conversations(message)
-    config["completions"]["messages"] = await get_conversation_contexts(client, conversations, model_id=model_id, ctx_format="openai")
-    real_prompt = clean_cmd_prefix(info["text"], model_id) or clean_cmd_prefix(info["reply_text"], model_id)
-    msg = f"🤖**{config['friendly_name']}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{real_prompt}”"[:TEXT_LENGTH]
-    if not silent and kwargs.get("show_progress"):
-        kwargs["progress"] = (await send2tg(client, message, texts=msg, **kwargs))[0]
-
-    if enable_gpt_tools:
-        config, response = await merge_tools_response(config, **kwargs)
-        # skip sending a new request if the tool model is the same as the current model
-        if response and config["completions"]["model"] == GPT.TOOLS_MODEL and response.get("content"):
-            texts = f"🤖**{config['friendly_name']}**:{BOT_TIPS}\n{response['content']}"
-            length = await count_without_entities(texts)
-            if length <= TEXT_LENGTH:
-                await modify_progress(text=texts, force_update=True, **kwargs)
-                final = {
-                    "texts": response["content"],
-                    "prefix": f"🤖**{config['friendly_name']}**:{BOT_TIPS}\n",
-                    "model_name": config["friendly_name"],
-                    "sent_messages": [kwargs["progress"]] if kwargs.get("progress") else [],
-                }
-            else:
-                final = {
-                    "texts": response["content"],
-                    "prefix": f"🤖**{config['friendly_name']}**:{BOT_TIPS}\n",
-                    "model_name": config["friendly_name"],
-                    "sent_messages": await send2tg(client, message, texts=texts, **kwargs),
-                }
-                await modify_progress(del_status=True, **kwargs)
-            llm_cleanup_files(config["completions"]["messages"])
-            return final
-    final = await send_to_gpt_stream(client, kwargs.get("progress"), config, silent=silent, **kwargs)  # type: ignore
-    llm_cleanup_files(config["completions"]["messages"])
-    return final
src/llm/hooks.py
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from google.genai.types import HttpOptions
-from pyrogram.types import Message
-
-from config import GEMINI, GPT
-from llm.prompts import modify_prompts, refine_prompts
-from messages.parser import parse_msg
-from utils import unicode_to_ascii
-
-
-def pre_hooks(client: dict, completions: dict, message_info: dict | None = None, system_prompt: str | None = None):
-    pre_openrouter_hook(client, completions)
-    pre_helicone_hook(client, message_info)
-    if system_prompt is not None:
-        modify_prompts(completions["messages"], prompt=system_prompt, role="system", method="overwrite")
-    elif GEMINI.PREFER_LANG and "gemini" in completions["model"].lower():
-        modify_prompts(completions["messages"], prompt=f"请使用{GEMINI.PREFER_LANG}回复。", role="system", method="append")
-    completions["messages"] = refine_prompts(completions["messages"])
-
-
-def pre_openrouter_hook(client: dict, completions: dict) -> None:
-    """Add special parameters for OpenRouter."""
-    if "openrouter" not in client["base_url"]:
-        return
-    if models := [x.strip() for x in GPT.OPENROUTER_FALLBACK_MODELS.split(",") if x.strip()]:
-        completions["extra_body"] = {"models": models}
-
-
-def pre_helicone_hook(client: dict, message_info: dict | None) -> None:
-    """Add special parameters for helicone gateway."""
-    if not GPT.HELICONE_API_KEY:
-        return
-    headers = client.get("default_headers", {})
-    headers |= {
-        "Helicone-Auth": f"Bearer {GPT.HELICONE_API_KEY}",
-    }
-    message_info = message_info or {}
-    if chat_title := message_info.get("ctitle"):
-        headers |= {"Helicone-Property-Chat": unicode_to_ascii(chat_title), "Helicone-Property-ChatID": str(message_info["cid"])}
-    if user_name := message_info.get("full_name"):
-        headers |= {"Helicone-User-Id": unicode_to_ascii(user_name), "Helicone-Property-User": str(message_info["uid"])}
-    client |= {"default_headers": headers}
-
-
-def hook_gemini_httpoptions(http_options: HttpOptions, message: Message) -> HttpOptions:
-    if http_options.base_url == "https://gateway.helicone.ai" and GPT.HELICONE_API_KEY:
-        info = parse_msg(message, silent=True)
-        headers = {"helicone-auth": f"Bearer {GPT.HELICONE_API_KEY}", "helicone-target-url": "https://generativelanguage.googleapis.com"}
-        if chat_title := info["ctitle"]:
-            headers |= {"Helicone-Property-Chat": unicode_to_ascii(chat_title), "Helicone-Property-ChatID": str(info["cid"])}
-        if user_name := info["full_name"]:
-            headers |= {"Helicone-User-Id": unicode_to_ascii(user_name), "Helicone-Property-User": str(info["uid"])}
-        http_options.headers = headers
-    return http_options
src/llm/models.py
@@ -1,222 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import os
-import re
-
-from openai import DefaultAsyncHttpxClient
-from pyrogram.types import Message
-
-from config import GEMINI, GPT, PREFIX, PROXY, TEXT2IMG, TID
-from llm.contexts import get_conversations
-from llm.utils import BOT_TIPS, enabled_providers, sample_key
-from messages.parser import parse_msg
-from messages.utils import startswith_prefix
-from utils import slim_cid, strings_list, true
-
-
-def get_context_type(conversations: list[Message]) -> str:
-    """Get model type based on message conversations."""
-    context_type = "text"
-    for msg in conversations:
-        info = parse_msg(msg, silent=True)
-        if info["mtype"] == "photo":
-            context_type = "image"
-        if info["mtype"] in ["video", "audio", "voice"]:
-            context_type = info["mtype"]
-    return context_type
-
-
-def get_model_id(minfo: dict, message: Message) -> tuple[str, bool, str]:
-    """Get model id with response modality.
-
-    Returns:
-        (model_id, is_custom_id, response_modality)
-    """
-    # to avoid a potential infinite loop,
-    # we do not respond to bot messages & GPT responses.
-    if minfo["is_bot"]:
-        return "", False, ""
-    if BOT_TIPS in minfo["text"]:
-        return "", False, ""
-
-    model_id, response_modality = get_model_id_from_envars(minfo)
-    if model_id:
-        return model_id, False, response_modality
-
-    model_id, is_custom_id, response_modality = get_model_id_from_prefix(minfo)
-    if not model_id:
-        return "", is_custom_id, ""
-
-    # early return for non-text generation
-    if response_modality != "text":
-        return model_id, is_custom_id, response_modality
-
-    # check if we need to fall back to an omni model
-    conversations = get_conversations(message)
-    context_type = get_context_type(conversations)  # "text", "image", "video", "audio" or "voice"
-    if context_type == "text":  # no need to fall back if the context type is text
-        return model_id, is_custom_id, response_modality
-
-    if context_type in ["video", "audio", "voice"]:  # currently, only Gemini supports audio/video
-        return GEMINI.TEXT_MODEL, is_custom_id, "text"
-
-    if (
-        (model_id == GPT.OPENAI_MODEL and not GPT.OPENAI_ACCEPT_IMAGE)
-        or (model_id == GPT.DEEPSEEK_MODEL and not GPT.DEEPSEEK_ACCEPT_IMAGE)
-        or (model_id == GPT.QWEN_MODEL and not GPT.QWEN_ACCEPT_IMAGE)
-        or (model_id == GPT.DOUBAO_MODEL and not GPT.DOUBAO_ACCEPT_IMAGE)
-        or (model_id == GPT.GROK_MODEL and not GPT.GROK_ACCEPT_IMAGE)
-        or (model_id == GPT.KIMI_MODEL and not GPT.KIMI_ACCEPT_IMAGE)
-    ):
-        omni_providers = {
-            "openai": GPT.OPENAI_MODEL,
-            "deepseek": GPT.DEEPSEEK_MODEL,
-            "qwen": GPT.QWEN_MODEL,
-            "doubao": GPT.DOUBAO_MODEL,
-            "grok": GPT.GROK_MODEL,
-            "gemini": GEMINI.TEXT_MODEL,
-            "kimi": GPT.KIMI_MODEL,
-        }
-        text_providers, _ = enabled_providers()
-        # prefer gemini if OMNI_PROVIDER is not set
-        model_id = omni_providers.get(GPT.OMNI_PROVIDER.lower()) or GEMINI.TEXT_MODEL or omni_providers[text_providers[0]]
-        return model_id, is_custom_id, "text"
-
-    return model_id, is_custom_id, response_modality
-
-
-def get_model_id_from_envars(minfo: dict) -> tuple[str, str]:
-    """Useful for running multiple bots in a same chat.
-
-    GPT_{cid}_BAN_{uid}=1 : Ban the user from using AI chat
-    GPT_{cid}_ALLOW_USERS={uids} : Only allow the listed users (comma-separated user ids) to use AI chat.
-    GPT_{cid}_IGNORE_REPLY=1 : Ignore messages that are replying to another message
-    GPT_{cid}_IGNORE_PREFIX=/gpt,/ds : Ignore these prefixes for specific chat ids
-
-    Returns:
-        (model_id, response_modality)
-    """
-    cid = slim_cid(minfo["cid"])
-    if (uids := os.getenv(f"GPT_{cid}_ALLOW_USERS")) and str(minfo["uid"]) not in strings_list(uids):
-        return "", ""
-    if true(os.getenv(f"GPT_{cid}_BAN_{minfo['uid']}")):
-        return "", ""
-    if true(os.getenv(f"GPT_{cid}_IGNORE_REPLY")) and minfo["reply_mid"]:
-        return "", ""
-    if startswith_prefix(minfo["text"], prefix=os.getenv(f"GPT_{cid}_IGNORE_PREFIX", "")):
-        return "", ""
-
-    # does not start with a /prefix, but the chat id is in a provider-specific list
-    if any(str(x) in strings_list(TID.OPENAI_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/gpt " + minfo["text"]
-        return GPT.OPENAI_MODEL, "text"
-    if any(str(x) in strings_list(TID.GEMINI_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/gemini " + minfo["text"]
-        return GEMINI.TEXT_MODEL, "text"
-    if any(str(x) in strings_list(TID.GROK_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/grok " + minfo["text"]
-        return GPT.GROK_MODEL, "text"
-    if any(str(x) in strings_list(TID.DEEPSEEK_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/ds " + minfo["text"]
-        return GPT.DEEPSEEK_MODEL, "text"
-    if any(str(x) in strings_list(TID.QWEN_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/qwen " + minfo["text"]
-        return GPT.QWEN_MODEL, "text"
-    if any(str(x) in strings_list(TID.DOUBAO_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/doubao " + minfo["text"]
-        return GPT.DOUBAO_MODEL, "text"
-    if any(str(x) in strings_list(TID.KIMI_CHATS) for x in [minfo["cid"], slim_cid(minfo["cid"])]):
-        minfo["text"] = "/kimi " + minfo["text"]
-        return GPT.KIMI_MODEL, "text"
-    return "", ""
-
-
-def get_model_id_from_prefix(minfo: dict) -> tuple[str, bool, str]:
-    text_providers, img_providers = enabled_providers()
-    model_id = ""
-    resp_modality = "text"
-    # start with /prefix
-    if startswith_prefix(minfo["text"], prefix="/gpt") and "openai" in text_providers:
-        model_id = GPT.OPENAI_MODEL
-    elif startswith_prefix(minfo["text"], prefix="/gemini") and "gemini" in text_providers:
-        model_id = GEMINI.TEXT_MODEL
-    elif startswith_prefix(minfo["text"], prefix="/ds") and "deepseek" in text_providers:
-        model_id = GPT.DEEPSEEK_MODEL
-    elif startswith_prefix(minfo["text"], prefix="/doubao") and "doubao" in text_providers:
-        model_id = GPT.DOUBAO_MODEL
-    elif startswith_prefix(minfo["text"], prefix="/qwen") and "qwen" in text_providers:
-        model_id = GPT.QWEN_MODEL
-    elif startswith_prefix(minfo["text"], prefix="/kimi") and "kimi" in text_providers:
-        model_id = GPT.KIMI_MODEL
-    elif startswith_prefix(minfo["text"], prefix="/grok") and "grok" in text_providers:
-        model_id = GPT.GROK_MODEL
-    elif startswith_prefix(minfo["text"], prefix=PREFIX.GENIMG):
-        model_id = TEXT2IMG.DEFAULT_MODEL
-        resp_modality = "image"
-    # starts with /ai: auto-detect the model_id
-    elif startswith_prefix(minfo["text"], prefix="/ai") and text_providers:
-        providers = {
-            "openai": GPT.OPENAI_MODEL,
-            "deepseek": GPT.DEEPSEEK_MODEL,
-            "qwen": GPT.QWEN_MODEL,
-            "doubao": GPT.DOUBAO_MODEL,
-            "grok": GPT.GROK_MODEL,
-            "gemini": GEMINI.TEXT_MODEL,
-            "kimi": GPT.KIMI_MODEL,
-        }
-        # prefer gemini if DEFAULT_PROVIDER is not set
-        model_id = providers.get(GPT.DEFAULT_PROVIDER.lower()) or GEMINI.TEXT_MODEL or providers[text_providers[0]]
-    if model_id:
-        return model_id, False, resp_modality
-
-    # the message is replying to an AI response message
-    if startswith_prefix(minfo["reply_text"], prefix=f"🤖{GPT.OPENAI_MODEL_NAME}:{BOT_TIPS}") and "openai" in text_providers:
-        model_id = GPT.OPENAI_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🤖{GEMINI.TEXT_MODEL_NAME}:{BOT_TIPS}") and "gemini" in text_providers:
-        model_id = GEMINI.TEXT_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🤖{GPT.DEEPSEEK_MODEL_NAME}:{BOT_TIPS}") and "deepseek" in text_providers:
-        model_id = GPT.DEEPSEEK_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🤖{GPT.DOUBAO_MODEL_NAME}:{BOT_TIPS}") and "doubao" in text_providers:
-        model_id = GPT.DOUBAO_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🤖{GPT.QWEN_MODEL_NAME}:{BOT_TIPS}") and "qwen" in text_providers:
-        model_id = GPT.QWEN_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🤖{GPT.KIMI_MODEL_NAME}:{BOT_TIPS}") and "kimi" in text_providers:
-        model_id = GPT.KIMI_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🤖{GPT.GROK_MODEL_NAME}:{BOT_TIPS}") and "grok" in text_providers:
-        model_id = GPT.GROK_MODEL
-    elif startswith_prefix(minfo["reply_text"], prefix=f"🍌{TEXT2IMG.GEMINI_MODEL.title()}") and "gemini" in img_providers:
-        model_id = TEXT2IMG.GEMINI_MODEL
-        resp_modality = "image"
-    elif matched := re.match(rf"^🤖(.*?):{BOT_TIPS}", minfo["reply_text"]):
-        return matched.group(1).lower(), True, "text"
-    return model_id, False, resp_modality
-
-
-def get_gpt_config(model_id: str = "") -> dict:
-    """Get GPT configurations."""
-    model_factory = {
-        "gpt,chatgpt,o1,o3,o4": {"api_key": sample_key(GPT.OPENAI_API_KEY), "base_url": GPT.OPENAI_BASE_URL, "model_name": GPT.OPENAI_MODEL_NAME},
-        "deepseek": {"api_key": sample_key(GPT.DEEPSEEK_API_KEY), "base_url": GPT.DEEPSEEK_BASE_URL, "model_name": GPT.DEEPSEEK_MODEL_NAME},
-        "qwen,qvq,qwq": {"api_key": sample_key(GPT.QWEN_API_KEY), "base_url": GPT.QWEN_BASE_URL, "model_name": GPT.QWEN_MODEL_NAME},
-        "doubao": {"api_key": sample_key(GPT.DOUBAO_API_KEY), "base_url": GPT.DOUBAO_BASE_URL, "model_name": GPT.DOUBAO_MODEL_NAME},
-        "grok": {"api_key": sample_key(GPT.GROK_API_KEY), "base_url": GPT.GROK_BASE_URL, "model_name": GPT.GROK_MODEL_NAME},
-        "kimi": {"api_key": sample_key(GPT.KIMI_API_KEY), "base_url": GPT.KIMI_BASE_URL, "model_name": GPT.KIMI_MODEL_NAME},
-    }
-
-    client = {"http_client": DefaultAsyncHttpxClient(proxy=PROXY.GPT)}
-    if GPT.TIMEOUT is not None:
-        client |= {"timeout": int(GPT.TIMEOUT)}
-
-    model_id_config = {}
-    for prefix, config in model_factory.items():
-        if startswith_prefix(model_id, prefix):
-            model_id_config = config
-            break
-
-    model_name = model_id_config.get("model_name", "")
-    model_id_config.pop("model_name", None)
-    client |= model_id_config
-    completions = {"model": model_id}
-    if GPT.TEMPERATURE is not None:
-        completions |= {"temperature": float(GPT.TEMPERATURE)}
-    return {"friendly_name": model_name, "client": client, "completions": completions}
src/llm/prompts.py
@@ -1,189 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-
-from loguru import logger
-
-from config import TZ
-from utils import nowdt
-
-
-# ruff: noqa: RUF001
-def modify_prompts(context: list[dict], prompt: str, role: str = "system", method: str = "overwrite") -> list[dict]:
-    if role not in ["system", "user", "assistant"]:
-        logger.warning(f"Invalid role of `modify_prompts`: {role}")
-        return context
-    if not context:
-        return [{"role": role, "content": prompt}]
-    if method not in ["overwrite", "append"]:
-        logger.warning(f"Invalid method of `modify_prompts`: {method}")
-        return context
-    if method == "overwrite":
-        if context[0].get("role") == role:
-            context[0]["content"] = prompt
-        else:
-            context.insert(0, {"role": role, "content": prompt})
-    elif method == "append":
-        if role == "system":  # should in the beginning
-            pos = 0
-            for idx, item in enumerate(context):
-                if item["role"] != "system":
-                    pos = idx
-                    break
-            context.insert(pos, {"role": role, "content": prompt})
-        else:
-            context.append({"role": role, "content": prompt})
-    return context
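-
-# Hedged doctest-style sketch of the two methods above (values illustrative):
-#   >>> modify_prompts([], "be brief", role="system")
-#   [{'role': 'system', 'content': 'be brief'}]
-#   >>> modify_prompts([{"role": "system", "content": "old"}], "new", method="overwrite")
-#   [{'role': 'system', 'content': 'new'}]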
-
-
-def add_search_results_to_prompts(search_results: list[dict], params: dict) -> dict:
-    """Add search results to contexts.
-
-    # Template: https://github.com/deepseek-ai/DeepSeek-R1/tree/ef99616?tab=readme-ov-file#usage-recommendations
-    """
-    search_msg = ""
-    for idx, result in enumerate(search_results):
-        search_msg += f"[webpage {idx + 1} begin] {result} [webpage {idx + 1} end]\n"
-
-    # modified from DeepSeek's official instructions
-    prompt = f"""# 以下内容是基于用户发送的消息的搜索结果:
-{search_msg}
-在我给你的搜索结果中,每个结果都是[webpage X begin]...[webpage X end]格式的,X代表每篇文章的数字索引。
-在回答时,请注意以下几点:
-- 今天是{nowdt(TZ):%Y-%m-%d}。
-- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
-- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内,并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项;如非必要,不要主动告诉用户搜索结果未提供的内容。
-- 对于创作类的问题(如写论文),你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
-- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。
-- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
-- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
-- 请在适当的情况下在句子末尾引用上下文。请按照引用编号 [[X]](url) 的格式在答案中对应部分引用上下文。
-- 如果一句话源自多个上下文,请列出所有相关的引用编号,例如[[1]](url1) [[2]](url2),切记不要将引用集中在最后返回,而是在答案对应部分列出。
-- 你的回答应该综合多个相关网页来回答,不能重复引用一个网页。
-- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
-
-# 用户消息为:
-"""
-    contexts = params["messages"]
-    # last context is text
-    if isinstance(contexts[-1]["content"], str):
-        contexts[-1]["content"] = f"{prompt}{contexts[-1]['content']}"
-    else:  # list, multi-modality
-        contexts[-1]["content"].insert(0, {"type": "text", "text": prompt})
-    params["messages"] = contexts
-    return params
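-
-# Hedged sketch (illustrative values): the template is prepended to the *last*
-# message, so the search context sits right before the user's question:
-#   params = {"messages": [{"role": "user", "content": "what's new today?"}]}
-#   add_search_results_to_prompts([{"title": "t", "link": "u"}], params)
-#   # params["messages"][-1]["content"] now starts with the search template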
-
-
-def refine_prompts(contexts: list[dict]) -> list[dict]:
-    contexts = combine_consecutive_role(contexts)
-    return simplify_text_prompts(contexts)
-
-
-def combine_consecutive_role(contexts: list[dict]) -> list[dict]:
-    """Combine consecutive user and assistant contexts into one message.
-
-    Some GPT models don't support consecutive user and assistant contexts. (e.g. Hunyuan)
-
-    Args:
-        contexts (list[dict]): [
-            {
-                "role": "user or assistant",
-                "content": [
-                    {'type': 'text', 'text': 'caption this img'},
-                    {'type': 'image_url', 'image_url': {'url': 'data:image/jpeg;base64,base64_image'}},
-                    {'type': 'image_url', 'image_url': {'url': 'https://server.com/dir/image.jpg'}},
-                ]
-            }
-        ]
-    """
-    contexts = convert_content_to_list_dict(contexts)
-    combined_contexts = []
-    for i, msg in enumerate(contexts):
-        if i == 0:
-            combined_contexts.append(msg)
-            continue
-        if msg["role"] == combined_contexts[-1]["role"]:
-            combined_contexts[-1]["content"].extend(msg["content"])
-        else:
-            combined_contexts.append(msg)
-    return combined_contexts
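-
-# Hedged doctest-style sketch: two consecutive user turns are merged into one
-# multi-part message, which models like Hunyuan require:
-#   >>> combine_consecutive_role([{"role": "user", "content": "a"}, {"role": "user", "content": "b"}])
-#   [{'role': 'user', 'content': [{'type': 'text', 'text': 'a'}, {'type': 'text', 'text': 'b'}]}]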
-
-
-def simplify_text_prompts(contexts: list[dict]) -> list[dict]:
-    """Simplify the plain text content format.
-
-    Some models do not support this format:
-        [{'content': [{'type': 'text', 'text': 'hi'}], 'role': 'user'}]
-
-    It only supports:
-        [{'content': 'hi', 'role': 'user'}]
-
-    Args:
-        contexts (list[dict]): [
-            {
-                "role": "user or assistant",
-                "content": [
-                    {'type': 'text', 'text': 'caption this img'},
-                ]
-            }
-        ]
-
-    Returns:
-        list[dict]: [
-            {
-                "role": "user or assistant",
-                "content": "caption this img"
-            }
-        ]
-    """
-    fixed_contexts = []
-    for msg in contexts:
-        if not msg.get("content") or not isinstance(msg.get("content"), list):
-            fixed_contexts.append(msg)
-            continue
-        contents = msg.get("content", [])
-        if all(x.get("type") == "text" for x in contents):
-            msg["content"] = "\n".join([x.get("text") for x in contents])
-            fixed_contexts.append(msg)
-        else:
-            fixed_contexts.append(msg)
-    return fixed_contexts
-
-
-def convert_content_to_list_dict(contexts: list[dict]) -> list[dict]:
-    """Reverse `simplify_text_prompts` function.
-
-    Returns:
-        contexts (list[dict]): [
-            {
-                "role": "user or assistant",
-                "content": [
-                    {'type': 'text', 'text': 'caption this img'},
-                ]
-            }
-        ]
-    """
-    fixed_contexts = []
-    for msg in contexts:
-        if not msg.get("content") or isinstance(msg.get("content"), list):
-            fixed_contexts.append(msg)
-            continue
-        content = msg.get("content", "")
-        if isinstance(content, str):
-            msg["content"] = [{"type": "text", "text": content}]
-            fixed_contexts.append(msg)
-        else:
-            fixed_contexts.append(msg)
-    return fixed_contexts
-
-
-def remove_prompt_from_contexts(contexts: list[dict], prompt: str) -> list[dict]:
-    """Remove the prompt from the contexts."""
-    for msg in contexts:
-        if isinstance(msg.get("content"), str):
-            msg["content"] = msg["content"].replace(prompt, "").strip()
-        elif isinstance(msg.get("content"), list):
-            for content in msg["content"]:
-                if content.get("type") == "text":
-                    content["text"] = content["text"].replace(prompt, "").strip()
-    return contexts
src/llm/response.py
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import contextlib
-import json
-
-from glom import Coalesce, glom
-from loguru import logger
-from openai import AsyncOpenAI
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
-
-from config import GPT
-from llm.hooks import pre_hooks
-from llm.utils import REASONING_BEGIN, REASONING_END, add_search_results_to_response, beautify_llm_response, beautify_model_name, extract_reasoning
-from messages.progress import modify_progress
-
-
-async def send_to_gpt(config: dict, retry: int = 0, **kwargs) -> dict[str, str]:
-    """Get GPT response in non-stream mode.
-
-    # See `llm/README.md` for more details.
-
-    Args:
-        config: dict, contains model configuration
-        retry: int, number of retries
-
-    Returns:
-        {"content": str, "reasoning": str, "model": str}
-    """
-    try:
-        pre_hooks(config["client"], config["completions"], message_info=kwargs.get("message_info"))
-        openai = AsyncOpenAI(**config["client"])
-        logger.trace(config)
-        resp = await openai.chat.completions.create(**config["completions"])
-        resp = resp.model_dump()
-        error = await parse_error(resp, retry, **kwargs)
-        if error["retry"]:
-            return await send_to_gpt(config, retry=retry + 1, **kwargs)
-        if not error["error"]:
-            return await parse_response(config, resp)
-    except Exception as e:
-        error = f"🤖{config['friendly_name']}请求失败, 重试次数: {retry + 1}/{GPT.MAX_RETRY + 1}\n{e}"
-        logger.error(error)
-        await modify_progress(text=error, force_update=True, **kwargs)
-        if retry < GPT.MAX_RETRY:
-            return await send_to_gpt(config, retry=retry + 1, **kwargs)
-    return {"content": "", "reasoning": ""}
-
-
-async def parse_error(resp: dict, retry: int, **kwargs) -> dict:
-    """Parse GPT error.
-
-    Returns:
-        {"error": bool, "retry": bool}
-    """
-    error_result = {"error": False, "retry": False}
-    error_code = glom(resp, "error.code", default=0)
-    error_msg = ""
-    content = ""
-    tool_call = {}
-    with contextlib.suppress(Exception):
-        metadata = glom(resp, "error.metadata.raw", default="{}")
-        error_msg = glom(json.loads(metadata), "error.message", default="")
-        choice = glom(resp, "choices.0", default={})
-        content = glom(choice, "message.content", default="") or ""
-        tool_call = glom(choice, "message.tool_calls.0", default={})
-    if error_code != 0 or not (content or tool_call):
-        logger.warning(resp)
-        error_result["error"] = True
-        await modify_progress(text=f"[{error_code}] {error_msg}\n重试次数: {retry + 1}/{GPT.MAX_RETRY + 1}", force_update=True, **kwargs)
-        if retry < GPT.MAX_RETRY:
-            error_result["retry"] = True
-    return error_result
-
-
-async def parse_response(config: dict, response: dict) -> dict[str, str]:
-    """Parse GPT response.
-
-    Returns:
-        {"content": str, "reasoning": str, "model": str}
-    """
-    logger.debug(response)
-    choice = glom(response, "choices.0", default={})
-    if glom(choice, "message.tool_calls.0", default={}):  # this is a function call response
-        return response | {"content": "", "reasoning": ""}
-    try:
-        content = glom(choice, "message.content", default="") or ""
-        content = add_search_results_to_response(config.get("search_results", []), content)
-
-        # parse reasoning
-        reasoning, content = extract_reasoning(content)  # extract reasoning from content (<think>...</think>)
-        if not reasoning:
-            reasoning = glom(choice, Coalesce("message.reasoning_content", "message.reasoning"), default="") or ""
-        if reasoning and str(reasoning) != "None":  # add expandable block quotation mark for reasoning
-            # if you change this line, remember to remove the reasoning from contexts (`llm/contexts.py`)
-            reasoning = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{reasoning.strip()}{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}"
-
-        primary_model = glom(config, "completions.model", default="") or ""
-        used_model = glom(response, "model", default="") or ""
-        response = {
-            "content": beautify_llm_response(content.strip()),
-            "model": config["friendly_name"],
-            "reasoning": reasoning.strip(),
-        }
-        if not (used_model in primary_model or primary_model in used_model):
-            # do not compare with `!=` (deepseek/deepseek-r1:free vs deepseek/deepseek-r1, gpt-4o vs gpt-4o-2024-07-18)
-            used_model = beautify_model_name(used_model)
-            logger.warning(f"Fallback model {primary_model} -> {used_model}")
-            response["model"] = used_model
-    except Exception as e:
-        logger.error(f"Parse  GPT response failed: {e}")
-        raise
-    return response
src/llm/response_stream.py
@@ -1,220 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import contextlib
-import json
-import re
-
-from glom import glom
-from loguru import logger
-from openai import AsyncOpenAI
-from pyrogram.client import Client
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
-from pyrogram.types import Message, ReplyParameters
-
-from config import GPT, TEXT_LENGTH
-from llm.hooks import pre_hooks
-from llm.utils import BOT_TIPS, REASONING_BEGIN, REASONING_END, add_search_results_to_response, beautify_llm_response, split_reasoning
-from messages.progress import modify_progress
-from messages.utils import blockquote, count_without_entities, smart_split
-
-
-async def send_to_gpt_stream(
-    client: Client,
-    status_msg: Message | None,
-    config: dict,
-    prefix: str | None = None,
-    *,
-    retry: int = 0,
-    silent: bool = False,
-    remove_thinking: bool = True,
-    single_thinking_msg: bool = True,
-    system_prompt: str | None = None,
-    **kwargs,
-) -> dict:
-    """Get GPT response in stream mode.
-
-    Args:
-        single_thinking_msg (bool, optional): Only use one message for displaying thinking.
-        remove_thinking (bool, optional): Remove thinking parts once finished.
-
-    Returns:
-        dict: {"texts": str, "thoughts": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
-    """
-    if prefix is None:
-        prefix = f"🤖**{config['friendly_name']}**:{BOT_TIPS}\n"
-
-    answers = ""  # all model responses
-    thoughts = ""  # all model thoughts
-    runtime_texts = ""  # for a single telegram message
-    if silent:
-        status_msg = None
-    status_cid = status_msg.chat.id if isinstance(status_msg, Message) else 0
-    status_mid = status_msg.id if isinstance(status_msg, Message) else 0
-    sent_messages = []
-    try:
-        pre_hooks(config["client"], config["completions"], message_info=kwargs.get("message_info"), system_prompt=system_prompt)
-        openai = AsyncOpenAI(**config["client"])
-        logger.trace(config)
-        is_reasoning = False
-        is_reasoning_conversation = None  # indicates whether this is a reasoning conversation
-        gen = await openai.chat.completions.create(**config["completions"], stream=True)
-        length = 0
-        async for chunk in gen:
-            resp = chunk.model_dump()
-            logger.trace(resp)
-            error = await parse_error(resp, retry, **kwargs)
-            if error["retry"]:
-                return await send_to_gpt_stream(
-                    client,
-                    status_msg,
-                    config,
-                    prefix=prefix,
-                    retry=retry + 1,
-                    silent=silent,
-                    remove_thinking=remove_thinking,
-                    single_thinking_msg=single_thinking_msg,
-                    system_prompt=system_prompt,
-                    **kwargs,
-                )
-            if error["error"]:
-                await modify_progress(message=status_msg, text=error["error"], force_update=True, **kwargs)
-                return {}
-            answer = glom(resp, "choices.0.delta.content", default="") or ""
-            thinking = glom(resp, "choices.0.delta.reasoning_content", default="") or ""
-            if is_reasoning_conversation is None and thinking:
-                is_reasoning_conversation = True
-            if thinking and not is_reasoning:  # first reasoning chunk received
-                is_reasoning = True
-                runtime_texts += f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{thinking.lstrip()}"
-            elif thinking and is_reasoning:  # more reasoning while already thinking
-                runtime_texts += thinking
-            elif is_reasoning_conversation is True and is_reasoning:  # answer received, close the reasoning flag
-                is_reasoning = False
-                runtime_texts = answer.lstrip() if remove_thinking else f"{runtime_texts.rstrip()}{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}\n" + answer.lstrip()
-            else:
-                runtime_texts += answer
-
-            # Sometimes the reasoning content is included in the content field.
-            # handle "<think>...</think>\n\n"
-            if runtime_texts.removeprefix(prefix).lstrip().startswith("<think>"):
-                is_reasoning = True
-                runtime_texts = runtime_texts.replace("<think>", f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}")
-            if "</think>" in runtime_texts:
-                is_reasoning = False
-                runtime_texts = re.sub(r"</think>\s*", f"{REASONING_END}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}", runtime_texts, count=1)
-
-            thoughts += thinking
-            answers += answer
-            runtime_texts = beautify_llm_response(runtime_texts)
-            length = await count_without_entities(prefix + runtime_texts)
-            if length <= TEXT_LENGTH - 10:  # leave some flexibility
-                if len(runtime_texts.removeprefix(prefix)) > 10:  # start updating once the answer has meaningful content
-                    await modify_progress(message=status_msg, text=prefix + runtime_texts, detail_progress=True)
-            else:  # answers is too long, split it into multiple messages
-                parts = await smart_split(prefix + runtime_texts)
-                if len(parts) == 1:
-                    continue
-                if is_reasoning and single_thinking_msg:
-                    runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{parts[-1].lstrip()}"  # remove previous thinking
-                    await modify_progress(message=status_msg, text=parts[0], force_update=True)  # force send the first part
-                else:
-                    await modify_progress(message=status_msg, text=blockquote(parts[0]), force_update=True)  # force send the first part
-                    runtime_texts = parts[-1]  # keep the last part
-                    if is_reasoning:
-                        runtime_texts = f"{BLOCKQUOTE_EXPANDABLE_DELIM}{REASONING_BEGIN}{runtime_texts.lstrip()}"
-                    if not silent:
-                        status_msg = await client.send_message(status_cid, text=prefix + runtime_texts, reply_parameters=ReplyParameters(message_id=status_mid))  # the new message
-                        sent_messages.append(status_msg)
-                        status_mid = status_msg.id
-
-        # all chunks are processed
-        if not answers.strip() and not thoughts.strip():  # empty response
-            return await send_to_gpt_stream(
-                client,
-                status_msg,
-                config,
-                prefix=prefix,
-                retry=retry + 1,
-                silent=silent,
-                remove_thinking=remove_thinking,
-                single_thinking_msg=single_thinking_msg,
-                system_prompt=system_prompt,
-                **kwargs,
-            )
-
-        if not thoughts:  # no structured thinking in response
-            thoughts, answers = split_reasoning(answers)
-
-        answers = add_search_results_to_response(config.get("search_results", []), answers)
-        final_thoughts = "" if remove_thinking else thoughts
-        if await count_without_entities(prefix + final_thoughts + answers) <= TEXT_LENGTH - 10:  # short answer in single msg
-            if length > GPT.COLLAPSE_LENGTH:  # collapse the response if the answer is too long
-                quoted = REASONING_BEGIN + final_thoughts.strip() + REASONING_END + "\n\n" + answers.strip() if final_thoughts.strip() else answers.strip()
-                await modify_progress(message=status_msg, text=f"{prefix}{blockquote(quoted)}", force_update=True)
-            else:
-                quoted = blockquote(REASONING_BEGIN + final_thoughts.strip() + REASONING_END) + "\n" if final_thoughts.strip() else ""
-                await modify_progress(message=status_msg, text=f"{prefix}{quoted}{answers}", force_update=True)
-        # total length is too long, answers are split into multiple messages
-        elif length > GPT.COLLAPSE_LENGTH:
-            await modify_progress(message=status_msg, text=prefix + blockquote(runtime_texts), force_update=True)
-        else:
-            await modify_progress(message=status_msg, text=prefix + runtime_texts, force_update=True)
-
-    except Exception as e:
-        error = f"🤖{config['friendly_name']}请求失败, 重试次数: {retry + 1}/{GPT.MAX_RETRY + 1}\n{e}"
-        if "resp" in locals():
-            error += f"\n{resp}"  # type: ignore
-        logger.error(error)
-        with contextlib.suppress(Exception):
-            await modify_progress(text=error, force_update=True, **kwargs)
-            [await modify_progress(msg, del_status=True) for msg in sent_messages]
-        if retry < GPT.MAX_RETRY:
-            return await send_to_gpt_stream(
-                client,
-                status_msg,
-                config,
-                prefix=prefix,
-                retry=retry + 1,
-                silent=silent,
-                remove_thinking=remove_thinking,
-                single_thinking_msg=single_thinking_msg,
-                system_prompt=system_prompt,
-                **kwargs,
-            )
-    return {"texts": answers, "thoughts": thoughts, "prefix": prefix, "model_name": config["friendly_name"], "sent_messages": sent_messages}
-
-
-async def parse_error(resp: dict, retry: int, **kwargs) -> dict:
-    """Parse GPT error.
-
-    Returns:
-        {"error": "msg", "retry": bool}
-    """
-    error_result = {"error": "", "retry": False}
-    error_code = glom(resp, "error.code", default=0)
-    error_msg = ""
-    content = None
-    reasoning_content = None
-    is_finished = False
-    finish_reason = ""
-    tool_call = {}
-    with contextlib.suppress(Exception):
-        metadata = glom(resp, "error.metadata.raw", default="{}")
-        error_msg = glom(json.loads(metadata), "error.message", default="")
-        choice = glom(resp, "choices.0", default={})
-        content = glom(choice, "delta.content", default=None)
-        reasoning_content = glom(choice, "delta.reasoning_content", default=None)
-        tool_call = glom(choice, "delta.tool_calls.0", default=None)
-        finish_reason = glom(choice, "finish_reason", default=None)
-        is_finished = str(finish_reason) == "stop"
-    if is_finished or any(x is not None for x in [content, reasoning_content, tool_call]):
-        return {"error": "", "retry": False}
-    if error_code != 0:
-        logger.warning(resp)
-        error_result["error"] = error_msg
-        await modify_progress(text=f"[{error_code}] {error_msg}\n重试次数: {retry + 1}/{GPT.MAX_RETRY + 1}", force_update=True, **kwargs)
-        if retry < GPT.MAX_RETRY:
-            error_result["retry"] = True
-    if finish_reason is not None:
-        error_result["error"] = finish_reason
-    return error_result
src/llm/text2img.py
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from collections import defaultdict
-
-from glom import glom
-from loguru import logger
-from pyrogram.client import Client
-from pyrogram.types import Message
-
-from config import PREFIX, TEXT2IMG
-from llm.ali.text2img import ali_text2img
-from llm.ali.zimage import zimage_text2img
-from llm.cloudflare.text2img import cloudflare_text2img
-from llm.doubao.text2img import doubao_genimg
-from llm.gemini.text2img import gemini_text2img
-from llm.utils import enabled_providers
-from utils import strings_list
-
-TEXT2IMG_HELP = f"""🌠**AI生图**
-`{PREFIX.GENIMG}` 后接提示词即可生成
-
-⚙️模型配置:
-- `{PREFIX.GENIMG}`: 默认模型 ({TEXT2IMG.DEFAULT_MODEL})
-- `/z`: 阿里Z-Image
-- `/sd`: 豆包Seedream
-- `/flux`: Flux
-- `/stable`: Stable Diffusion
-- `/nano`: Gemini Nano Banana
-
-上下文说明:
-- Gemini模型会把整个回复消息链上的所有消息加入上下文
-- 豆包模型仅会把当前消息及回复的最近一条消息加入上下文
-- 其余模型不会把历史消息加入上下文
-"""
-
-# Commands like /sd, /flux, /stable and /nano are implemented via the "alias" feature (src/others/alias.py)
-# The full invocation is `/gen @doubao`, `/gen @flux`, ...
-
-
-async def text2img(client: Client, message: Message, **kwargs) -> dict:
-    """Text to image generation.
-
-    Args:
-        client (Client): The Pyrogram client.
-        message (Message): The trigger message object.
-        system_prompt (str | None, optional): System prompt. Defaults to None.
-
-    Returns:
-        dict: {"texts": str, "thoughts": str, "prefix": str, "model_name": str, "sent_messages": list[Message]}
-    """
-    texts = message.content.removeprefix(PREFIX.GENIMG).strip()
-    all_models = enabled_models()
-    if not all_models:
-        return {}
-    categories = list(all_models)  # ['gemini', 'flux', 'sd']
-    models = all_models.get(TEXT2IMG.DEFAULT_MODEL, [])
-    prompt = texts
-    if glom(message, "reply_to_message.content", default="").startswith(f"🍌{TEXT2IMG.GEMINI_MODEL.title()}"):
-        models = all_models.get("gemini", [])
-    for category in categories:
-        if texts.lower().startswith(f"@{category}"):
-            models = all_models[category]
-            prompt = texts.removeprefix(f"@{category}").strip()
-            break
-
-    for model in models:
-        provider, model_id = model.split("/", 1)
-        try:
-            if provider == "gemini":
-                await gemini_text2img(client, message, model_id, prompt, **kwargs)
-            elif provider == "ali":
-                await ali_text2img(client, message, model_id, prompt, **kwargs)
-            elif provider == "cloudflare":
-                await cloudflare_text2img(client, message, model_id, prompt, **kwargs)
-            elif provider == "doubao":
-                await doubao_genimg(client, message, model_id, prompt, **kwargs)
-            elif provider == "zimage":
-                await zimage_text2img(client, message, prompt, **kwargs)
-        except Exception as e:
-            logger.error(e)
-    return {}
-
-
-def enabled_models() -> dict[str, list]:
-    """Get all enabled text to image generation model ids.
-
-    model_id format: {provider}/{real_model_id}
-
-    Returns:
-        dict[str,list]: {
-            "gemini": ["gemini/gemini-2.0-flash"],
-            "flux": ["ali/flux-dev", "cloudflare/@cf/black-forest-labs/flux-1-schnell],
-            "sd": ["ali/stable-diffusion-3.5-large", "cloudflare/@cf/bytedance/stable-diffusion-xl-lightning"]}
-    """
-    models = defaultdict(list)
-    _, img_providers = enabled_providers()
-    for provider in img_providers:
-        if provider == "gemini":
-            models["gemini"] = [f"gemini/{TEXT2IMG.GEMINI_MODEL}"]
-        if provider == "ali" and TEXT2IMG.ALI_FLUX_MODEL and "ali" in strings_list(TEXT2IMG.FLUX_PROVIDER):
-            models["flux"].extend([f"ali/{model}" for model in strings_list(TEXT2IMG.ALI_FLUX_MODEL)])
-        if provider == "ali" and TEXT2IMG.ALI_STABLE_DIFFUSION_MODEL and "ali" in strings_list(TEXT2IMG.STABLE_DIFFUSION_PROVIDER):
-            models["sd"].extend([f"ali/{model}" for model in strings_list(TEXT2IMG.ALI_STABLE_DIFFUSION_MODEL)])
-        if provider == "cloudflare" and TEXT2IMG.CF_FLUX_MODEL and "cloudflare" in strings_list(TEXT2IMG.FLUX_PROVIDER):
-            models["flux"].extend([f"cloudflare/{model}" for model in strings_list(TEXT2IMG.CF_FLUX_MODEL)])
-        if provider == "cloudflare" and TEXT2IMG.CF_STABLE_DIFFUSION_MODEL and "cloudflare" in strings_list(TEXT2IMG.STABLE_DIFFUSION_PROVIDER):
-            models["sd"].extend([f"cloudflare/{model}" for model in strings_list(TEXT2IMG.CF_STABLE_DIFFUSION_MODEL)])
-        if provider == "doubao" and TEXT2IMG.DOUBAO_SEEDREAM_MODEL:
-            models["doubao"].extend([f"doubao/{model}" for model in strings_list(TEXT2IMG.DOUBAO_SEEDREAM_MODEL)])
-        if provider == "zimage" and TEXT2IMG.ZIMAGE_API_URL:
-            models["zimage"].extend(["zimage/Z-Image"])
-    return models
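-
-# A hedged sketch (values illustrative): `/gen @flux prompt` resolves through
-# the dict above, then `text2img` splits provider and model id:
-#   models = enabled_models().get("flux", [])   # e.g. ["ali/flux-dev", ...]
-#   provider, model_id = models[0].split("/", 1)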
src/llm/tool_scheme.py
@@ -1,17 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-ONLINE_SEARCH = {
-    "type": "function",
-    "function": {
-        "name": "get_online_search_result",
-        "description": "获取联网搜索结果",
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "query": {"description": "联网搜索关键词", "type": "string"},
-            },
-            "required": ["query"],
-        },
-    },
-}
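-
-# A hedged sketch: this schema follows the OpenAI function-calling format and
-# is attached by `add_tools` (llm/tools.py) roughly as:
-#   params = {"model": "...", "messages": [...], "tools": [ONLINE_SEARCH], "tool_choice": "auto"}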
src/llm/tools.py
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import copy
-import json
-
-from glom import glom
-from loguru import logger
-
-from config import GPT, TOKEN, TZ
-from llm.prompts import add_search_results_to_prompts, modify_prompts
-from llm.response import send_to_gpt
-from llm.tool_scheme import ONLINE_SEARCH
-from messages.progress import modify_progress
-from others.search_google import query_google
-from utils import nowdt
-
-
-async def get_online_search_result(query: str) -> list[dict]:
-    results = []
-    if GPT.PRIMARY_SEARCH_ENGINE == "google":
-        results = await google_search(query)
-        if not results:
-            return []
-    return results
-
-
-async def google_search(query: str) -> list[dict]:
-    res = await query_google(query)
-    if not res:
-        return []
-    keep_keys = ["title", "link", "snippet", "mime"]
-    results = [{k: v for k, v in x.items() if k in keep_keys} for x in res]
-    return results[: int(GPT.SEARCH_NUM_RESULTS)]
-
-
-def add_tools(params: dict) -> dict:
-    """Add tools for GPT.
-
-    Args:
-        params: dict, params for `openai.chat.completions.create()`
-
-    Returns:
-        tools_params:
-            {
-                "tools": [{tool_1}, {tool_2}, ...],  # list of dict
-                "tool_choice": "auto",
-            }
-    """
-    tools = []
-    if GPT.PRIMARY_SEARCH_ENGINE == "google" and TOKEN.GOOGLE_SEARCH_API_KEY and TOKEN.GOOGLE_SEARCH_CX:
-        tools = [ONLINE_SEARCH]
-        system_prompt = f"你是一个具备网络访问能力的智能助手. 在需要时可以访问互联网进行相关搜索获取信息以确保用户得到最新、准确的帮助。当前时间是{nowdt(TZ):%Y-%m-%d}"
-        params["messages"] = modify_prompts(params["messages"], system_prompt, method="overwrite")
-
-    if tools:
-        params["tools"] = tools
-        params["tool_choice"] = "auto"
-    return params
-
-
-def remove_tool(params: dict, tool_name: str) -> dict:
-    """Remove tool from contexts.
-
-    Returns: dict (the modified params)
-    """
-    if tool_name.upper() == "ALL":
-        params.pop("tools", None)
-        params.pop("tool_choice", None)
-        return params
-
-    keep_tools = [tool for tool in params.get("tools", []) if tool.get("function", {}).get("name") != tool_name]
-
-    if keep_tools:
-        params["tools"] = keep_tools
-    else:
-        params.pop("tools", None)
-        params.pop("tool_choice", None)
-    return params
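-
-# Hedged doctest-style sketch: when the last tool is removed, `tool_choice`
-# is dropped as well:
-#   >>> remove_tool({"tools": [ONLINE_SEARCH], "tool_choice": "auto"}, "get_online_search_result")
-#   {}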
-
-
-async def merge_tools_response(config: dict, **kwargs) -> tuple[dict, dict]:
-    """Use tool model to get function call result.
-
-    If no function call is triggered, return the original config and the tool model response.
-    Otherwise, return the modified config and an empty response.
-
-    Returns:
-        (config, response)
-    """
-    if not GPT.TOOLS_API_KEY:
-        return config, {}
-    if any(x in config["completions"]["model"].lower() for x in ["search", "搜索"]):  # skip search model
-        return config, {}
-    # tool model should be fast and cheap
-    tool_completions = {
-        "model": GPT.TOOLS_MODEL,
-        "messages": copy.deepcopy(config["completions"]["messages"]),
-    }
-    tool_completions = add_tools(tool_completions)
-    tool_client = {k: v for k, v in config["client"].items() if k != "http_client"} | {"base_url": GPT.TOOLS_BASE_URL, "api_key": GPT.TOOLS_API_KEY}
-    tools_config = {
-        "friendly_name": config["friendly_name"],
-        "client": tool_client,
-        "completions": tool_completions,
-    }
-    try:
-        response = await send_to_gpt(tools_config, retry=0, **kwargs)
-        tool_call = glom(response, "choices.0.message.tool_calls.0", default={})
-        if not tool_call or glom(tool_call, "function.name", default="") != "get_online_search_result":
-            return config, response
-        args = json.loads(glom(tool_call, "function.arguments", default="{}"))
-        logger.debug(f"Online search tool call args: {args}")
-        await modify_progress(text=f"正在联网搜索信息:\n{args.get('query', '')}", force_update=True, **kwargs)
-        if tool_result := await get_online_search_result(**args):
-            config["completions"] = add_search_results_to_prompts(tool_result, config["completions"])
-            config["search_results"] = tool_result  # save search results for future use
-            return config, {}
-    except Exception as e:
-        logger.error(f"Tools_response failed: {e}")
-    return config, {}
src/llm/utils.py
@@ -1,370 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import ast
-import random
-import re
-import tempfile
-from datetime import datetime
-from pathlib import Path
-
-import markdown
-import tiktoken
-from google import genai
-from google.genai.types import HttpOptions
-from loguru import logger
-from markitdown import MarkItDown
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
-
-from config import DOWNLOAD_DIR, GEMINI, GPT, PREFIX, TEXT2IMG
-from utils import nowdt, number_to_emoji, read_text, remove_consecutive_newlines, remove_dash, remove_pound, strings_list, zhcn
-
-BOT_TIPS = "(回复以继续)"  # noqa: RUF001
-REASONING_BEGIN = "🤔"  # use emoji to separate model reasoning and content
-REASONING_END = "💡"
-
-
-def enabled_providers() -> tuple[list[str], list[str]]:
-    """Get enabled providers.
-
-    Returns:
-        (text_providers, img_providers)
-    """
-    text_providers = []
-    if all([GPT.OPENAI_MODEL, GPT.OPENAI_MODEL_NAME, GPT.OPENAI_API_KEY, GPT.OPENAI_BASE_URL]):
-        text_providers.append("openai")
-    if all([GPT.GROK_MODEL, GPT.GROK_MODEL_NAME, GPT.GROK_API_KEY, GPT.GROK_BASE_URL]):
-        text_providers.append("grok")
-    if all([GPT.DEEPSEEK_MODEL, GPT.DEEPSEEK_MODEL_NAME, GPT.DEEPSEEK_API_KEY, GPT.DEEPSEEK_BASE_URL]):
-        text_providers.append("deepseek")
-    if all([GPT.QWEN_MODEL, GPT.QWEN_MODEL_NAME, GPT.QWEN_API_KEY, GPT.QWEN_BASE_URL]):
-        text_providers.append("qwen")
-    if all([GPT.DOUBAO_MODEL, GPT.DOUBAO_MODEL_NAME, GPT.DOUBAO_API_KEY, GPT.DOUBAO_BASE_URL]):
-        text_providers.append("doubao")
-    if all([GPT.KIMI_MODEL, GPT.KIMI_MODEL_NAME, GPT.KIMI_API_KEY, GPT.KIMI_BASE_URL]):
-        text_providers.append("kimi")
-    if all([GEMINI.API_KEY, GEMINI.BASE_URL, GEMINI.TEXT_MODEL, GEMINI.TEXT_MODEL_NAME]):
-        text_providers.append("gemini")
-
-    img_providers = []
-    if all([TEXT2IMG.GEMINI_API_KEY, TEXT2IMG.GEMINI_BASE_URL, TEXT2IMG.GEMINI_MODEL]):
-        img_providers.append("gemini")
-    if all([TEXT2IMG.ALI_API_KEY]):
-        img_providers.append("ali")
-    if all([TEXT2IMG.CF_API_KEY]):
-        img_providers.append("cloudflare")
-    if all([TEXT2IMG.DOUBAO_API_KEY, TEXT2IMG.DOUBAO_SEEDREAM_MODEL]):
-        img_providers.append("doubao")
-    if all([TEXT2IMG.ZIMAGE_API_URL]):
-        img_providers.append("zimage")
-    return text_providers, img_providers
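-
-# A hedged usage sketch, mirroring the call sites in llm/models.py:
-#   text_providers, img_providers = enabled_providers()
-#   if "gemini" in text_providers:
-#       ...  # route /gemini commands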
-
-
-def llm_cleanup_files(messages: list[dict]):
-    """Clean downloaded files.
-
-    [
-      {
-          'role': 'user',
-          'content': [
-              {'type': 'text', 'text': 'text'},
-              {'type': 'image_url', 'image_url': {'url': 'https://server.com/dir/image.jpg'}},
-              {"type": "video_url", "video_url": {"url": "https://server.com/dir/video.mp4"}},  # 智谱
-              {"type": "video", "video": ["https://server.com/dir/1.jpg","https://server.com/dir/2.jpg","https://server.com/dir/3.jpg"]},  # 千问
-              {"audio": "https://server.com/dir/audio.mp3"},
-            ]
-      }
-    ]
-    """
-    for item in messages:
-        content = item.get("content", [])
-        if not isinstance(content, list):
-            continue
-        for x in content:
-            if url := x.get("image_url", {}).get("url"):
-                (Path(DOWNLOAD_DIR) / Path(url).name).unlink(missing_ok=True)
-            if url := x.get("video_url", {}).get("url"):
-                (Path(DOWNLOAD_DIR) / Path(url).name).unlink(missing_ok=True)
-            if urls := x.get("video", []):
-                for url in urls:
-                    (Path(DOWNLOAD_DIR) / Path(url).name).unlink(missing_ok=True)
-            if url := x.get("audio"):
-                (Path(DOWNLOAD_DIR) / Path(url).name).unlink(missing_ok=True)
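A minimal sketch of the cleanup contract, assuming the media were downloaded into DOWNLOAD_DIR under their URL basenames (the message list below is hypothetical):

```python
# Hypothetical multimodal context; only the URL basenames matter here.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "describe this image"},
            {"type": "image_url", "image_url": {"url": "https://server.com/dir/image.jpg"}},
        ],
    }
]
llm_cleanup_files(messages)  # unlinks DOWNLOAD_DIR/image.jpg if it exists
```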
-
-
-def count_tokens(string: str, encoding_name: str | None = None) -> int:
-    """Returns the number of tokens in a text string."""
-    if not string:
-        return 0
-    if encoding_name is None:
-        encoding_name = GPT.TOKEN_ENCODING
-    try:
-        encoding = tiktoken.get_encoding(encoding_name)
-        return len(encoding.encode(string))
-    except Exception as e:
-        logger.error(f"Error in count_tokens: {e}")
-        return 0
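A couple of hedged call shapes (exact counts depend on the tiktoken encoding in use):

```python
count_tokens("hello world")                 # counted with GPT.TOKEN_ENCODING (o200k_base by default)
count_tokens("hello world", "cl100k_base")  # explicit encoding name
count_tokens("")                            # 0, short-circuits before tiktoken is touched
```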
-
-
-def parse_as_dict(s: str, need_prefix: str | None = None) -> dict:
-    """Parse the given string as a dictionary.
-
-    If `need_prefix` is provided, only parse the string if it starts with `need_prefix`.
-    """
-    if need_prefix is not None:
-        if not s.startswith(need_prefix):
-            return {}
-        s = s[len(need_prefix) :]
-    s = re.sub(r"\btrue\b", "True", s)
-    s = re.sub(r"\bfalse\b", "False", s)
-    s = re.sub(r"\bnull\b", "None", s)
-    try:
-        data = ast.literal_eval(s)
-        if isinstance(data, dict):
-            return data
-    except (ValueError, SyntaxError):
-        return {}
-    return {}
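A few hypothetical inputs showing the JSON-to-Python-literal normalization (true/false/null are rewritten so `ast.literal_eval` can parse them; the `/set ` prefix is illustrative, not a real command):

```python
parse_as_dict('{"stream": true, "size": null}')
# -> {'stream': True, 'size': None}
parse_as_dict('/set {"watermark": false}', need_prefix="/set ")
# -> {'watermark': False}
parse_as_dict("not a dict at all")
# -> {} (literal_eval raises SyntaxError, which is swallowed)
```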
-
-
-def beautify_model_name(name: str) -> str:
-    """Beautify model name.
-
-    Args:
-        name: model name
-    Returns:
-        beautified model name
-    """
-    if not name:
-        return name
-    # example: openai/gpt-4o:online
-
-    # remove suffix ":"
-    parts = name.split(":")
-    if len(parts) > 1:
-        name = "".join(parts[:-1])  # openai/gpt-4o
-
-    # remove prefix "/"
-    name = name.split("/")[-1]  # gpt-4o
-    # remove "-latest"
-    name = name.replace("-latest", "")
-    # remove "-exp"
-    name = name.replace("-experimental", "").replace("-exp", "")
-    # remove "-preview"
-    name = name.replace("-preview", "")
-
-    return name.replace("gpt", "GPT").replace("gemini", "Gemini").replace("deepseek", "DeepSeek")  # GPT-4o
-
-
-def beautify_llm_response(text: str, newline_level: int = 3) -> str:
-    """Beautify LLM response.
-
-    Args:
-        text: LLM response
-    Returns:
-        beautified LLM response
-    """
-    if not text:
-        return text
-    clean_text = clean_source_marks(text)
-    clean_text = remove_pound(clean_text)
-    clean_text = remove_dash(clean_text)
-    clean_text = zhcn(clean_text)
-    return remove_consecutive_newlines(clean_text, newline_level)
-
-
-def clean_source_marks(text: str) -> str:
-    """Remove [username], [message], ... marks.
-
-    Should align with the tags in `contexts.py`
-    """
-    if not text:
-        return text
-    clean_text = ""
-    for line in text.split("\n"):
-        if line.strip().startswith(("[username]:", "[filename]:", "[fileowner]:")):
-            continue
-        if line.strip() in ["[message]:", "[file content]:"]:
-            continue
-        clean_text += line + "\n"
-    return clean_text.removesuffix("\n")  # remove the last newline
-
-
-def extract_reasoning(text: str) -> tuple[str, str]:
-    """Extract reasoning from text.
-
-    "<think>
-    {reasoning_content}
-    </think>
-
-    {content}"
-    """
-    reasoning = ""
-    if matched := re.search(r"^<think>(.*?)</think>", text.lstrip(), flags=re.DOTALL):
-        reasoning = matched.group(1)
-        text = re.sub(r"<think>(.*?)</think>", "", text, count=1, flags=re.DOTALL)  # remove <think>...</think>
-    if matched := re.search(r"^<thinking>(.*?)</thinking>", text.lstrip(), flags=re.DOTALL):
-        reasoning = matched.group(1)
-        text = re.sub(r"<thinking>(.*?)</thinking>", "", text, count=1, flags=re.DOTALL)
-
-    # Reverse engineered Web API
-    if matched := re.search(r"^>?(正在)?推理(.*?)(,持续.*?)秒\n\n(.*)", text.lstrip(), flags=re.DOTALL):  # noqa: RUF001
-        reasoning = matched.group(2)
-        text = matched.group(4)
-    if matched := re.search(r"^>?\s?Reasoning(.*?)Reasoned(.*?)seconds\n\n(.*)", text.lstrip(), flags=re.DOTALL):
-        reasoning = matched.group(1)
-        text = matched.group(3)
-
-    return reasoning.strip(), text.strip().removeprefix("{content}").strip()
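A sketch against the `<think>` convention handled above (the sample text is hypothetical):

```python
reasoning, content = extract_reasoning(
    "<think>\nUser asks for a summary.\n</think>\n\nHere is the summary."
)
assert reasoning == "User asks for a summary."
assert content == "Here is the summary."
```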
-
-
-def add_search_results_to_response(search_results: list[dict], response: str) -> str:
-    """Add search results to response."""
-    if not search_results or not response:
-        return response
-    response = response.strip()
-    for idx, result in enumerate(search_results):
-        title = result.get("title", "")
-        link = result.get("link", "")
-        if link.startswith("http") and f"({link})" in response:
-            response += f"\n{number_to_emoji(idx + 1)} [{title}]({link})"
-    return response.strip()
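An illustration of the citation-footer behavior: only links already cited inline as `(url)` are appended, and `number_to_emoji` is assumed to render 1 as 1️⃣ (sample data is hypothetical):

```python
response = "See the report (https://example.com/a) for details."
results = [
    {"title": "Example report", "link": "https://example.com/a"},
    {"title": "Unused source", "link": "https://example.com/b"},  # not cited inline, skipped
]
add_search_results_to_response(results, response)
# -> "See the report (https://example.com/a) for details.\n1️⃣ [Example report](https://example.com/a)"
```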
-
-
-def image_emoji(capability: bool) -> str:  # noqa: FBT001
-    """Get image capability emoji."""
-    return "🏞" if capability else ""
-
-
-def clean_cmd_prefix(text: str, model_id: str = "") -> str:
-    for prefix in [*strings_list(PREFIX.GPT), PREFIX.GENIMG]:
-        text = text.removeprefix(prefix).lstrip()
-    if model_id:
-        text = text.removeprefix(f"@{model_id}").lstrip()
-    return text
-
-
-def clean_bot_tips(text: str) -> str:
-    if not text:
-        return ""
-    return re.sub(rf"^🤖(.*?){BOT_TIPS}", "", text, flags=re.DOTALL).strip()
-
-
-def clean_reasoning(text: str) -> str:
-    if not text:
-        return ""
-    text = re.sub(rf"{REASONING_BEGIN}(.*?){REASONING_END}", "", text.strip(), flags=re.DOTALL).strip()
-    text = text.removeprefix(BLOCKQUOTE_EXPANDABLE_DELIM).lstrip()
-    return text.removeprefix(BLOCKQUOTE_EXPANDABLE_END_DELIM).lstrip()
-
-
-def clean_context(text: str, model_id: str = "") -> str:
-    """Remove bot prefix and reasoning content."""
-    text = re.sub(r"^👤@.*?\/\/", "", text)  # remove markdown send_from_user
-    text = re.sub(r"^👤\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", text)  # remove html send_from_user
-    text = clean_cmd_prefix(text, model_id)
-    text = clean_bot_tips(text)
-    return clean_reasoning(text)
-
-
-def clean_gemini_sourcemarks(contexts: list[dict]) -> None:
-    """Clean Gemini source marks."""
-    for item in contexts:
-        for part in item.get("parts", []):
-            if part.text:
-                part.text = clean_source_marks(part.text)
-
-
-def convert_md(path: str | Path | None = None, html: str | None = None) -> str:
-    """Convert to markdown format."""
-    md = MarkItDown()
-    if path is not None:
-        path = Path(path).expanduser().resolve()
-        if not path.is_file():
-            return ""
-        result = md.convert(path)
-        return result.text_content
-    if html is not None:
-        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
-            f.write(html)
-        result = md.convert(f.name)
-        Path(f.name).unlink(missing_ok=True)
-        return result.text_content
-    return ""
-
-
-def convert_html(texts: str = "", path: str | Path | None = None) -> str:
-    """Convert to markdown format."""
-    if path is not None:
-        path = Path(path).expanduser().resolve()
-        if not path.is_file():
-            return ""
-        texts = read_text(path)
-    texts = markdown.markdown(texts)
-    return texts.replace("\n", "<br>")
-
-
-def split_reasoning(text: str) -> tuple[str, str]:
-    """Split reasoning from text.
-
-    Args:
-        text: LLM response
-    Returns:
-        (reasoning, content)
-    """
-    text = clean_cmd_prefix(text)
-    text = clean_bot_tips(text)
-    content = clean_reasoning(text)
-    reasoning = ""
-    if matched := re.search(rf"{REASONING_BEGIN}(.*?){REASONING_END}", text, flags=re.DOTALL):
-        reasoning = REASONING_BEGIN + matched.group(1) + REASONING_END
-    return reasoning.strip(), content.strip()
-
-
-def raw_reasoning(text: str) -> str:
-    """Extract raw reasoning from text."""
-    if matched := re.search(rf"{REASONING_BEGIN}(.*?){REASONING_END}", text, flags=re.DOTALL):
-        return matched.group(1)
-    return text
-
-
-def shuffle_keys(keys: str | list[str]) -> str:
-    """Shuffle comma speparated string."""
-    if isinstance(keys, str):
-        keys = [x.strip() for x in keys.split(",") if x.strip()]
-    elif isinstance(keys, list):
-        keys = [x.strip() for x in keys if x.strip()]
-    else:
-        return ""
-    random.shuffle(keys)
-    return ",".join(keys)
-
-
-def sample_key(keys: str | list[str]) -> str:
-    """Sample one key from comma speparated string."""
-    if isinstance(keys, str):
-        keys = [x.strip() for x in keys.split(",") if x.strip()]
-    elif isinstance(keys, list):
-        keys = [x.strip() for x in keys if x.strip()]
-    else:
-        return ""
-    if not keys:
-        return ""
-    return random.choice(keys)
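Both helpers back the comma-separated key-pool convention used throughout the config; a quick sketch:

```python
keys = "key1, key2 , key3"
shuffle_keys(keys)  # e.g. "key3,key1,key2" -- random order, whitespace stripped
sample_key(keys)    # e.g. "key2" -- one key drawn at random
sample_key("")      # "" -- an empty pool yields an empty string
```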
-
-
-async def clean_gemini_files():
-    """Clean Gemini files.
-
-    Gemini allows only 20 GB of data.
-    """
-    if GEMINI.CLEAN_FILES_AFTER_SECONDS >= 48 * 3600:
-        return
-    now = nowdt()
-    for api_key in [x.strip() for x in GEMINI.API_KEY.split(",") if x.strip()]:
-        app = genai.Client(api_key=api_key, http_options=HttpOptions(async_client_args={"proxy": GEMINI.PROXY}))
-        for f in await app.aio.files.list():
-            if isinstance(f.update_time, datetime) and isinstance(f.name, str):
-                delta = now - f.update_time
-                if delta.total_seconds() > GEMINI.CLEAN_FILES_AFTER_SECONDS:
-                    logger.debug(f"Delete Gemini file: {f.name}")
-                    await app.aio.files.delete(name=f.name)
src/messages/help.py
@@ -37,14 +37,14 @@ def social_media_help(chat_id: int | str, ctype: str, prefix: str):
         msg += "\n🅱️哔哩哔哩"
         msg += "\n🆕和所有yt-dlp支持的链接\n"
     if permission["ai"]:
-        msg += f"\n🤖**AI对话**: `{PREFIX.GPT}`"
-        msg += f"\n🌠**AI生图**: `{PREFIX.GENIMG}` + 提示词"
+        msg += f"\n🤖**AI对话**: `{PREFIX.AI_TEXT_GENERATION}`"
+        msg += f"\n🌠**AI生图**: `{PREFIX.AI_IMG_GENERATION}` + 提示词"
         msg += f"\n📖**AI总结**: 发送 `{PREFIX.AI_SUMMARY}` 查看详细教程"
     if permission["asr"]:
         msg += f"\n🗣**语音转文字**: `{PREFIX.ASR}` + 语音消息"
     if permission["tts"]:
         msg += f"\n🗣**文字转语音**: `{PREFIX.TTS}` + 文字"
-    if permission["audio"]:
+    if permission["audio_extract"]:
         msg += f"\n🎧**提取音频或语音**: `{PREFIX.AUDIO}` `{PREFIX.VOICE}` + 视频/语音消息"
     if permission["ocr"]:
         msg += f"\n🔤**图片转文字**: `{PREFIX.OCR}` + 图片消息"
@@ -60,7 +60,7 @@ def social_media_help(chat_id: int | str, ctype: str, prefix: str):
         msg += f"\n🎬**查询影视信息**: `{PREFIX.TMDB}` + 关键词"
     if permission["ytb"]:
         msg += f"\n🔍**搜索YouTube**: `{PREFIX.SEARCH_YOUTUBE}` + 关键词"
-    if permission["google"]:
+    if permission["google_search"]:
         msg += f"\n🔍**搜索Google**: `{PREFIX.SEARCH_GOOGLE}` + 关键词"
     if permission["danmu"]:
         msg += f"\n📖**查询直播合订本**: 发送 `{PREFIX.DANMU}`, `{PREFIX.FAYAN}` 查看详细教程"
src/messages/main.py
@@ -6,14 +6,14 @@ from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
 
+from ai.chat_summary import ai_chat_summary
+from ai.main import ai_image_generation, ai_text_generation
 from asr.voice_recognition import voice_to_text
 from bridge.ocr import send_to_ocr_bridge
 from config import FAVORITE, PREFIX, PROXY
 from danmu.entrypoint import query_danmu
 from database.database import del_db
 from history.query import query_chat_history
-from llm.gpt import gpt_response
-from llm.summary import ai_summary
 from messages.help import social_media_help
 from messages.modify import parse_kwargs
 from messages.parser import parse_msg
@@ -99,7 +99,8 @@ async def process_message(
     message, msg_kwargs = parse_kwargs(message)
     kwargs |= msg_kwargs  # merge the kwargs from the message text
     if ai:
-        await gpt_response(client, message, **kwargs)  # /ai
+        await ai_text_generation(client, message, **kwargs)  # /ai
+        await ai_image_generation(client, message, **kwargs)  # /gen
     if asr:
         await voice_to_text(client, message, **kwargs)  # /asr
     if audio_extract:
@@ -119,7 +120,7 @@ async def process_message(
     if history:
         await query_chat_history(client, message, **kwargs)  # /history
     if summary:
-        await ai_summary(client, message, **kwargs)  # /summary
+        await ai_chat_summary(client, message, **kwargs)  # /summary
     if danmu:
         await query_danmu(client, message, **kwargs)  # /danmu
     if favorite:
@@ -186,13 +187,13 @@ async def preview_social_media(
     ignore_prefix = [
         PREFIX.ASR,
         PREFIX.AI_SUMMARY,
+        PREFIX.AI_TEXT_GENERATION,
+        PREFIX.AI_IMG_GENERATION,
         PREFIX.AUDIO,
         PREFIX.COMBINATION,
         PREFIX.CONVERT,
         PREFIX.CRYPTO,
         PREFIX.DANMU,
-        PREFIX.GENIMG,
-        PREFIX.GPT,
         PREFIX.OCR,
         PREFIX.PRICE,
         PREFIX.SEARCH_GOOGLE,
src/messages/modify.py
@@ -226,7 +226,7 @@ def escape_strings(s: str) -> str:
     "foo__DASH__bar" -> "foo-bar"
     "http__COLON____SLASH____SLASH__www__DOT__example__DOT__com" -> "http://www.example.com"
     """
-    if "__" not in s:
+    if not isinstance(s, str) or "__" not in s:
         return s
     return STRING_RE_PATTERN.sub(lambda m: STRING_MAP[m.group(0)], s)
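The placeholder round-trip from the docstring, plus the new non-string guard (STRING_MAP and STRING_RE_PATTERN are the module-level tables this function relies on):

```python
escape_strings("foo__DASH__bar")
# -> "foo-bar"
escape_strings("http__COLON____SLASH____SLASH__www__DOT__example__DOT__com")
# -> "http://www.example.com"
escape_strings(123)
# -> 123 (non-str input now passes through unchanged)
```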
 
@@ -266,12 +266,12 @@ def parse_kwargs(message: Message) -> tuple[Message, dict]:
         texts = re.sub(rf"#with_{match}", "", texts, flags=re.IGNORECASE)
 
     # Pattern 3: #set_xx=var -> kwargs["xx"] = var
-    for match in re.findall(r"#set_(\w+)=([^#\s]+)", texts, re.IGNORECASE):
+    for match in re.findall(r"#set_(\w+)=([^#\n]+)", texts, re.IGNORECASE):
         key, value = match
         texts = re.sub(rf"#set_{key}={value}", "", texts, flags=re.IGNORECASE)
-        if str(value).lower() in ["none", "null"]:
+        if str(value).strip().lower() in ["none", "null"]:
             value = None
-        kwargs[key.lower()] = value
+        kwargs[key.lower()] = value.strip() if value is not None else None
     if kwargs:
         logger.info(f"🔧手动设置参数: {kwargs}")
         if message.text:
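Widening the value pattern from `[^#\s]+` to `[^#\n]+` lets a `#set_` value contain spaces, stopping only at the next `#` or end of line; a sketch of the widened matching on a hypothetical message text:

```python
import re

texts = "download this #set_caption=My Long Title #set_quality=1080"
kwargs = {}
for key, value in re.findall(r"#set_(\w+)=([^#\n]+)", texts, re.IGNORECASE):
    kwargs[key.lower()] = value.strip()
# kwargs == {"caption": "My Long Title", "quality": "1080"}
# the old [^#\s]+ pattern would have captured only "My"
```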
src/messages/progress.py
@@ -51,6 +51,7 @@ async def modify_progress(
     try:
         if not text:
             return
+        text = str(text)
         if cache.get("modify_progress"):  # DO NOT update too frequently
             detail_progress = False
         if force_update:
src/messages/utils.py
@@ -142,6 +142,8 @@ def sender_markdown_to_html(sender: str) -> str:
 
 
 async def count_without_entities(strings: str, mode: ParseMode = ParseMode.DEFAULT) -> int:
+    if not strings:
+        return 0
     parser = Parser(client=None)
     parsed = await parser.parse(strings, mode=mode)
     return len(parsed["message"])
src/others/download_external.py
@@ -7,7 +7,6 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 
 from config import MAX_FILE_BYTES, PREFIX, PROXY
-from llm.utils import convert_md
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
@@ -15,7 +14,7 @@ from messages.utils import equal_prefix, get_reply_to, startswith_prefix
 from multimedia import is_valid_video_or_audio, validate_img
 from networking import download_file
 from publish import publish_telegraph
-from utils import find_url, guess_mime, readable_size, to_int
+from utils import convert_md, find_url, guess_mime, readable_size, to_int
 
 HELP = f"""
 ⏬**下载文件**
src/others/search_google.py
@@ -56,7 +56,7 @@ async def query_google(query: str) -> list[dict]:
             "safe": "off",
             "gl": GOOGLE_SEARCH_GL,
         }
-        response = await hx_req(api, proxy=PROXY.YOUTUBE_SEARCH, params=params, check_keys=["items"], max_retry=0)
+        response = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items"], max_retry=0)
         return glom(response, "items", default=[]) or []
     except Exception as e:
         logger.error(e)
src/others/search_ytb.py
@@ -52,7 +52,7 @@ async def search_youtube(client: Client, message: Message, **kwargs):
 async def query_youtube(query: str) -> dict:
     results = []
     try:
-        logger.info(f"Query YouTube info for {query=}, proxy={PROXY.YOUTUBE_SEARCH}")
+        logger.info(f"Query YouTube info for {query=}, proxy={PROXY.GOOGLE}")
         api = "https://www.googleapis.com/youtube/v3/search"
         params = {
             "key": TOKEN.YOUTUBE_API_KEY,
@@ -62,7 +62,7 @@ async def query_youtube(query: str) -> dict:
             "safeSearch": "none",
             "type": "video",
         }
-        resp = await hx_req(api, proxy=PROXY.YOUTUBE_SEARCH, params=params, check_keys=["items"], max_retry=0)
+        resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items"], max_retry=0)
         if resp.get("hx_error"):
             logger.warning(f"Search YouTube API failed: {resp['hx_error']}")
             return {"error": {resp["hx_error"]}}
src/others/watermark.py
@@ -66,7 +66,7 @@ async def add_image_watermark(client: Client, message: Message, watermark_name:
             DB.CF_R2_PUBLIC_URL + "/Watermark/gemini.png",
             Path(DOWNLOAD_DIR).joinpath("watermark/gemini.png"),
             skip_exist=True,
-            proxy=PROXY.GOOGLE_SEARCH,
+            proxy=PROXY.DOWNLOAD,
         )
         processed_path = gemini_watermark(fpath, wm_path)
         caption = "已添加AI水印: Gemini"
src/podcast/asr.py
@@ -40,7 +40,7 @@ async def get_transcripts(
     desc = glom(entry, Coalesce("content.0.value", "summary"), default="")
     prompt = f"请转录播客栏目《{feed_title}》的一期节目的音频。\n该期节目标题: {entry['title']}\n节目时长: {readable_time(duration)}\n节目简介: {desc}"
     engine = get_asr_engine(feed_title, feed_url)
-    asr_res = await asr_file(tmp_path, prompt=prompt, engine=engine, silent=True)
+    asr_res = await asr_file(tmp_path, asr_prompt=prompt, engine=engine, silent=True)
     Path(tmp_path).unlink(missing_ok=True)
     return asr_res.get("texts", "")
 
src/podcast/main.py
@@ -31,11 +31,10 @@ from pyrogram.client import Client
 from pyrogram.types import Chat, Message
 from pyrogram.types.messages_and_media.message import Str
 
-from config import GPT, PODCAST, PREFIX
+from ai.main import ai_text_generation
+from config import AI, PODCAST, PREFIX, PROXY
 from database.github import gh_clean_assets
 from database.r2 import get_cf_r2, set_cf_r2
-from llm.gpt import gpt_response
-from llm.utils import convert_html, convert_md, remove_consecutive_newlines
 from messages.sender import send2tg
 from networking import download_file, hx_req
 from podcast.asr import get_duration, get_transcripts
@@ -44,7 +43,7 @@ from podcast.xml import get_feed_title, parse_feed, save_xml, update_xml_desc
 from preview.bilibili import get_bilibili_vinfo
 from preview.youtube import get_youtube_vinfo
 from publish import publish_telegraph
-from utils import bare_url, count_subtitles, https_url, nowdt, rand_number, seconds_to_hms, strings_list
+from utils import bare_url, convert_html, convert_md, count_subtitles, https_url, nowdt, rand_number, remove_consecutive_newlines, seconds_to_hms, strings_list
 from ytdlp.download import ytdlp_download
 
 
@@ -69,10 +68,10 @@ async def summary_pods(client: Client):
                 await send2tg(client, message, texts=f"Failed download podcast {feed_title} -- {entry['title']}", reply_msg_id=-1)
                 continue
             try:
-                transcripts = await get_transcripts(info["asr_path"], feed_title, feed_url, entry)
+                transcripts = await get_transcripts(info["asr_path"], feed_title, feed_url, entry)  # TODO
                 if not transcripts:
                     continue
-                duration = await get_duration(info["asr_path"], entry)
+                duration = await get_duration(info["asr_path"], entry)  # TODO
                 duration = seconds_to_hms(duration)
                 dt = get_pubdate(entry)
                 pubdate = f"{dt:%Y-%m-%d %H:%M:%S}"
@@ -82,26 +81,16 @@ async def summary_pods(client: Client):
                 prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
                 prompt += f"\n节目时长: {duration}\n节目简介: {markdown_desc}"
                 prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
-                # Construct a message to call GPT
-                # cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
-                ai_msg = Message(
+                ai_msg = Message(  # Construct a message for AI
                     id=rand_number(),
                     chat=message.chat,
-                    text=Str(f"{strings_list(PREFIX.GPT)[0]} {prompt}"),
+                    text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{AI.PODCAST_SUMMARY_MODEL_ALIAS} {prompt}"),
                     reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(transcripts)),
                 )
-                gpt_res = await gpt_response(
-                    client,
-                    ai_msg,
-                    custom_model_id=GPT.PODCAST_SUMMARY_MODEL_ID,
-                    custom_model_name=GPT.PODCAST_SUMMARY_MODEL_NAME,
-                    include_thoughts=False,
-                    append_grounding=False,
-                    silent=True,
-                )
+                ai_res = await ai_text_generation(client, ai_msg, silent=True)
                 telegraph_content = ""
-                if gpt_res.get("texts"):
-                    telegraph_content += f"\n🤖**{gpt_res['model_name']}总结**:\n{gpt_res['texts']}"
+                if ai_res.get("texts"):
+                    telegraph_content += f"\n🤖**{ai_res['model_name']}总结**:\n{ai_res['texts']}"
                 telegraph_content += f"\n📖**节目简介**:\n {markdown_desc}" if markdown_desc else ""
                 telegraph_content += f"\n🔤**转录字幕**:\n{transcripts}"
 
@@ -122,7 +111,7 @@ async def summary_pods(client: Client):
                 )
 
                 messages = await send2tg(client, message, texts=caption, media=media, reply_msg_id=-1)
-                processed_xml = await update_xml_desc(feed_url, processed_xml, entry, summary=gpt_res.get("texts", ""), audio_path=info["asr_path"])
+                processed_xml = await update_xml_desc(feed_url, processed_xml, entry, summary=ai_res.get("texts", ""), audio_path=info["asr_path"])
                 if isinstance(messages[0], Message):
                     await set_cf_r2(entry["db_key"], data={"title": entry["title"], "url": entry["link"]})
                 has_update = True
@@ -162,7 +151,7 @@ async def get_feed_url_with_title() -> dict[str, str]:
     pods = {feed_url: await get_feed_title(feed_url) for feed_url in strings_list(PODCAST.FEED_URLS)}
     # get from OPML
     for opml in strings_list(PODCAST.OPML_URLS):
-        opml_data = await hx_req(opml, rformat="text", headers=HEADERS, timeout=10, silent=True, proxy=PODCAST.PROXY)
+        opml_data = await hx_req(opml, rformat="text", headers=HEADERS, timeout=10, silent=True, proxy=PROXY.PODCAST)
         data = {}
         with contextlib.suppress(Exception):
             data = xmltodict.parse(opml_data["text"])
src/podcast/xml.py
@@ -12,15 +12,14 @@ import xmltodict
 from glom import Coalesce, glom
 
 from asr.utils import audio_duration
-from config import DB, DOWNLOAD_DIR, PODCAST, cache
+from config import DB, DOWNLOAD_DIR, PODCAST, PROXY, cache
 from database.alist import upload_alist
 from database.github import gh_upload_asset
 from database.r2 import set_cf_r2
-from llm.utils import convert_html
 from networking import hx_req
 from podcast.utils import HEADERS, clean_feed_url, get_pubdate
 from preview.youtube import get_youtube_channel_thumb
-from utils import bare_url, https_url, nowdt
+from utils import bare_url, convert_html, https_url, nowdt
 
 
 @cache.memoize(ttl=600)
@@ -29,7 +28,7 @@ async def parse_feed(feed_url: str, *, raw_xml: bool = False) -> dict:
 
     DO NOT use feedparser.parse(feed_url) because it doesn't support timeout.
     """
-    data = await hx_req(feed_url, rformat="text", headers=HEADERS, timeout=10, silent=True, proxy=PODCAST.PROXY)
+    data = await hx_req(feed_url, rformat="text", headers=HEADERS, timeout=10, silent=True, proxy=PROXY.PODCAST)
     with contextlib.suppress(Exception):
         if raw_xml:
             return xmltodict.parse(data["text"])
src/preview/youtube.py
@@ -27,7 +27,7 @@ async def get_youtube_comments(vid: str | None) -> list[str]:
     params = {"key": TOKEN.YOUTUBE_API_KEY, "maxResults": 100, "textFormat": "plainText", "part": "snippet", "videoId": vid}
     comments = []
     try:
-        resp = await hx_req(api, proxy=PROXY.GOOGLE_SEARCH, params=params, check_keys=["items"])
+        resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items"])
         if resp.get("hx_error"):
             logger.warning(f"YouTube Comments API failed: {resp['hx_error']}")
             return []
@@ -78,10 +78,10 @@ async def get_youtube_vinfo(video_id: str) -> dict:
         return {"downloadable": False, "error_msg": "❌未提供VideoID"}
     info = {"downloadable": False, "error_msg": "❌无法获取此视频信息"}
     try:
-        logger.info(f"Fetch YouTube video info for {video_id=}, proxy={PROXY.GOOGLE_SEARCH}")
+        logger.info(f"Fetch YouTube video info for {video_id=}, proxy={PROXY.GOOGLE}")
         api = "https://www.googleapis.com/youtube/v3/videos"
         params = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet,status,contentDetails,liveStreamingDetails,statistics", "id": video_id, "hl": "zh-CN"}
-        resp = await hx_req(api, proxy=PROXY.GOOGLE_SEARCH, params=params, check_keys=["items.0.snippet"], max_retry=3)
+        resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items.0.snippet"], max_retry=3)
         if resp.get("hx_error"):
             logger.warning(f"YouTube Videos API failed: {resp['hx_error']}")
             return {"downloadable": False, "error_msg": "❌无法获取此视频信息"}
@@ -174,7 +174,7 @@ async def get_youtube_channel_thumb(channel_id: str) -> str:
         return ""
     api = "https://www.googleapis.com/youtube/v3/channels"
     params = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet", "id": channel_id, "hl": "zh-CN"}
-    resp = await hx_req(api, proxy=PROXY.GOOGLE_SEARCH, params=params, check_keys=["items.0.snippet"], max_retry=3)
+    resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items.0.snippet"], max_retry=3)
     if resp.get("hx_error"):
         logger.warning(f"YouTube Channels API failed: {resp['hx_error']}")
         return ""
src/subtitles/subtitle.py
@@ -11,9 +11,9 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 from pyrogram.types.messages_and_media.message import Str
 
+from ai.main import ai_text_generation
 from asr.voice_recognition import asr_file
-from config import ASR, DOWNLOAD_DIR, GPT, PREFIX, READING_SPEED, TEXT_LENGTH, cache
-from llm.gpt import gpt_response
+from config import AI, ASR, DOWNLOAD_DIR, PREFIX, READING_SPEED, TEXT_LENGTH, cache
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
@@ -46,8 +46,7 @@ async def get_subtitle(
     *,
     to_telegraph: bool = True,
     ai_summary: bool = True,
-    summary_model_id: str = GPT.SUBTITLE_SUMMARY_MODEL_ID,
-    summary_model_name: str = GPT.SUBTITLE_SUMMARY_MODEL_NAME,
+    summary_model_id: str = AI.SUBTITLE_SUMMARY_MODEL_ALIAS,
     send_subtitle_as: Literal["file", "str", "none"] = "file",
     **kwargs,
 ):
@@ -133,7 +132,6 @@ async def get_subtitle(
         subtitle_msg = (await send2tg(client, message, texts=f"{caption}\n{subtitles}", **kwargs))[0]
     else:
         subtitle_msg = message
-
     if ai_summary and isinstance(subtitle_msg, Message):
         # use real subtitle (without AI summary by Bilibili)
         subtitles = re.sub(r"(.*?)AI总结(B站版):", "", subtitles, flags=re.DOTALL).strip()  # noqa: RUF001
@@ -142,15 +140,13 @@ async def get_subtitle(
         if description.strip():
             prompt += f"节目简介: {description}\n"
         prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
-        # Construct a message to call GPT
-        ai_msg = Message(
+        ai_msg = Message(  # Construct a message for AI
             id=subtitle_msg.id,
             chat=subtitle_msg.chat,
-            text=Str(f"/ai {prompt}"),
+            text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {prompt}"),
             reply_to_message=Message(id=rand_number(), chat=subtitle_msg.chat, text=Str(subtitles)),
         )
-        kwargs |= {"include_thoughts": False, "append_grounding": False, "silent": True, "custom_model_id": summary_model_id, "custom_model_name": summary_model_name}
-        res = await gpt_response(client, ai_msg, **kwargs)
+        res = await ai_text_generation(client, ai_msg, silent=True)
         if res.get("texts"):
             await send2tg(client, ai_msg, texts=res["prefix"] + blockquote(res["texts"]), **kwargs)
     with contextlib.suppress(Exception):
src/tts/edge.py
@@ -6,7 +6,7 @@ import anyio
 from loguru import logger
 
 from asr.utils import audio_duration
-from config import DOWNLOAD_DIR, TTS
+from config import DOWNLOAD_DIR, PROXY, TTS
 from networking import hx_req
 from utils import markdown_to_text, rand_string
 
@@ -28,7 +28,7 @@ async def edge_tts(texts: str, model: str = "", voice_name: str = "") -> dict:
         "POST",
         headers={"Content-Type": "application/json"},
         json_data={"input": raw_texts, "voice": model, "speed": 1.0, "pitch": "0", "style": "general"},
-        proxy=TTS.EDGE_PROXY,
+        proxy=PROXY.EDGE,
         rformat="content",
     )
     if not isinstance(response.get("content"), bytes):
src/tts/gemini.py
@@ -10,15 +10,14 @@ from google.genai import types
 from google.genai.types import HttpOptions
 from loguru import logger
 from pyrogram.enums import ParseMode
-from pyrogram.types import Message
 
-from config import DOWNLOAD_DIR, GEMINI, TTS
-from llm.hooks import hook_gemini_httpoptions
+from ai.utils import literal_eval
+from config import AI, DOWNLOAD_DIR, PROXY, TTS
 from messages.utils import smart_split
 from utils import markdown_to_text, rand_string, strings_list
 
 
-async def gemini_tts(message: Message, texts: str, model: str = "", voice_name: str = "") -> dict:
+async def gemini_tts(texts: str, model: str = "", voice_name: str = "") -> dict:
     """Gemini TTS.
 
     https://ai.google.dev/gemini-api/docs/speech-generation
@@ -31,17 +30,17 @@ async def gemini_tts(message: Message, texts: str, model: str = "", voice_name:
     raw_texts = markdown_to_text(texts)
     num_token = await count_token(raw_texts, model)
     if num_token < TTS.GEMINI_INPUT_TOKEN_LIMIT:
-        return await gemini_tts_real(message, texts, model, voice_name, return_bytes=False)
+        return await gemini_tts_real(texts, model, voice_name, return_bytes=False)
     # split
     text_list = await smart_split(texts, chars_per_string=TTS.GEMINI_SPLIT_LENGTH, mode=ParseMode.DISABLED)
-    resp = await asyncio.gather(*[gemini_tts_real(message, text, model, voice_name) for text in text_list])
+    resp = await asyncio.gather(*[gemini_tts_real(text, model, voice_name) for text in text_list])
     wav_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
     combined_data = b"".join([r["voice"] for r in resp])
     save_wave_file(wav_path, combined_data)
     return {"voice": wav_path, "duration": calculate_duration(combined_data), "voice_name": voice_name, "model": model}
 
 
-async def gemini_tts_real(message: Message, texts: str, model: str, voice_name: str, *, return_bytes: bool = True) -> dict:
+async def gemini_tts_real(texts: str, model: str, voice_name: str, *, return_bytes: bool = True) -> dict:
     """Gemini TTS.
 
     Args:
@@ -50,12 +49,17 @@ async def gemini_tts_real(message: Message, texts: str, model: str, voice_name:
     Returns:
         {"voice": str or bytes, "duration": int, "voice_name": str, "model": str}
     """
-    for api_key in strings_list(GEMINI.API_KEY, shuffle=True):
+    for api_key in strings_list(AI.GEMINI_API_KEYS, shuffle=True):
         try:
-            logger.debug(f"TTS via {model}, proxy={GEMINI.PROXY}, voice: {voice_name}, texts: {texts}")
-            http_options = HttpOptions(base_url=GEMINI.BASE_URL, async_client_args={"proxy": GEMINI.PROXY})
-            http_options = hook_gemini_httpoptions(http_options, message)
-            app = genai.Client(api_key=api_key, http_options=http_options)
+            logger.debug(f"TTS via {model}, proxy={PROXY.GOOGLE}, voice: {voice_name}, texts: {texts}")
+            app = genai.Client(
+                api_key=api_key,
+                http_options=HttpOptions(
+                    base_url=AI.GEMINI_BASE_URL,
+                    headers=literal_eval(AI.GEMINI_DEFAULT_HEADERS),
+                    async_client_args={"proxy": PROXY.GOOGLE},
+                ),
+            )
             response = await app.aio.models.generate_content(
                 model=model,
                 contents=markdown_to_text(texts),
@@ -101,7 +105,13 @@ def calculate_duration(pcm: bytes, channels: int = 1, rate: float = 24000, sampl
 
 async def count_token(texts: str, model_id: str = "") -> int:
     model = model_id or TTS.GEMINI_MODEL
-    http_options = HttpOptions(async_client_args={"proxy": GEMINI.PROXY})
-    app = genai.Client(api_key=strings_list(GEMINI.API_KEY, shuffle=True)[0], http_options=http_options)
+    app = genai.Client(
+        api_key=strings_list(AI.GEMINI_API_KEYS, shuffle=True)[0],
+        http_options=HttpOptions(
+            base_url=AI.GEMINI_BASE_URL,
+            headers=literal_eval(AI.GEMINI_DEFAULT_HEADERS),
+            async_client_args={"proxy": PROXY.GOOGLE},
+        ),
+    )
     response = await app.aio.models.count_tokens(model=model, contents=texts)
     return response.total_tokens or 0
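The client headers now come from AI.GEMINI_DEFAULT_HEADERS via literal_eval. Assuming that setting is a dict literal stored as a string and that `ai.utils.literal_eval` behaves like `ast.literal_eval` with an empty-dict fallback, the expected shape is:

```python
import ast

raw = '{"x-custom-header": "my-bot/1.0"}'  # hypothetical AI_GEMINI_DEFAULT_HEADERS value
headers = ast.literal_eval(raw)
# -> {"x-custom-header": "my-bot/1.0"}, handed to HttpOptions(headers=...)
```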
src/tts/qwen.py
@@ -9,7 +9,7 @@ from glom import glom
 from loguru import logger
 from pyrogram.enums import ParseMode
 
-from config import DOWNLOAD_DIR, TTS
+from config import DOWNLOAD_DIR, PROXY, TTS
 from messages.utils import smart_split
 from networking import download_file, hx_req
 from utils import markdown_to_text, rand_string, strings_list
@@ -56,11 +56,11 @@ async def qwen_tts_real(texts: str, model: str, voice_name: str) -> dict:
                 "POST",
                 headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                 json_data={"model": model, "input": {"text": markdown_to_text(texts), "voice": voice_name}},
-                proxy=TTS.ALI_PROXY,
+                proxy=PROXY.ALI,
                 check_keys=["output.audio.url", "usage.output_tokens"],
             )
             url = glom(response, "output.audio.url", default="")
-            save_path = await download_file(url, proxy=TTS.ALI_PROXY)
+            save_path = await download_file(url, proxy=PROXY.ALI)
             duration = glom(response, "usage.output_tokens", default=0) / 50  # 1s = 50 tokens
         except Exception as e:
             logger.error(e)
src/tts/tts.py
@@ -60,7 +60,7 @@ async def text_to_speech(client: Client, message: Message, **kwargs):
         texts = info["text"]
     await set_reaction(client, reaction_msg, reaction="👌")
     if engine == "gemini":
-        resp = await gemini_tts(message, texts, model, voice_name)
+        resp = await gemini_tts(texts, model, voice_name)
     elif engine == "qwen":
         resp = await qwen_tts(texts, model, voice_name)
     elif engine == "sambert":
src/ytdlp/download.py
@@ -80,10 +80,10 @@ async def ytdlp_download(
 
 def download_video_info(url: str, ytdlp_opts: dict, json_path: str | Path) -> dict:
     try:
-        with YoutubeDL(ytdlp_opts) as ydl:
+        with YoutubeDL(ytdlp_opts) as ydl:  # type: ignore
             info: dict = ydl.extract_info(url, download=False)  # type: ignore
             with Path(json_path).open("w") as f:
-                json.dump(ydl.sanitize_info(info), f, ensure_ascii=False, indent=2)
+                json.dump(ydl.sanitize_info(info), f, ensure_ascii=False, indent=2)  # type: ignore
             # add custom fields
             info["extractor"] = info.get("extractor", "").lower()
             info["author"] = glom(info, Coalesce("uploader", "series", "extractor"))
@@ -170,7 +170,7 @@ def retry(func, max_retries=5):
 
 @retry
 def download_video(json_path: str, ytdlp_opts: dict, result: dict) -> dict:
-    with YoutubeDL(ytdlp_opts) as ydl:
+    with YoutubeDL(ytdlp_opts) as ydl:  # type: ignore
         error_code = ydl.download_with_info_file(json_path)  # 0: success, 1: error
     if error_code != 0 and not result.get("ytdlp_error"):
         url = unquote_plus(Path(json_path).stem)
src/ytdlp/main.py
@@ -13,9 +13,9 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 from pyrogram.types.messages_and_media.message import Str
 
-from config import ASR, CAPTION_LENGTH, DB, GPT, MAX_FILE_BYTES, PREFIX, READING_SPEED, YTDLP_RE_ENCODING_MAX_FILE_BYTES
+from ai.main import ai_text_generation
+from config import AI, ASR, CAPTION_LENGTH, DB, MAX_FILE_BYTES, PREFIX, READING_SPEED, YTDLP_RE_ENCODING_MAX_FILE_BYTES
 from database.database import get_db
-from llm.gpt import gpt_response
 from messages.database import copy_messages_from_db, save_messages
 from messages.preprocess import preprocess_media
 from messages.progress import modify_progress, telegram_uploading
@@ -25,7 +25,7 @@ from multimedia import convert_to_h264
 from preview.bilibili import get_bilibili_comments, get_bilibili_vinfo, make_bvid_clickable
 from preview.youtube import get_youtube_comments, get_youtube_vinfo
 from publish import publish_telegraph
-from utils import count_subtitles, rand_number, readable_size, readable_time, soup_to_text, strings_list, to_int, true, ts_to_dt, unicode_to_ascii
+from utils import count_subtitles, rand_number, readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
 from ytdlp.download import ytdlp_download
 from ytdlp.utils import append_subtitle, cleanup_ytdlp, get_subtitles, platform_emoji
 
@@ -50,8 +50,7 @@ async def preview_ytdlp(
     ytdlp_subtitle_target: str | int | None = None,
     ytdlp_send_subtitle: bool = False,
     ytdlp_send_summary: bool = False,
-    summary_model_id: str = GPT.SUBTITLE_SUMMARY_MODEL_ID,
-    summary_model_name: str = GPT.SUBTITLE_SUMMARY_MODEL_NAME,
+    summary_model_id: str = AI.SUBTITLE_SUMMARY_MODEL_ALIAS,
     to_telegraph: bool = True,
     show_author: bool = True,
     show_title: bool = True,
@@ -81,7 +80,6 @@ async def preview_ytdlp(
         ytdlp_send_subtitle (bool, optional): Send subtitle. Defaults to False.
         ytdlp_send_summary (bool, optional): Send AI summary. Defaults to False.
         summary_model_id (str, optional): The model id to use for AI summary.
-        summary_model_name (str, optional): The model name to use for AI summary.
         to_telegraph (bool, optional): Whether to publish the subtitle or transcription to telegraph.
     """
     logger.trace(f"{url=} {kwargs=}")
@@ -144,23 +142,13 @@ async def preview_ytdlp(
     summary = ""
     if subtitles and true(ytdlp_send_summary):
         prompt = generate_prompt(info)
-        # Construct a message to call GPT
-        ai_msg = Message(
+        ai_msg = Message(  # Construct a message for AI
             id=rand_number(),
             chat=message.chat,
-            text=Str(f"{strings_list(PREFIX.GPT)[0]} {prompt}"),
+            text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {prompt}"),
             reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(subtitles)),
         )
-        params = {
-            "include_thoughts": False,
-            "append_grounding": False,
-            "silent": True,
-            "custom_model_id": summary_model_id,
-            "custom_model_name": summary_model_name,
-            "enable_gpt_tools": False,
-            "enable_gemini_tools": True,
-        }
-        aires = await gpt_response(client, ai_msg, **params)
+        aires = await ai_text_generation(client, ai_msg, silent=True)
         if aires.get("texts"):
             summary = f"🤖<b>{aires['model_name']}总结:</b>\n{markdown.markdown(aires['texts'])}\n"
 
src/config.py
@@ -34,14 +34,15 @@ NUM_YOUTUBE_SEARCH_RESULTS = int(os.getenv("NUM_YOUTUBE_SEARCH_RESULTS", "10"))
 NUM_GOOGLE_SEARCH_RESULTS = int(os.getenv("NUM_GOOGLE_SEARCH_RESULTS", "10"))  # Number of google search results
 GOOGLE_SEARCH_GL = os.getenv("GOOGLE_SEARCH_GL", "cn")  # "gl" parameter (Geolocation)
 CLEAN_OLD_FILES_OLDER_THAN_SECONDS = int(os.getenv("CLEAN_OLD_FILES_OLDER_THAN_SECONDS", "7200"))
+HELICONE_API_KEY = os.getenv("HELICONE_API_KEY", "")  # https://docs.helicone.ai/getting-started/integration-method/gateway
 
 
 class ENABLE:  # see fine-grained permission in `src/permission.py`
+    AI = os.getenv("ENABLE_AI", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     ASR = os.getenv("ENABLE_ASR", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     AUDIO = os.getenv("ENABLE_AUDIO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     CRONTAB = os.getenv("ENABLE_CRONTAB", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     DOUYIN = os.getenv("ENABLE_DOUYIN", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    GPT = os.getenv("ENABLE_GPT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     INSTAGRAM = os.getenv("ENABLE_INSTAGRAM", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     OCR = os.getenv("ENABLE_OCR", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     HISTORY = os.getenv("ENABLE_HISTORY", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -82,10 +83,11 @@ class ENABLE:  # see fine-grained permission in `src/permission.py`
 class PREFIX:
     SOCIAL_MEDIA = os.getenv("PREFIX_SOCIAL_MEDIA", "/benny, /dl, !dl")
     AI_SUMMARY = os.getenv("PREFIX_AI_SUMMARY", "/summary").lower()
+    AI_TEXT_GENERATION = os.getenv("PREFIX_AI_TEXT_GENERATION", "/ai").lower()
+    AI_IMG_GENERATION = os.getenv("PREFIX_AI_IMG_GENERATION", "/gen").lower()
     ASR = os.getenv("PREFIX_ASR", "/asr").lower()
     AUDIO = os.getenv("PREFIX_AUDIO", "/audio").lower()
     CONVERT = os.getenv("PREFIX_CONVERT", "/convert").lower()  # convert image file to photo
-    GPT = "/ai,/gpt,/gemini,/ds,/qwen,/doubao,/grok,/kimi"  # this is fixed
     SUBTITLE = os.getenv("PREFIX_SUBTITLE", "/subtitle, /sub").lower()
     WGET = os.getenv("PREFIX_WGET", "/wget, /curl").lower()
     OCR = os.getenv("PREFIX_OCR", "/ocr").lower()
@@ -96,7 +98,6 @@ class PREFIX:
     VOICE = os.getenv("PREFIX_VOICE", "/voice").lower()
     SEARCH_YOUTUBE = os.getenv("PREFIX_SEARCH_YOUTUBE", "/youtube, /ytb").lower()
     SEARCH_GOOGLE = os.getenv("PREFIX_SEARCH_GOOGLE", "/google").lower()
-    GENIMG = os.getenv("PREFIX_GENIMG", "/gen").lower()
     DANMU = os.getenv("PREFIX_DANMU", "/danmu").lower()
     FAYAN = os.getenv("PREFIX_FAYAN", "/fa").lower()
     HISTORY = "/history, /hist"
@@ -131,7 +132,6 @@ class API:
 class DANMU:
     BASE_URL = os.getenv("DANMU_BASE_URL", "")  # Custom API, No docs
     STREAMER = os.getenv("DANMU_STREAMER", "Streamer")  # streamer name
-    PROXY = os.getenv("DANMU_PROXY", None)  # socks5://127.0.0.1:7890
     AUTH_USER = os.getenv("DANMU_AUTH_USER", "")  # username for basic auth
     AUTH_PASS = os.getenv("DANMU_AUTH_PASS", "")  # password for basic auth
     QUERY_METHOD = os.getenv("DANMU_QUERY_METHOD", "turso")  # Turso or R2+API server
@@ -177,29 +177,36 @@ class TOKEN:
 
 
 class PROXY:  # format: socks5://127.0.0.1:7890
-    TELEGRAM = os.getenv("TELEGRAM_PROXY", None)  # Telegram
-    IMG = os.getenv("IMG_PROXY", "")  # https://caravaggio.ramielcreations.com/docs/install
-    XHS = os.getenv("XHS_PROXY", None)  # Banned VPS IP, need residential proxy
-    GPT = os.getenv("GPT_PROXY", None)
+    AI_POST = os.getenv("AI_POST_PROXY", None)
+    ALI = os.getenv("ALI_PROXY", None)
+    CLOUDFLARE = os.getenv("CLOUDFLARE_PROXY", None)
+    CRYPTO = os.getenv("CRYPTO_PROXY", None)
     D1 = os.getenv("D1_PROXY", None)
-    TURSO = os.getenv("TURSO_PROXY", None)
-    WECHAT = os.getenv("WECHAT_PROXY", None)
+    DANMU = os.getenv("DANMU_PROXY", None)
     DOUYIN = os.getenv("DOUYIN_PROXY", None)
-    TIKTOK = os.getenv("TIKTOK_PROXY", None)
+    DOWNLOAD = os.getenv("DOWNLOAD_PROXY", None)
+    EDGE = os.getenv("TTS_EDGE_PROXY", None)
+    GITHUB = os.getenv("GITHUB_PROXY", None)
+    GOOGLE = os.getenv("GOOGLE_PROXY", None)
+    GROQ = os.getenv("GROQ_PROXY", None)  # Bans CN & HK IPs
+    IMG = os.getenv("IMG_PROXY", "")  # https://caravaggio.ramielcreations.com/docs/install
     INSTAGRAM = os.getenv("INSTAGRAM_PROXY", None)
-    TWITTER = os.getenv("TWITTER_PROXY", None)
-    WARP = os.getenv("WARP_PROXY", None)
+    OPENAI = os.getenv("OPENAI_PROXY", None)
+    PODCAST = os.getenv("PODCAST_PROXY", None)
+    REDDIT = os.getenv("REDDIT_PROXY", None)
     SPOTIFY = os.getenv("SPOTIFY_PROXY", None)
     SUBTITLE = os.getenv("SUBTITLE_PROXY", None)
-    YOUTUBE_SEARCH = os.getenv("YOUTUBE_SEARCH_PROXY", None)
-    CRYPTO = os.getenv("CRYPTO_PROXY", None)
-    GOOGLE_SEARCH = os.getenv("GOOGLE_SEARCH_PROXY", None)
-    DOWNLOAD = os.getenv("DOWNLOAD_PROXY", None)
-    WEIBO = os.getenv("WEIBO_PROXY", None)
-    REDDIT = os.getenv("REDDIT_PROXY", None)
-    V2EX = os.getenv("V2EX_PROXY", None)
+    TELEGRAM = os.getenv("TELEGRAM_PROXY", None)  # Telegram
+    TENCENT = os.getenv("TENCENT_PROXY", None)  # Overseas IPs are banned; needs a back-to-China proxy
+    TIKTOK = os.getenv("TIKTOK_PROXY", None)
     TMDB = os.getenv("TMDB_PROXY", None)
-    GITHUB = os.getenv("GITHUB_PROXY", None)
+    TURSO = os.getenv("TURSO_PROXY", None)
+    TWITTER = os.getenv("TWITTER_PROXY", None)
+    V2EX = os.getenv("V2EX_PROXY", None)
+    WARP = os.getenv("WARP_PROXY", None)
+    WECHAT = os.getenv("WECHAT_PROXY", None)
+    WEIBO = os.getenv("WEIBO_PROXY", None)
+    XHS = os.getenv("XHS_PROXY", None)  # VPS IPs are banned; needs a residential proxy
     YTDLP = os.getenv("YTDLP_PROXY", None)  # general proxy for ytdlp
     YTDLP_FALLBACK = os.getenv("YTDLP_PROXY_FALLBACK", None)  # fallback proxy for ytdlp
     # for ytdlp proxy of specific sites (Like Bilibili), use this format: YTDLP_PROXY_BILIBILI
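All proxies now live in one alphabetized namespace keyed by service; a hypothetical deployment sets only the handful it needs:

```python
# Hypothetical environment (e.g. a .env file):
#   GOOGLE_PROXY=socks5://127.0.0.1:7890
#   TENCENT_PROXY=socks5://10.0.0.2:1080    # back-to-China proxy
from config import PROXY

PROXY.GOOGLE   # "socks5://127.0.0.1:7890" when set, otherwise None
PROXY.PODCAST  # None unless PODCAST_PROXY is exported
```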
@@ -287,7 +294,6 @@ class ASR:
     LONG_ENGINE = os.getenv("ASR_LONG_ENGINE", "gemini")  # comma separated engine names
 
     TENCENT_APPID = os.getenv("ASR_TENCENT_APPID", "")
-    TENCENT_PROXY = os.getenv("ASR_TENCENT_PROXY", None)  # Banned oversea IP, need a back to China proxy
     TENCENT_SECRET_ID = os.getenv("ASR_TENCENT_SECRET_ID", "")
     TENCENT_SECRET_KEY = os.getenv("ASR_TENCENT_SECRET_KEY", "")
     TENCENT_FS_ENGINE = os.getenv("ASR_TENCENT_FS_ENGINE", "local")  # local, uguu or alist.
@@ -302,20 +308,21 @@ class ASR:
     CLOUDFLARE_CHUNK_SECONDS = float(os.getenv("ASR_CLOUDFLARE_CHUNK_SECONDS", "180"))  # split long audio file into chunks
     CLOUDFLARE_OVERLAP_SECONDS = float(os.getenv("ASR_CLOUDFLARE_OVERLAP_SECONDS", "5"))  # overlap seconds between chunks
     CLOUDFLARE_KEYS = os.getenv("ASR_CLOUDFLARE_KEYS", "")  # comma separated keys for load balance. e.g. "AccountID:API_TOKEN, AccountID:API_TOKEN, ..."
-    CLOUDFLARE_PROXY = os.getenv("ASR_CLOUDFLARE_PROXY", None)
 
-    GEMINI_CHUNK_SECONDS = float(os.getenv("ASR_GEMINI_CHUNK_SECONDS", "600"))  # split long audio file into chunks
-    GEMINI_OVERLAP_SECONDS = float(os.getenv("ASR_GEMINI_OVERLAP_SECONDS", "5"))  # overlap seconds between chunks
-    GROQ_PROXY = os.getenv("ASR_GROQ_PROXY", None)  # Ban CN & HK IP
     GROQ_MAX_BYTES = int(os.getenv("ASR_GROQ_MAX_BYTES", "26214400"))  # 25MB (max file bytes for single file)
     GROQ_CHUNK_SECONDS = float(os.getenv("ASR_GROQ_CHUNK_SECONDS", "180"))  # split long audio file into chunks
     GROQ_OVERLAP_SECONDS = float(os.getenv("ASR_GROQ_OVERLAP_SECONDS", "5"))  # overlap seconds between chunks
     GROQ_KEYS = os.getenv("ASR_GROQ_KEYS", "")  # comma separated keys for load balance.
     GROQ_MODELS = os.getenv("ASR_GROQ_MODELS", "whisper-large-v3")  # comma separated model names.
 
+    GEMINI_CHUNK_SECONDS = float(os.getenv("ASR_GEMINI_CHUNK_SECONDS", "600"))  # split long audio file into chunks
+    GEMINI_OVERLAP_SECONDS = float(os.getenv("ASR_GEMINI_OVERLAP_SECONDS", "5"))  # overlap seconds between chunks
+    GEMINI_MAX_DURATION = int(os.getenv("ASR_GEMINI_MAX_DURATION", "34200"))  # 9.5 hours
+    GEMINI_MODEL = os.getenv("ASR_GEMINI_MODEL", "gemini-2.5-flash")
+    GEMINI_CONFIG = os.getenv("ASR_GEMINI_CONFIG", "{}")  # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
+
 
 class PODCAST:
-    PROXY = os.getenv("PODCAST_PROXY", None)
     FEED_URLS = os.getenv("PODCAST_FEED_URLS", "")  # comma separated feed urls
     OPML_URLS = os.getenv("PODCAST_OPML_URLS", "")  # comma separated opml urls
     YOUTUBE_CHANNEL_IDS = os.getenv("PODCAST_YOUTUBE_CHANNEL_IDS", "")  # comma separated youtube channel ids
@@ -337,6 +344,7 @@ class PODCAST:
     ASR_FORCE_UNCENSORED_DOMAINS = os.getenv("PODCAST_ASR_FORCE_UNCENSORED_DOMAINS", "anchor.fm,feeds.acast.com")
     GH_REPO = os.getenv("PODCAST_GH_REPO", "podcast")
     GH_TOKEN = os.getenv("PODCAST_GH_TOKEN", "")
+    SUMMARY_MODEL_ALIAS = os.getenv("PODCAST_SUMMARY_MODEL_ALIAS", "default")
 
 
 class FAVORITE:
@@ -358,7 +366,6 @@ class TTS:
     GEMINI_SPLIT_LENGTH = int(os.getenv("TTS_GEMINI_SPLIT_LENGTH", "8192"))  # split token limit of the tts model
     GEMINI_VOICE = os.getenv("TTS_GEMINI_VOICE", "Sulafat")
     ALI_API_KEY = os.getenv("TTS_ALI_API_KEY", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
-    ALI_PROXY = os.getenv("TTS_ALI_PROXY", None)  # Banned oversea IP, need a back to China proxy
     QWEN_MODEL = os.getenv("TTS_QWEN_MODEL", "qwen-tts,qwen-tts-latest")  # comma separated keys for load balance.
     QWEN_INPUT_TOKEN_LIMIT = int(os.getenv("TTS_QWEN_INPUT_TOKEN_LIMIT", "512"))  # token limit of the tts model
     QWEN_SPLIT_LENGTH = int(os.getenv("TTS_QWEN_SPLIT_LENGTH", "512"))  # split token limit of the tts model
@@ -368,131 +375,30 @@ class TTS:
     EDGE_DOMAIN = os.getenv("TTS_EDGE_DOMAIN", "https://tts.wangwangit.com")
     EDGE_VOICE = os.getenv("TTS_EDGE_VOICE", "晓晓")
     EDGE_MODEL = os.getenv("TTS_EDGE_MODEL", "zh-CN-XiaoxiaoNeural")
-    EDGE_PROXY = os.getenv("TTS_EDGE_PROXY", None)
-
-
-class GPT:
-    """This is for OpenAI compatible API.
-
-    See class GEMINI for the GEMINI configurations
-    """
-
-    PRIMARY_SEARCH_ENGINE = os.getenv("GPT_PRIMARY_SEARCH_ENGINE", "google")  # currently, only `google` is supported
-    SEARCH_NUM_RESULTS = os.getenv("GPT_SEARCH_NUM_RESULTS", "10")
-    TIMEOUT = os.getenv("GPT_TIMEOUT", None)  # timeout in seconds
-    TEMPERATURE = os.getenv("GPT_TEMPERATURE", None)
-    HISTORY_CONTEXT = os.getenv("GPT_HISTORY_CONTEXT", "20")  # Max number of history messages as context
-    TOKEN_ENCODING = os.getenv("GPT_TOKEN_ENCODING", "o200k_base")  # https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
-    MAX_RETRY = int(os.getenv("GPT_MAX_RETRY", "2"))
-    HELICONE_API_KEY = os.getenv("HELICONE_API_KEY", "")  # https://docs.helicone.ai/getting-started/integration-method/gateway
-    COLLAPSE_LENGTH = int(os.getenv("GPT_COLLAPSE_LENGTH", "500"))  # Collapse the response if the length is larger than this value
-    ALLOWED_CUSTOM_MODEL_IDS = os.getenv("GPT_ALLOWED_CUSTOM_MODEL_IDS", "")  # comma separated OpenAI compatible model ids
-    # comma separated fallback models for OpenRouter (e.g. openai/gpt-4o,anthropic/claude-3.5-sonnet)
-    OPENROUTER_FALLBACK_MODELS = os.getenv("GPT_OPENROUTER_FALLBACK_MODELS", "")
-
-    # default command (/ai).
-    # "gemini" to switch to gemini (see class GEMINI  below for details)
-    DEFAULT_PROVIDER = os.getenv("GPT_DEFAULT_PROVIDER", "gemini")
-    # omni provider (this should be a multi-modality model, like gpt-4o.)
-    # Used when the contexts contain multi-modality data (text, image) that the selected model cannot handle.
-    # For example, the /ds command can only handle text, but the contexts contain images.
-    OMNI_PROVIDER = os.getenv("GPT_OMNI_PROVIDER", "gemini")
-
-    # /gpt command
-    OPENAI_MODEL = os.getenv("GPT_OPENAI_MODEL", "")
-    OPENAI_MODEL_NAME = os.getenv("GPT_OPENAI_MODEL_NAME", "")
-    OPENAI_API_KEY = os.getenv("GPT_OPENAI_API_KEY", "")
-    OPENAI_BASE_URL = os.getenv("GPT_OPENAI_BASE_URL", "https://api.openai.com/v1")
-    OPENAI_ACCEPT_IMAGE = os.getenv("GPT_OPENAI_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /ds command
-    DEEPSEEK_MODEL = os.getenv("GPT_DEEPSEEK_MODEL", "")
-    DEEPSEEK_MODEL_NAME = os.getenv("GPT_DEEPSEEK_MODEL_NAME", "")
-    DEEPSEEK_API_KEY = os.getenv("GPT_DEEPSEEK_API_KEY", "")
-    DEEPSEEK_BASE_URL = os.getenv("GPT_DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1")
-    DEEPSEEK_ACCEPT_IMAGE = os.getenv("GPT_DEEPSEEK_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /qwen command
-    QWEN_MODEL = os.getenv("GPT_QWEN_MODEL", "")
-    QWEN_MODEL_NAME = os.getenv("GPT_QWEN_MODEL_NAME", "")
-    QWEN_API_KEY = os.getenv("GPT_QWEN_API_KEY", "")
-    QWEN_BASE_URL = os.getenv("GPT_QWEN_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
-    QWEN_ACCEPT_IMAGE = os.getenv("GPT_QWEN_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /grok command
-    GROK_MODEL = os.getenv("GPT_GROK_MODEL", "")
-    GROK_MODEL_NAME = os.getenv("GPT_GROK_MODEL_NAME", "")
-    GROK_API_KEY = os.getenv("GPT_GROK_API_KEY", "")
-    GROK_BASE_URL = os.getenv("GPT_GROK_BASE_URL", "https://api.x.ai/v1")
-    GROK_ACCEPT_IMAGE = os.getenv("GPT_GROK_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /doubao command
-    DOUBAO_MODEL = os.getenv("GPT_DOUBAO_MODEL", "")
-    DOUBAO_MODEL_NAME = os.getenv("GPT_DOUBAO_MODEL_NAME", "")
-    DOUBAO_API_KEY = os.getenv("GPT_DOUBAO_API_KEY", "")
-    DOUBAO_BASE_URL = os.getenv("GPT_DOUBAO_BASE_URL", "https://ark.cn-beijing.volces.com/api/v3")
-    DOUBAO_ACCEPT_IMAGE = os.getenv("GPT_DOUBAO_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-    # /kimi command
-    KIMI_MODEL = os.getenv("GPT_KIMI_MODEL", "")
-    KIMI_MODEL_NAME = os.getenv("GPT_KIMI_MODEL_NAME", "")
-    KIMI_API_KEY = os.getenv("GPT_KIMI_API_KEY", "")
-    KIMI_BASE_URL = os.getenv("GPT_KIMI_BASE_URL", "https://api.moonshot.ai/v1")
-    KIMI_ACCEPT_IMAGE = os.getenv("GPT_KIMI_ACCEPT_IMAGE", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
-
-    # AI summary
-    # comma separated chat ids that are allowed to use `cid` as the chatid for the summary
-    SUMMARY_WHITELIST_CUSTOM_CHATS = os.getenv("GPT_SUMMARY_WHITELIST_CUSTOM_CHATS", "")
-    CHAT_SUMMARY_MODEL_ID = os.getenv("CHAT_SUMMARY_MODEL_ID", "")  # Specify the model id for `/summary` command (If not set, use the default model)
-    CHAT_SUMMARY_MODEL_NAME = os.getenv("CHAT_SUMMARY_MODEL_NAME", "")
-    PODCAST_SUMMARY_MODEL_ID = os.getenv("PODCAST_SUMMARY_MODEL_ID", "")  # for generating podcast summary (If not set, use the default AI model)
-    PODCAST_SUMMARY_MODEL_NAME = os.getenv("PODCAST_SUMMARY_MODEL_NAME", "")
-    SUBTITLE_SUMMARY_MODEL_ID = os.getenv("SUBTITLE_SUMMARY_MODEL_ID", "")  # for generating subtitle summary (If not set, use the default AI model)
-    SUBTITLE_SUMMARY_MODEL_NAME = os.getenv("SUBTITLE_SUMMARY_MODEL_NAME", "")
-    # For tool_call. Some models don't support tool calls, so we use this model to do the tool_call first.
-    # Then construct the new questions for the original model.
-    TOOLS_MODEL = os.getenv("GPT_TOOLS_MODEL", "gpt-4o-mini")  # this model should be fast and cheap
-    TOOLS_BASE_URL = os.getenv("GPT_TOOLS_BASE_URL", "https://api.openai.com/v1")
-    TOOLS_API_KEY = os.getenv("GPT_TOOLS_API_KEY", "")
-
-
-class GEMINI:  # Official Gemini
-    # https://ai.google.dev/gemini-api/docs/image-generation
-    BASE_URL = os.getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/")
-    API_KEY = os.getenv("GEMINI_API_KEY", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
-    PROXY = os.getenv("GEMINI_PROXY", None)
-    PREFER_LANG = os.getenv("GEMINI_PREFER_LANG", "")  # Set a preferred response language for Gemini
-    MAX_THINKING_BUDGET = int(os.getenv("GEMINI_MAX_THINKING_BUDGET", "24576"))  # 24K
-    CLEAN_FILES_AFTER_SECONDS = int(os.getenv("GEMINI_CLEAN_FILES_AFTER_SECONDS", "172800"))  # default to 48 hours
-    ALLOWED_CUSTOM_MODEL_IDS = os.getenv("GEMINI_ALLOWED_CUSTOM_MODEL_IDS", "")  # comma separated model ids
-
-    # response modality: text
-    TEXT_MODEL = os.getenv("GEMINI_TEXT_MODEL", "gemini-2.5-pro")
-    TEXT_MODEL_NAME = os.getenv("GEMINI_TEXT_MODEL_NAME", "Gemini-2.5-Pro")
-    TEXT_THINKING_BUDGET = os.getenv("GEMINI_TEXT_THINKING_BUDGET", None)  # 0 to disable thinking. DO NOT set this if the model is not a thinking model
-    TEXT_CONFIG = os.getenv("GEMINI_TEXT_CONFIG", "{}")  # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
-
-    # ASR related
-    ASR_MAX_DURATION = int(os.getenv("GEMINI_ASR_MAX_DURATION", "34200"))  # 9.5 hours
-    ASR_MODEL = os.getenv("GEMINI_ASR_MODEL", "gemini-2.5-flash")
-    ASR_THINKING_BUDGET = os.getenv("GEMINI_ASR_THINKING_BUDGET", None)  # 0 to disable thinking. DO NOT set this if the model is not a thinking model
-    ASR_CONFIG = os.getenv("GEMINI_ASR_CONFIG", "{}")  # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
-    ASR_USE_GROUNDING = os.getenv("GEMINI_ASR_USE_GROUNDING", "1").lower() in ["1", "y", "yes", "t", "true", "on"]  # Use Grounding with Google Search
-
-
-class TEXT2IMG:
-    DEFAULT_MODEL = os.getenv("TEXT2IMG_DEFAULT_MODEL", "gemini")
-    FLUX_PROVIDER = os.getenv("TEXT2IMG_FLUX_PROVIDER", "ali,cloudflare").lower()  # comma separated
-    STABLE_DIFFUSION_PROVIDER = os.getenv("TEXT2IMG_STABLE_DIFFUSION_PROVIDER", "ali,cloudflare").lower()
-    ALI_API_KEY = os.getenv("TEXT2IMG_ALI_API_KEY", "")
-    ALI_FLUX_MODEL = os.getenv("TEXT2IMG_ALI_FLUX_MODEL", "flux-dev")
-    ALI_STABLE_DIFFUSION_MODEL = os.getenv("TEXT2IMG_ALI_STABLE_DIFFUSION_MODEL", "stable-diffusion-3.5-large")
-    ALI_PROXY = os.getenv("TEXT2IMG_ALI_PROXY", None)
-    ZIMAGE_API_URL = os.getenv("TEXT2IMG_ZIMAGE_API_URL", "")
-    ZIMAGE_PROXY = os.getenv("TEXT2IMG_ZIMAGE_PROXY", None)
-    CF_API_KEY = os.getenv("TEXT2IMG_CF_API_KEY", "")  # comma separated keys. e.g. "AccountID:API_TOKEN, AccountID:API_TOKEN, ..."
-    CF_FLUX_MODEL = os.getenv("TEXT2IMG_CF_FLUX_MODEL", "@cf/black-forest-labs/flux-1-schnell")
-    CF_STABLE_DIFFUSION_MODEL = os.getenv("TEXT2IMG_CF_STABLE_DIFFUSION_MODEL", "@cf/bytedance/stable-diffusion-xl-lightning")
-    CF_PROXY = os.getenv("TEXT2IMG_CF_PROXY", None)
-    DOUBAO_API_KEY = os.getenv("TEXT2IMG_DOUBAO_API_KEY", "")  # comma separated keys
-    DOUBAO_SEEDREAM_MODEL = os.getenv("TEXT2IMG_DOUBAO_SEEDREAM_MODEL", "doubao-seedream-4-0-250828")
-    DOUBAO_PROXY = os.getenv("TEXT2IMG_DOUBAO_PROXY", None)
-    GEMINI_MODEL = os.getenv("TEXT2IMG_GEMINI_MODEL", "gemini-2.5-flash-image")
-    GEMINI_BASE_URL = os.getenv("TEXT2IMG_GEMINI_BASE_URL", "https://generativelanguage.googleapis.com")
-    GEMINI_PROXY = os.getenv("TEXT2IMG_GEMINI_PROXY", None)
-    GEMINI_API_KEY = os.getenv("TEXT2IMG_GEMINI_API_KEY", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
+
+
+class AI:
+    # Text Generation
+    MAX_CONTEXTS_NUM = int(os.getenv("AI_MAX_CONTEXTS_NUM", "30"))
+    TEXT_MODEL_CONFIG_KEY = os.getenv("AI_MODEL_CONFIG_KEY", "AI-TEXT")  # model configuration key in CF-KV
+    TEXT_DEFAULT_PROVIDER = os.getenv("AI_TEXT_DEFAULT_PROVIDER", "gemini")
+    OPENAI_MODEL_ID = os.getenv("AI_OPENAI_MODEL_ID", "gpt-4o")
+    OPENAI_API_KEYS = os.getenv("AI_OPENAI_API_KEYS", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
+    OPENAI_BASE_URL = os.getenv("AI_OPENAI_BASE_URL", "https://api.openai.com/v1")
+    OPENAI_CLIENT_CONFIG = os.getenv("AI_OPENAI_CLIENT_CONFIG", "{}")  # client config passed to OpenAI API. Should be a json string: '{"key": "value"}'
+    OPENAI_DEFAULT_HEADERS = os.getenv("AI_OPENAI_DEFAULT_HEADERS", "{}")  # default headers passed to OpenAI API. Should be a json string: '{"key": "value"}'
+    OPENAI_COMPLETIONS_CONFIG = os.getenv("AI_OPENAI_COMPLETIONS_CONFIG", "{}")  # chat completions config. Should be a json string: '{"key": "value"}'
+    OPENAI_RESPONSES_CONFIG = os.getenv("AI_OPENAI_RESPONSES_CONFIG", "{}")  # response api config. Should be a json string: '{"key": "value"}'
+    GEMINI_MODEL_ID = os.getenv("AI_GEMINI_MODEL_ID", "gemini-2.5-flash")
+    GEMINI_API_KEYS = os.getenv("AI_GEMINI_API_KEYS", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
+    GEMINI_BASE_URL = os.getenv("AI_GEMINI_BASE_URL", "https://generativelanguage.googleapis.com")
+    GEMINI_DEFAULT_HEADERS = os.getenv("AI_GEMINI_DEFAULT_HEADERS", "{}")  # default headers passed to Gemini API. Should be a json string: '{"key": "value"}'
+    GEMINI_GENERATE_CONTENT_CONFIG = os.getenv("AI_GEMINI_GENERATE_CONTENT_CONFIG", "{}")  # gemini generate_content config. Should be a json string: '{"key": "value"}'
+    GEMINI_FILES_TTL = int(os.getenv("AI_GEMINI_FILES_TTL", "172800"))  # clean gemini files after 48 hours
+    PODCAST_SUMMARY_MODEL_ALIAS = os.getenv("PODCAST_SUMMARY_MODEL_ALIAS", "podcast-summary")
+    SUBTITLE_SUMMARY_MODEL_ALIAS = os.getenv("SUBTITLE_SUMMARY_MODEL_ALIAS", "subtitle-summary")
+    CHAT_SUMMARY_MODEL_ALIAS = os.getenv("CHAT_SUMMARY_MODEL_ALIAS", "chat-summary")
+
+    # Image Generation
+    IMG_MODEL_CONFIG_KEY = os.getenv("AI_IMG_MODEL_CONFIG_KEY", "AI-IMG")  # model configuration key in CF-KV
+    IMG_GENERATION_DEFAULT_MODEL = os.getenv("AI_IMG_GENERATION_DEFAULT_MODEL", "seedream")
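
Most of the new AI settings arrive as JSON strings or comma-separated key lists in environment variables, so any consumer has to deserialize them before building a client. A minimal parsing sketch under that assumption; the helper names below are illustrative, not part of the commit:

import json
import os
import random


def _json_env(name: str, default: str = "{}") -> dict:
    """Parse a JSON-string env var such as AI_OPENAI_CLIENT_CONFIG."""
    try:
        return json.loads(os.getenv(name, default))
    except json.JSONDecodeError:
        return {}


def _keys_env(name: str) -> list[str]:
    """Split a comma-separated key list such as AI_OPENAI_API_KEYS."""
    return [k.strip() for k in os.getenv(name, "").split(",") if k.strip()]


# Pick one key at random for naive load balancing across accounts.
client_config = _json_env("AI_OPENAI_CLIENT_CONFIG")
api_keys = _keys_env("AI_OPENAI_API_KEYS")
api_key = random.choice(api_keys) if api_keys else ""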
src/main.py
@@ -17,14 +17,14 @@ from pyrogram.client import Client
 from pyrogram.sync import idle
 from pyrogram.types import LinkPreviewOptions, Message
 
+from ai.chat_summary import daily_summary
+from ai.utils import clean_gemini_files
 from bridge.chartimg import forward_chartimg_results
 from bridge.ocr import forward_ocr_results
 from bridge.social import forward_social_media_results
 from config import DAILY_MESSAGES, DEVICE_NAME, ENABLE, PROXY, TOKEN, TZ, cache
 from danmu.sync import sync_livechats
 from history.sync import backup_chat_history, sync_chat_history
-from llm.summary import daily_summary
-from llm.utils import clean_gemini_files
 from messages.main import process_message
 from messages.parser import parse_msg
 from permission import check_permission
src/networking.py
@@ -502,7 +502,7 @@ async def flatten_rediercts(
     # vertexaisearch.cloud.google.com
     if matched := re.search(r"(https?://)?vertexaisearch\.cloud\.google\.com/([0-9a-zA-Z\-_=+/]+)", texts):
         url = matched.group(0)
-        proxy = PROXY.GOOGLE_SEARCH
+        proxy = PROXY.GOOGLE
 
     # custom pattern
     if pattern and (matched := re.search(pattern, texts)):
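
The hunk above only renames the proxy attribute to PROXY.GOOGLE; the surrounding code first extracts a Vertex AI Search redirect URL with a regex before resolving it. A standalone sketch of that extraction step (fetching the redirect target is omitted):

import re

texts = "grounded by https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbCd123="

# Match the redirect URL the same way flatten_rediercts does.
if matched := re.search(
    r"(https?://)?vertexaisearch\.cloud\.google\.com/([0-9a-zA-Z\-_=+/]+)", texts
):
    url = matched.group(0)
    print(url)  # the redirect to resolve, fetched via PROXY.GOOGLE in this commit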
src/permission.py
@@ -153,7 +153,6 @@ def check_service(cid: int | str, ctype: str) -> dict:
     } | global_permissions()
 
     if ctype == "PRIVATE":
-        permission["ai"] = True
         permission["need_prefix"] = False
 
     # global service permission
@@ -183,7 +182,7 @@ def check_service(cid: int | str, ctype: str) -> dict:
         permission["reddit"] = False
     if not ENABLE.YTDLP:
         permission["ytdlp"] = False
-    if not ENABLE.GPT:
+    if not ENABLE.AI:
         permission["ai"] = False
     if not ENABLE.ASR:
         permission["asr"] = False
src/utils.py
@@ -6,6 +6,7 @@ import os
 import random
 import re
 import string
+import tempfile
 from datetime import UTC, datetime
 from decimal import Decimal
 from pathlib import Path
@@ -21,6 +22,7 @@ from bs4 import BeautifulSoup
 from bs4.element import PageElement
 from glom import PathAccessError, glom
 from loguru import logger
+from markitdown import MarkItDown
 from pyrogram.client import Client
 from pyrogram.types import User
 from yt_dlp.extractor import gen_extractors
@@ -582,6 +584,35 @@ def cleanup_old_files(root: Path | str | None = None, duration: int = CLEAN_OLD_
             path.unlink(missing_ok=True)
 
 
+def convert_md(path: str | Path | None = None, html: str | None = None) -> str:
+    """Convert to markdown format."""
+    md = MarkItDown()
+    if path is not None:
+        path = Path(path).expanduser().resolve()
+        if not path.is_file():
+            return ""
+        result = md.convert(path)
+        return result.text_content
+    if html is not None:
+        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
+            f.write(html)
+        result = md.convert(f.name)
+        Path(f.name).unlink(missing_ok=True)
+        return result.text_content
+    return ""
+
+
+def convert_html(texts: str = "", path: str | Path | None = None) -> str:
+    """Convert to markdown format."""
+    if path is not None:
+        path = Path(path).expanduser().resolve()
+        if not path.is_file():
+            return ""
+        texts = read_text(path)
+    texts = markdown.markdown(texts)
+    return texts.replace("\n", "<br>")
+
+
 def av2bv(aid: int | str) -> str:
     """Bilibili AV -> BV ID converter."""
     aid = str(aid)
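
A brief usage sketch for the two converters added above, assuming markitdown is installed and src/ is on the import path; the file path is hypothetical:

from utils import convert_html, convert_md

# File -> Markdown via MarkItDown (handles PDF, DOCX, HTML, ...).
md_text = convert_md(path="~/Downloads/report.pdf")

# Raw HTML string -> Markdown through a throwaway temp file.
md_text = convert_md(html="<h1>Title</h1><p>Body</p>")

# Markdown string -> HTML, with newlines flattened to <br> tags.
html_text = convert_html("**bold** text\nnext line")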
environment.yml
@@ -29,7 +29,6 @@ dependencies:
   - python=3.13
   - pyyaml>=6.0.2
   - socksio>=1.0.0
-  - tiktoken>=0.9.0
   - uvloop>=0.21.0
   - xmltodict>=0.14.2
   - youtube-transcript-api>=1.2.1
pyproject.toml
@@ -34,7 +34,6 @@ dependencies = [
   "quickchart-io==2.0.0",
   "soundfile==0.13.1",
   "telegraph[aio]==2.2.0",
-  "tiktoken==0.12.0",
   "uvloop==0.22.1",
   "xmltodict==1.0.2",
   "youtube-transcript-api==1.2.3",
uv.lock
@@ -250,7 +250,6 @@ dependencies = [
     { name = "quickchart-io", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "soundfile", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "telegraph", extra = ["aio"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "uvloop", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "xmltodict", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "youtube-transcript-api", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -299,7 +298,6 @@ requires-dist = [
     { name = "quickchart-io", specifier = "==2.0.0" },
     { name = "soundfile", specifier = "==0.13.1" },
     { name = "telegraph", extras = ["aio"], specifier = "==2.2.0" },
-    { name = "tiktoken", specifier = "==0.12.0" },
     { name = "uvloop", specifier = "==0.22.1" },
     { name = "xmltodict", specifier = "==1.0.2" },
     { name = "youtube-transcript-api", specifier = "==1.2.3" },
@@ -2045,58 +2043,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/17/46/4d2c1ce9457ddd90913084085be5add1cb040f9bb41717adb89554a9c9d9/quickchart_io-2.0.0-py3-none-any.whl", hash = "sha256:c44b5fb4d6e957fb85db0926e691684795e9fe5d6819d33f2daea795a0f6a36b", size = 5122, upload-time = "2022-09-24T22:40:32.94Z" },
 ]
 
-[[package]]
-name = "regex"
-version = "2026.1.14"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ad/f2/638ef50852dc5741dc3bb3c7d4e773d637bc20232965ef8b6e7f6f7d4445/regex-2026.1.14.tar.gz", hash = "sha256:7bdd569b6226498001619751abe6ba3c9e3050f79cfe097e84f25b2856120e78", size = 414813, upload-time = "2026-01-14T17:53:31.244Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/21/8c/dbf1f86f33ea9e5365a18b5f82402092ab173244f5133b133128ce9b3f7c/regex-2026.1.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2d79c0cbcc86da60fee4410bd492cda9121cda2fc5a5a214b363b4566f973319", size = 489162, upload-time = "2026-01-14T17:51:38.854Z" },
-    { url = "https://files.pythonhosted.org/packages/8a/cd/0d42bcd848be341b9d220a66b8ee79d74c3387f1def40a58d00ca26965d1/regex-2026.1.14-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4ec665ee043e552ea705829ababf342a6437916d3820afcfb520c803a863bab2", size = 291208, upload-time = "2026-01-14T17:51:40.058Z" },
-    { url = "https://files.pythonhosted.org/packages/55/b0/d60e4a1260d1070df4e8be0e41917963821f345be3522b0f1490e122fd68/regex-2026.1.14-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d03aa0b4b376591dbbe55e43e410408baa58efbc1c233eb7701390833177e20a", size = 288897, upload-time = "2026-01-14T17:51:41.441Z" },
-    { url = "https://files.pythonhosted.org/packages/98/7f/fb426139aca46aeaf1aa4dcd43ed3db4cc768efc34c724e51a3b139a8c40/regex-2026.1.14-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69c16047721fd8b517a0ebb464cbd71391711438eeaeb210f2ca698a53ec6e81", size = 798678, upload-time = "2026-01-14T17:51:42.836Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/35/2e1f3c985d8cd5c6aec03fc96e51dfa972c24c0b4aaef6e065bc1de0bbfd/regex-2026.1.14-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:94e547c2a2504b8f7ae78c8adfe6a5112db85f57cb0ee020d2c5877275e870d2", size = 864207, upload-time = "2026-01-14T17:51:44.194Z" },
-    { url = "https://files.pythonhosted.org/packages/76/7f/405e0f3b4d98614e58aab7c18ab820b741321d2dff29ef8e7d1948359912/regex-2026.1.14-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0911e70459d42483190bfbdb51ea503cde035ad2d5a6cc2a65d89500940d6cce", size = 912355, upload-time = "2026-01-14T17:51:45.586Z" },
-    { url = "https://files.pythonhosted.org/packages/10/30/c818854bbf09f41b73474381c4126c9489e02c2baa1f2178f699b2085a78/regex-2026.1.14-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ccf545852edc516f51b97d71a93308340e002ff44dd70b5f3e8486ef7db921b", size = 803581, upload-time = "2026-01-14T17:51:47.217Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/95/6585eee0e4ff1a0970606975962491d17c78b16738274281068ee7c59546/regex-2026.1.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c09b0ced5b98299709d43f106e27218c4dd8d561bea1a257c77626e6863fdad3", size = 787977, upload-time = "2026-01-14T17:51:48.636Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/0b/f235cb019ee7f912d7cf2e026a38569c83c0eb2bb74200551148f80ab3cb/regex-2026.1.14-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dbfac3be697beadc831fad938d174d73a2903142447a617ef831ce870d7ec1dd", size = 858547, upload-time = "2026-01-14T17:51:49.998Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/1e/c8561f3a01e9031c7ecc425aac2f25178487335efbee6a6c5a8a648013c2/regex-2026.1.14-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f9c67b0aec9e36daeb34051478c51fcf15c6eac8207645c6660f657ed26002a5", size = 850613, upload-time = "2026-01-14T17:51:51.614Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/21/4a1b879a4e2b991d65c92190a5e8024571c89c045cc4cf305166416b1c7b/regex-2026.1.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aaf3cd810e128763d8010633af1206715daf333348a01bb5eb72c99ed15b0277", size = 789950, upload-time = "2026-01-14T17:51:53.719Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/67/d4254424777851b16c3622049383c1c71259c9d4bea87f0d304376541a28/regex-2026.1.14-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b8430037d2253f9640727c046471bc093773abcefd212254d4b904730536b652", size = 492070, upload-time = "2026-01-14T17:51:59.178Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/9e/c3321f78f1ddb4eee88969db36fb8552217dd99d9b16a7c0ac6e88340796/regex-2026.1.14-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:10a6877cee35e574234928bcb74125063ff907fc0f5efca7a5a44bebd2fe87f3", size = 292752, upload-time = "2026-01-14T17:52:00.772Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/f9/d7dd071d5d12f4f58950432c4f967b3ba6ddbd14bc84b0280a35284dd287/regex-2026.1.14-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:41f9b06ec8ebd743c78e331d062d868c398817bfb2b04191e107c1ee2ac202ed", size = 291116, upload-time = "2026-01-14T17:52:02.162Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/f4/a2d81988df08bb13e2068eec072c3d46fc578575975bba549f512bc74495/regex-2026.1.14-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:650979c05e632bc80f6267e645ad152e13c6931d6295c0ad8ba3e637c118f124", size = 807521, upload-time = "2026-01-14T17:52:03.495Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/b0/0f4217aa90bb83e04cbae39a7428fa27ed9e21dd6b5fc10186fb9a341da3/regex-2026.1.14-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6464d2c038c6bb6b534ac3144281fd5d38268bcb77cf6e17b399ca79ebbae25c", size = 873453, upload-time = "2026-01-14T17:52:04.862Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/69/b494cefbf67d1895568d952f1343a029dfe93428816a9956d8022f7a24f1/regex-2026.1.14-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ada211c9b8d6c0b2860ea37a52e06b0b3b316dbc28f403530e0227868318c9d4", size = 915006, upload-time = "2026-01-14T17:52:06.304Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/d4/54d81ba0b45893ab9dec83134d3fef383f807987c6618de3ea5ecceb98cb/regex-2026.1.14-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:54cf46d11bb344d86fc5514171a55220f87a31706ef9c0cd41b310f706d50db8", size = 812793, upload-time = "2026-01-14T17:52:07.986Z" },
-    { url = "https://files.pythonhosted.org/packages/56/40/2a477aa0a2b869ea2538a7ab1ee46d581be5f17da345e9913b7a0baf7701/regex-2026.1.14-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:83792c2865452dbbd14fb00fd7c00cfc38ea76bf579944e8244a9e1b78a58bde", size = 795557, upload-time = "2026-01-14T17:52:09.45Z" },
-    { url = "https://files.pythonhosted.org/packages/07/0f/54b5af02916f3ca90987c0e1c744b7fee572f1873da9b6256f85783286e4/regex-2026.1.14-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:e3e7fbf8403dadf202438c0e1c677c21809fc7ba7489f8449b76fe27a8284264", size = 868425, upload-time = "2026-01-14T17:52:11.392Z" },
-    { url = "https://files.pythonhosted.org/packages/74/cd/c9dfdd504497a25ba64c4ef846c37f74019cfdedfe3d1cdcba4033a3ac0c/regex-2026.1.14-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:109ce462bf9f91ca72ef2864af706f0ed3d37de7692d9b90e9cff1e44ad6c3b4", size = 854751, upload-time = "2026-01-14T17:52:12.835Z" },
-    { url = "https://files.pythonhosted.org/packages/95/b3/e5347ed1eb68a0c8d6c6b5da9318c564308d022b721b1c2ca311f7a8bd74/regex-2026.1.14-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:299c1c591ecd798ce2529e24f6e11f2fe3cc42bb21b0fead056880e0d57752c3", size = 799557, upload-time = "2026-01-14T17:52:14.228Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/90/64dcf099f3efde2115ceb0a2482064d2630532a8c2b40c95d01f4b886d68/regex-2026.1.14-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c07bbee79ceb399ae4c8294b154fccdf2eefc1e86b157338d93e9e46ed327cd4", size = 489164, upload-time = "2026-01-14T17:52:19.811Z" },
-    { url = "https://files.pythonhosted.org/packages/57/33/11f82bcf6df1477211390d3c55d9a65bbdf0454101fe6f101bbf428ed72e/regex-2026.1.14-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ef59c01b8eab361b3e5768f491a0a59c6fc3b862d34d08ec9b78ce7b3f9c5d11", size = 291147, upload-time = "2026-01-14T17:52:21.146Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/b4/33df4bc04af4a7abf5754da3a1d131e9384e59ca4431d85af9f5cf7e040d/regex-2026.1.14-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:db72aebb3eede342088f6940aea3cc59f2bbf93295b8a7c7a98fa293b20accc9", size = 288981, upload-time = "2026-01-14T17:52:22.675Z" },
-    { url = "https://files.pythonhosted.org/packages/72/fd/d89b1425b9b420877eec3588d1abec08948e836461a16e4748be64078cda/regex-2026.1.14-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23da4da4a156d613946f197ad85da2c3ce3269166909249206dbfc6a62e27d4b", size = 799097, upload-time = "2026-01-14T17:52:24.081Z" },
-    { url = "https://files.pythonhosted.org/packages/04/f0/149b80499a12a9ef525656a780abca8383b9689687afb3eef8f16d62574c/regex-2026.1.14-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c59aaa581c86d0003a805843399fdd859e3803ee3f6bf694a96ede726b60d26c", size = 864980, upload-time = "2026-01-14T17:52:25.847Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/bb/bec2a2ba7e0120915b02d46b68c265938a235657baaf7be79746e0a40583/regex-2026.1.14-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4176e42a6b940b704b25d804afc799c4f2cf88c245e71c111d91c9259a5a08bd", size = 911606, upload-time = "2026-01-14T17:52:27.529Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/49/fcb59ec88bf188632877ea18eca43bed95c49fd049a3a16f943dc48ec225/regex-2026.1.14-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:336662753d92ecc68a6e5d557d5648550927c3927bb18959a6c336c6d2309b95", size = 803356, upload-time = "2026-01-14T17:52:29.031Z" },
-    { url = "https://files.pythonhosted.org/packages/04/a3/a4e1873b32c7b4e9839edbf86d2369bbbd5759581481bf095eb561186acd/regex-2026.1.14-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d3f2da65a0552a244319cd0079f7dcbd7b18358c05ca803757f123b5315f9e2b", size = 788042, upload-time = "2026-01-14T17:52:30.546Z" },
-    { url = "https://files.pythonhosted.org/packages/05/b9/0f3fcb32b9ac5467f3a6634fc867bb008670eabebc5dbf91c76d0ee63d1d/regex-2026.1.14-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ea9cb3230eb1791b74530fe59a9ad1e41282eee27cddf9f765cb216f1a35b491", size = 859373, upload-time = "2026-01-14T17:52:32.11Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/f4/f1f7602b5e9a60fdabebaf5b6796b460a4820fbe932993467ae6c12bd8ac/regex-2026.1.14-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:00a41d01546c09bfd972492f4f45515cba5cd8b15d129e6f43b5e9b6bf5cf5db", size = 850110, upload-time = "2026-01-14T17:52:34.615Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/e4/96e231d977a95fe493931ee10b60352d7b0f25fe733660dd4ce34d7669dd/regex-2026.1.14-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a16876c16759e2030cbc2444d72a773ba9fb11c58ddf1411bceac3015e85ad62", size = 789584, upload-time = "2026-01-14T17:52:36.667Z" },
-    { url = "https://files.pythonhosted.org/packages/11/fe/16f795a7d49970393f43c1593a59057d9f0037858cd9797ca2e6965031e6/regex-2026.1.14-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:2cccc1a0d1c07dc5e7f65f042f17a678aa431b27d2c1b33983cdb52daf4e49a5", size = 492068, upload-time = "2026-01-14T17:52:42.561Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/8d/297e5410c4aba87c0c5c7760e1ffa34f9d4bec0bd3b264073c5f6d251ab1/regex-2026.1.14-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c47239a9736f6082540f91f77dd634a7771eac1e8720bc35ef274d8ea0a72b90", size = 292752, upload-time = "2026-01-14T17:52:44.414Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/8d/d9efc9580631603255856b306e4a19c6c3b45491a793ce60a4de76118831/regex-2026.1.14-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ac5dcb96ed037c692eb40b0c96bd5ba588f07fd898bd14e111c751a4bf195b21", size = 291118, upload-time = "2026-01-14T17:52:46.315Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/cd/89735cc17f41667bf1cb7fb341109eb19ada117ef0a8e8882a9396de68f0/regex-2026.1.14-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98af87df5496a39a7f4fa619568a12e0b719af25e75ecbd968a671609fda3702", size = 807759, upload-time = "2026-01-14T17:52:47.771Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/2d/e5db572360c76b335d578a4bec6437b302e1f170722b1f0c79c7295ec169/regex-2026.1.14-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:670d6865632ef2ad1ba0f326b4eb714107b71e3ea9a48a2564d407834273e2da", size = 873536, upload-time = "2026-01-14T17:52:49.695Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/a1/704748140afb90045c3d635cd1929e15b821627ef7a1b4ae22fe3c1cf18a/regex-2026.1.14-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:aa783080add7cedbeb8c11e8c7e3efb9353b7c183701548fae70ec44b7b886cd", size = 915064, upload-time = "2026-01-14T17:52:51.199Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/5a/00699f1bcc8f5aaf9cae4b1f673c1a3ba5256ea2d4d53f8f21319976cd25/regex-2026.1.14-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0aa76616ee8a1fdefa62f486324ba6fecc3059261779ebb1575a7b7ddf5fb7c9", size = 812937, upload-time = "2026-01-14T17:52:52.77Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/fd/c6742cb9ed24a8fe197603a6808e5641eaaa59c13a2ad8624d39d0405d82/regex-2026.1.14-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6ff33c4e28c44de3e1877afaf55feb306093cb6cb8e49bf083cfd9bdb258e130", size = 795650, upload-time = "2026-01-14T17:52:54.717Z" },
-    { url = "https://files.pythonhosted.org/packages/17/36/ccadcc5f1204529ca638c969659a9b56ef706f4eb908bbd7a9a7645793b8/regex-2026.1.14-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a39879c089bc84fd8ab6f02de458534e7ed8e7bf72091322ff0d8b9138f612c1", size = 868549, upload-time = "2026-01-14T17:52:56.309Z" },
-    { url = "https://files.pythonhosted.org/packages/78/5e/a7b09f3031bbd0e1ab15d08277cac61193adfd62bb6d10e7ba4e69cee4e6/regex-2026.1.14-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:cf0ce5cd5b0c011ec49ff51f85f5ba6ed46ecc5491fa60f803734b2e70dd32aa", size = 854779, upload-time = "2026-01-14T17:52:57.789Z" },
-    { url = "https://files.pythonhosted.org/packages/de/ae/a70e39d97b9611628b1d9c3a709d24f1639bcbfa99277391864303a8cd61/regex-2026.1.14-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a29ecdaa0f5dac290b17b61150d00646240b195dbe2950bf3de6360cf41c7cce", size = 799776, upload-time = "2026-01-14T17:52:59.344Z" },
-]
-
 [[package]]
 name = "requests"
 version = "2.32.5"
@@ -2247,42 +2193,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" },
 ]
 
-[[package]]
-name = "tiktoken"
-version = "0.12.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" },
-    { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" },
-    { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" },
-    { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" },
-    { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" },
-    { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" },
-    { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" },
-    { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" },
-    { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" },
-]
-
 [[package]]
 name = "tqdm"
 version = "4.67.1"