Commit 86bb25f
Changed files (2)
src
ai
src/ai/texts/contexts.py
@@ -5,7 +5,6 @@ import base64
import contextlib
import hashlib
import mimetypes
-import time
from pathlib import Path
from typing import TYPE_CHECKING, Literal
@@ -178,32 +177,40 @@ async def single_openai_response_context(client: Client, message: Message, opena
extra_markdown_extensions = [".html", ".docx", ".pptx", ".xls", ".xlsx"] # convert to markdown
messages = await client.get_media_group(message.chat.id, message.id) if message.media_group_id else [message]
+ media_send_as = openai_params.get("openai_media_send_as", "base64")
+ allow_image = bool(openai_params.get("allow_image"))
+ allow_video = bool(openai_params.get("allow_video"))
+ allow_audio = bool(openai_params.get("allow_audio"))
+ allow_file = bool(openai_params.get("allow_file"))
contexts = []
for msg in messages:
info = parse_msg(msg, silent=True)
sender = info["fwd_full_name"] or info["full_name"]
media_path = DOWNLOAD_DIR + "/" + info["file_name"]
- media_send_as = openai_params.get("openai_media_send_as", "base64")
file_id = ""
try:
- if info["mtype"] == "photo":
- if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params, info["mtype"])):
- contexts.append({"type": "input_image", "file_id": file_id, "detail": "high"})
+ if info["mtype"] == "photo" and allow_image:
+ if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params)):
+ contexts.append({"type": "input_image", "file_id": file_id})
if not file_id:
res = await base64_media(client, msg)
- contexts.append({"type": "input_image", "image_url": f"data:image/{res['ext']};base64,{res['base64']}", "detail": "high"})
-
- elif info["mtype"] == "video":
- if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params, info["mtype"])):
+ contexts.append({"type": "input_image", "image_url": f"data:image/{res['ext']};base64,{res['base64']}"})
+ elif info["mtype"] == "video" and allow_video:
+ if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params)):
contexts.append({"type": "input_video", "file_id": file_id})
if not file_id:
res = await base64_media(client, msg)
contexts.append({"type": "input_video", "video_url": f"data:video/{res['ext']};base64,{res['base64']}"})
-
- elif info["mtype"] == "document":
+ elif info["mtype"] == "audio" and allow_audio:
+ if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params)):
+ contexts.append({"type": "input_audio", "file_id": file_id})
+ if not file_id:
+ res = await base64_media(client, msg)
+ contexts.append({"type": "input_audio", "audio_url": f"data:audio/{res['ext']};base64,{res['base64']}"})
+ elif info["mtype"] == "document" and allow_file:
guessed_mime, _ = mimetypes.guess_type(info["file_name"])
if info["mime_type"] == "application/pdf" or guessed_mime == "application/pdf":
- if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params, info["mtype"])):
+ if media_send_as == "file_id" and (file_id := await get_openai_file_id(client, msg, openai_params)):
contexts.append({"type": "input_file", "file_id": file_id})
if not file_id:
res = await base64_media(client, msg)
@@ -246,7 +253,7 @@ async def single_openai_response_context(client: Client, message: Message, opena
return {"role": role, "type": "message", "content": contexts, **extra}
-async def get_openai_file_id(client: Client, message: Message, openai_params: dict, mtype: str) -> str:
+async def get_openai_file_id(client: Client, message: Message, openai_params: dict) -> str:
def get_real_baseurl() -> str:
base_url = str(openai_params["base_url"]) or ""
default_headers = openai_params.get("default_headers", {})
@@ -258,15 +265,9 @@ async def get_openai_file_id(client: Client, message: Message, openai_params: di
return default_headers.get("x-portkey-custom-host") or ""
return base_url
- if mtype not in ["photo", "video", "document"]:
- return ""
- if not openai_params["allow_image"] and mtype == "photo":
+ if openai_params.get("max_upload_size") and message_bytes(message) > int(openai_params["max_upload_size"]):
+ logger.warning(f"Message-{message.id} size {message_bytes(message)} bytes exceeds max_upload_size {openai_params['max_upload_size']}")
return ""
- if not openai_params["allow_video"] and mtype == "video":
- return ""
- if not openai_params["allow_file"] and mtype == "document":
- return ""
-
cache_day = openai_params.get("cache_day", 30)
api_key = openai_params["api_key"]
model_id = openai_params["model_id"]
@@ -282,28 +283,17 @@ async def get_openai_file_id(client: Client, message: Message, openai_params: di
api_key=api_key,
http_client=DefaultAsyncHttpxClient(proxy=openai_params["proxy"]) if openai_params.get("proxy") else None,
)
- fpath: str = await client.download_media(message) # type: ignore
- extra_body = {"expire_at": int(time.time()) + 3600 * 24 * cache_day}
-
- preprocess_configs = {}
- if message.video:
- duration = glom(message, "video.duration", default=1e8)
- ratio = int(duration // 300)
- fps = ratio * 0.5
- if fps < 0.5:
- fps = 0.5
- elif fps > 5.0:
- fps = 5.0
- preprocess_configs = {"video": {"fps": fps, "model": openai_params["model_id"]}}
- if preprocess_configs:
- extra_body["preprocess_configs"] = preprocess_configs
+ fpath: str | Path = await client.download_media(message) # ty:ignore[invalid-assignment]
try:
- resp = await openai.files.create(file=Path(fpath), purpose="user_data", extra_body=extra_body)
- while resp.status == "processing":
+ # hotfix: convert audio to aac
+ if message.audio and not str(fpath).endswith(".aac"):
+ fpath = await downsampe_audio(fpath, ext="aac", codec="aac")
+ resp = await openai.files.create(file=Path(fpath), purpose="user_data")
+ while resp.status in ["processing", "uploaded"]:
logger.trace(f"Upload media to OpenAI processing: {resp.model_dump()}")
await asyncio.sleep(3)
resp = await openai.files.retrieve(file_id=resp.id)
- if resp.status == "active":
+ if resp.status in ["active", "processed"]:
Path(fpath).unlink(missing_ok=True)
await set_cf_r2(r2_key, data=resp.model_dump(), metadata={"file_id": resp.id})
return resp.id
@@ -495,7 +485,8 @@ async def context_bytes(client: Client, message: Message) -> int:
for msg in chains:
groups = await client.get_media_group(msg.chat.id, msg.id) if msg.media_group_id else [msg]
messages.extend(groups)
- size_bytes = 0
- for m in messages:
- size_bytes += glom(m, Coalesce("photo.sizes.-1.file_size", "video.file_size", "document.file_size"), default=0)
- return size_bytes
+ return sum(message_bytes(m) for m in messages)
+
+
+def message_bytes(message: Message) -> int:
+    """Return the size in bytes of the media attached to *message*.
+
+    The lookup tries, in order, the last entry of ``photo.sizes``, then the
+    ``file_size`` of the video, audio and document attachments. Returns 0
+    when none of these is available.
+    """
+    # "audio.file_size" is part of the lookup so that audio messages are
+    # measured too; without it they count as 0 bytes and slip past the
+    # max_upload_size guard in get_openai_file_id.
+    size = glom(message, Coalesce("photo.sizes.-1.file_size", "video.file_size", "audio.file_size", "document.file_size"), default=0)
+    # A file_size attribute that is present but None would break the integer
+    # comparison and sum() done by callers, so normalize it to 0.
+    return size or 0
src/ai/texts/openai_response.py
@@ -38,8 +38,9 @@ async def openai_responses_api(
openai_responses_config: str | dict = "",
openai_proxy: str | None = PROXY.OPENAI,
cache_response_ttl: int = 0,
- openai_allow_image: bool = False, # whether to allow image in input modalities
+ openai_allow_image: bool = True, # whether to allow image in input modalities
openai_allow_video: bool = False, # whether to allow video in input modalities
+ openai_allow_audio: bool = False, # whether to allow audio in input modalities
openai_allow_file: bool = False, # whether to allow file in input modalities
openai_media_send_as: Literal["base64", "file_id"] = "base64",
skills: str = "",
@@ -91,6 +92,7 @@ async def openai_responses_api(
"cache_day": cache_day,
"allow_image": openai_allow_image,
"allow_video": openai_allow_video,
+ "allow_audio": openai_allow_audio,
"allow_file": openai_allow_file,
"openai_media_send_as": openai_media_send_as,
},