Commit 5447d27

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-06-07 04:06:13
chore(asr): only keep the first channel in `downsampe_audio`
1 parent fec7ee9
Changed files (2)
src/asr/utils.py
@@ -76,11 +76,11 @@ def get_gemini_asr_method(duration: float | None = None) -> tuple[str, list[str]
     return "gemini", [x.lstrip(".") for x in GEMINI_AUDIO_EXT]
 
 
-async def downsampe_audio(path: str | Path, ext: str = "opus", codec: str = "libopus", sample_rate: int = 16000, **kwargs) -> Path:
+async def downsampe_audio(path: str | Path, ext: str = "opus", codec: str = "libopus", sample_rate: int = 16000, channel: int = 1, **kwargs) -> Path:
     path = Path(path).expanduser().resolve()
     if not path.is_file():
         return path
-    return await convert_to_audio(path, ext=ext, codec=codec, ar=sample_rate, **kwargs)
+    return await convert_to_audio(path, ext=ext, codec=codec, ac=channel, ar=sample_rate, **kwargs)
 
 
 def is_english_word(text: str) -> bool:
src/llm/contexts.py
@@ -13,11 +13,10 @@ from openai import AsyncOpenAI
 from pyrogram.client import Client
 from pyrogram.types import Message
 
-from asr.utils import GEMINI_AUDIO_EXT
+from asr.utils import GEMINI_AUDIO_EXT, downsampe_audio
 from config import GPT
 from llm.utils import BOT_TIPS, clean_context, convert_md
 from messages.parser import parse_msg
-from multimedia import convert_to_audio
 
 if TYPE_CHECKING:
     from io import BytesIO
@@ -146,7 +145,7 @@ async def single_gemini_context(client: Client, message: Message, app: genai.Cli
             if info["mtype"] in ["video", "photo", "audio", "voice"] or info["mime_type"] in gemini_mime_types or any(info["file_name"].endswith(ext) for ext in gemini_extensions):
                 fpath: str = await client.download_media(msg, in_memory=False)  # type: ignore  # type: ignore
                 if info["mtype"] in ["audio", "voice"] and Path(fpath).suffix not in GEMINI_AUDIO_EXT:
-                    audio_path = await convert_to_audio(fpath, ext="opus", codec="libopus")
+                    audio_path = await downsampe_audio(fpath)
                     fpath = audio_path.as_posix()
                 upload = await app.aio.files.upload(file=fpath, config=UploadFileConfig(display_name=info["file_name"] or f"send from {info['full_name']}"))
                 while upload.state == FileState.PROCESSING: