Commit 5447d27
Changed files (2)
src
asr
llm
src/asr/utils.py
@@ -76,11 +76,11 @@ def get_gemini_asr_method(duration: float | None = None) -> tuple[str, list[str]
return "gemini", [x.lstrip(".") for x in GEMINI_AUDIO_EXT]
-async def downsampe_audio(path: str | Path, ext: str = "opus", codec: str = "libopus", sample_rate: int = 16000, **kwargs) -> Path:
+async def downsampe_audio(path: str | Path, ext: str = "opus", codec: str = "libopus", sample_rate: int = 16000, channel: int = 1, **kwargs) -> Path:
path = Path(path).expanduser().resolve()
if not path.is_file():
return path
- return await convert_to_audio(path, ext=ext, codec=codec, ar=sample_rate, **kwargs)
+ return await convert_to_audio(path, ext=ext, codec=codec, ac=channel, ar=sample_rate, **kwargs)
def is_english_word(text: str) -> bool:
src/llm/contexts.py
@@ -13,11 +13,10 @@ from openai import AsyncOpenAI
from pyrogram.client import Client
from pyrogram.types import Message
-from asr.utils import GEMINI_AUDIO_EXT
+from asr.utils import GEMINI_AUDIO_EXT, downsampe_audio
from config import GPT
from llm.utils import BOT_TIPS, clean_context, convert_md
from messages.parser import parse_msg
-from multimedia import convert_to_audio
if TYPE_CHECKING:
from io import BytesIO
@@ -146,7 +145,7 @@ async def single_gemini_context(client: Client, message: Message, app: genai.Cli
if info["mtype"] in ["video", "photo", "audio", "voice"] or info["mime_type"] in gemini_mime_types or any(info["file_name"].endswith(ext) for ext in gemini_extensions):
fpath: str = await client.download_media(msg, in_memory=False) # type: ignore # type: ignore
if info["mtype"] in ["audio", "voice"] and Path(fpath).suffix not in GEMINI_AUDIO_EXT:
- audio_path = await convert_to_audio(fpath, ext="opus", codec="libopus")
+ audio_path = await downsampe_audio(fpath)
fpath = audio_path.as_posix()
upload = await app.aio.files.upload(file=fpath, config=UploadFileConfig(display_name=info["file_name"] or f"send from {info['full_name']}"))
while upload.state == FileState.PROCESSING: