Commit e42e857

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-08-31 18:04:00
fix(asr): handle different audio file types in `groq_asr`
1 parent 7b63892
Changed files (1)
src
src/asr/groq.py
@@ -12,7 +12,7 @@ from loguru import logger
 from asr.utils import audio_chunk_to_bytes, convert_single_channel, downsampe_audio, get_file_bytes, load_audio
 from config import ASR
 from networking import hx_req
-from utils import seconds_to_time, strings_list, zhcn
+from utils import guess_mime, seconds_to_time, strings_list, zhcn
 
 
 async def groq_asr(path: str | Path, model: str = "", prompt: str = "", temperature: float = 0, language: str = "") -> dict:
@@ -55,6 +55,12 @@ async def groq_single_file(
         "temperature": str(temperature),  # must be string
         "response_format": "verbose_json",
     }
+    if isinstance(path_or_bytes, Path | str):
+        file_name = Path(path_or_bytes).name
+        mime = guess_mime(Path(path_or_bytes))
+    else:
+        file_name = "chunk.ogg"
+        mime = "audio/ogg"
     if prompt:
         data["prompt"] = prompt
     if language:
@@ -64,7 +70,7 @@ async def groq_single_file(
         "https://api.groq.com/openai/v1/audio/transcriptions",
         method="POST",
         headers={"Authorization": f"Bearer {strings_list(ASR.GROQ_KEYS, shuffle=True)[0]}"},
-        files={"file": ("chunk.ogg", io.BytesIO(audio_bytes), "audio/ogg")},
+        files={"file": (file_name, io.BytesIO(audio_bytes), mime)},
         data=data,
         timeout=600,
         proxy=ASR.GROQ_PROXY,