Commit ed65b8f
Changed files (11)
src/asr/ali.py
@@ -34,8 +34,8 @@ async def ali_asr(path: str | Path) -> dict:
if not path.is_file():
return {"texts": "", "error": "File not found."}
supported_ext = [".aac", ".amr", ".avi", ".flac", ".flv", ".m4a", ".mkv", ".mov", ".mp3", ".mp4", ".mpeg", ".oga", ".ogg", ".opus", ".wav", ".webm", ".wma", ".wmv"]
- audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="opus", codec="libopus")
- audio_path = await convert_single_channel(audio_path)
+ audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
api_keys = strings_list(ASR.ALI_API_KEY, shuffle=True)
if not api_keys:
return {"error": "请配置阿里云语音识别的API Key"}
@@ -50,7 +50,7 @@ async def ali_asr(path: str | Path) -> dict:
url = FILE_SERVER.removesuffix("/") + "/" + path.name
elif ASR.ALI_FS_ENGINE.lower() == "uguu":
if audio_path.stat().st_size > 100 * 1024 * 1024: # 100 MB
- audio_path = await downsampe_audio(audio_path)
+ audio_path = await downsampe_audio(audio_path, ext="wav", codec="pcm_s16le")
url = await upload_uguu(audio_path) # max 100 MB for Uguu
elif ASR.ALI_FS_ENGINE.lower() == "alist":
url = await upload_alist(audio_path)
@@ -120,9 +120,8 @@ async def query_ali_asr(task_id: str, api_key: str, query_times: int = 0) -> dic
async def ali_realtime_asr(model: str, path: str | Path, api_key: str) -> dict:
# convert audio file
sample_rate = 8000 if "8k" in model else 16000
- ext = "opus"
- audio_path = await downsampe_audio(path, ext=ext, sample_rate=sample_rate, channel=1)
- recognition = Recognition(model=model, format=ext, sample_rate=sample_rate, callback=RecognitionCallback(), api_key=api_key)
+ audio_path = await downsampe_audio(path, ext="wav", codec="pcm_s16le", sample_rate=sample_rate, channel=1)
+ recognition = Recognition(model=model, format="wav", sample_rate=sample_rate, callback=RecognitionCallback(), api_key=api_key)
result = recognition.call(Path(audio_path).as_posix())
if result.status_code != 200:
return {"error": f"❌语音识别失败: {result.message}"}
src/asr/cloudflare.py
@@ -37,10 +37,10 @@ async def cloudflare_asr(
if not path.is_file():
return {"texts": "", "error": "File not found."}
supported_ext = [".mp3", ".opus", ".ogg", ".oga", ".wav", ".flac", ".aac"]
- audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="opus", codec="libopus")
- audio_path = await convert_single_channel(audio_path)
+ audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
# max allowed file size is 25MB
- if audio_path.stat().st_size < ASR.CLOUDFLARE_MAX_BYTES:
+ if duration < ASR.CLOUDFLARE_CHUNK_SECONDS:
return await cloudflare_single_file(audio_path, model=model, prompt=prompt)
return await cloudflare_file_chunks(audio_path, duration, model=model, prompt=prompt)
@@ -123,9 +123,9 @@ async def cloudflare_file_chunks(
Returns:
dict: {"texts": str, "raw_texts": str, "segments": list[dict]}
"""
- # only support opus file
- ogg_path = path if path.suffix in [".oga", ".ogg", ".opus"] else await downsampe_audio(path, ext="opus", codec="libopus")
- audio, duration, sr = load_audio(ogg_path)
+ # only supports wav files
+ wav_path = path if path.suffix.lower() == ".wav" else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio, duration, sr = load_audio(wav_path)
if sr == 0:
return {"error": "Failed to load audio."}
transcription = {}
src/asr/deepgram.py
@@ -22,8 +22,8 @@ async def deepgram_asr(path: str | Path) -> dict:
if not path.is_file():
return {"texts": "", "error": "File not found."}
supported_ext = [".mp3", ".aac", ".flac", ".m4a", ".mp2", ".mp4", ".ogg", ".opus", ".oga", ".pcm", ".wav", ".webm"]
- audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="opus", codec="libopus")
- audio_path = await convert_single_channel(audio_path)
+ audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
api_keys = strings_list(ASR.DEEPGRAM_API, shuffle=True)
if not api_keys:
return {"error": "请配置DeepGram语音识别的API Key"}
src/asr/gemini.py
@@ -43,8 +43,8 @@ async def gemini_asr(
path = Path(path).expanduser().resolve()
if not path.is_file():
return {"texts": "", "error": "File not found."}
- audio_path = path if path.suffix.lower() in GEMINI_AUDIO_EXT else await downsampe_audio(path, ext="opus", codec="libopus")
- audio_path = await convert_single_channel(audio_path)
+ audio_path = path if path.suffix.lower() in GEMINI_AUDIO_EXT else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
duration = audio_duration(audio_path)
if duration < ASR.GEMINI_CHUNK_SECONDS:
return await gemini_single_file(message, audio_path, model_id=model_id, prompt=prompt, delete_gemini_file=delete_gemini_file)
@@ -147,8 +147,8 @@ async def gemini_file_chunks(
dict: {"texts": str, "raw_texts": str, "segments": list[dict]}
"""
path = Path(path).expanduser().resolve()
- ogg_path = path if path.suffix in [".oga", ".ogg", ".opus"] else await downsampe_audio(path, ext="opus", codec="libopus")
- audio, duration, sr = load_audio(ogg_path)
+ wav_path = path if path.suffix.lower() == ".wav" else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio, duration, sr = load_audio(wav_path)
if sr == 0:
return {"error": "Failed to load audio."}
transcription = {}
@@ -156,7 +156,7 @@ async def gemini_file_chunks(
# Calculate # of chunks
total_chunks = (duration // (chunk_seconds - overlap_seconds)) + 1
total_chunks = int(total_chunks)
- chunk_paths = [Path(DOWNLOAD_DIR) / f"{rand_string()}.opus" for _ in range(total_chunks)]
+ chunk_paths = [Path(DOWNLOAD_DIR) / f"{rand_string()}.wav" for _ in range(total_chunks)]
tasks = []
offset_list = []
# Loop through each chunk, extract current chunk from audio
src/asr/groq.py
@@ -27,8 +27,8 @@ async def groq_asr(path: str | Path, model: str = "", prompt: str = "", temperat
if not path.is_file():
return {"texts": "", "error": "File not found."}
supported_ext = [".aac", ".flac", ".m4a", ".mp3", ".mpeg", ".mpga", ".ogg", ".opus", ".wav", ".webm"]
- audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="opus", codec="libopus")
- audio_path = await convert_single_channel(audio_path)
+ audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
# max allowed file size is 25MB
if audio_path.stat().st_size < ASR.GROQ_MAX_BYTES:
return await groq_single_file(audio_path, model=model, prompt=prompt, temperature=temperature, language=language)
@@ -59,8 +59,8 @@ async def groq_single_file(
file_name = Path(path_or_bytes).name
mime = guess_mime(Path(path_or_bytes))
else:
- file_name = "chunk.ogg"
- mime = "audio/ogg"
+ file_name = "chunk.wav"
+ mime = "audio/wav"
if prompt:
data["prompt"] = prompt
if language:
@@ -273,8 +273,8 @@ async def groq_file_chunks(
path = Path(path).expanduser().resolve()
if not path.is_file():
return {"texts": "", "error": "File not found."}
- if path.suffix.lower() not in [".opus", ".ogg"]:
- path = await downsampe_audio(path, ext="opus", codec="libopus")
+ if path.suffix.lower() != ".wav":
+ path = await downsampe_audio(path, ext="wav", codec="pcm_s16le")
audio, duration, sr = load_audio(path)
if sr == 0:
return {"error": "Failed to load audio."}
src/asr/tecent.py
@@ -88,10 +88,10 @@ async def tencent_asr(path: str | Path, language: str, duration: float) -> dict:
if not path.is_file():
return {"texts": "", "error": "File not found."}
supported_ext = [".wav", ".pcm", ".ogg", ".opus", ".oga", ".speex", ".silk", ".mp3", ".m4a", ".aac", ".amr"]
- audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="opus", codec="libopus")
- audio_path = await convert_single_channel(audio_path)
+ audio_path = path if path.suffix.lower() in supported_ext else await downsampe_audio(path, ext="wav", codec="pcm_s16le")
+ audio_path = await convert_single_channel(audio_path, ext="wav", codec="pcm_s16le")
if duration < 1: # something went wrong while detecting duration
- audio_path = await downsampe_audio(path, ext="opus", codec="libopus")
+ audio_path = await downsampe_audio(path, ext="wav", codec="pcm_s16le")
duration = audio_duration(audio_path)
# max allowed duration is 60s
@@ -120,13 +120,13 @@ async def tencent_single_asr(path_or_bytes: Path | bytes, language: str, *, offs
if isinstance(path_or_bytes, Path):
# max 3 MB
file_size = path_or_bytes.stat().st_size
- audio_path = path_or_bytes if file_size < 3 * 1024 * 1024 else await downsampe_audio(path_or_bytes, ext="opus", codec="libopus")
+ audio_path = path_or_bytes if file_size < 3 * 1024 * 1024 else await downsampe_audio(path_or_bytes)
voice_format = Path(audio_path).suffix.lower().lstrip(".")
if voice_format in ["ogg", "opus", "oga"]: # Tencent only supports ogg-opus
voice_format = "ogg-opus"
audio_bytes = await get_file_bytes(audio_path)
elif isinstance(path_or_bytes, bytes):
- voice_format = "ogg-opus"
+ voice_format = "wav"
audio_bytes = path_or_bytes
audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
payload = f'{{"EngSerViceType":"{language}","SourceType":1,"WordInfo":2,"VoiceFormat":"{voice_format}","Data":"{audio_base64}"}}'
@@ -193,9 +193,9 @@ async def tencent_file_chunks(
Returns:
dict: {"texts": str, "raw_texts": str, "segments": list[dict]}
"""
- # only support opus file
- ogg_path = path if path.suffix in [".oga", ".ogg", ".opus"] else await downsampe_audio(path, ext="opus", codec="libopus")
- audio, _, sr = load_audio(ogg_path)
+ # only supports wav files
+ aac_path = path if path.suffix == ".wav" else await downsampe_audio(path)
+ audio, _, sr = load_audio(aac_path)
if sr == 0:
return {"error": "Failed to load audio."}
src/asr/utils.py
@@ -37,7 +37,7 @@ def auto_choose_asr_engine(duration: float, engine: str) -> str:
enabled_engines.append("ali")
if all([ASR.TENCENT_APPID, ASR.TENCENT_SECRET_ID, ASR.TENCENT_SECRET_KEY, ASR.TENCENT_FS_ENGINE]):
enabled_engines.append("tencent")
- if all([ASR.CLOUDFLARE_MODEL, ASR.CLOUDFLARE_KEYS, ASR.CLOUDFLARE_MAX_BYTES, ASR.CLOUDFLARE_CHUNK_SECONDS]):
+ if all([ASR.CLOUDFLARE_MODEL, ASR.CLOUDFLARE_KEYS, ASR.CLOUDFLARE_CHUNK_SECONDS]):
enabled_engines.append("cloudflare")
if all([GEMINI.ASR_MODEL, GEMINI.API_KEY, GEMINI.BASE_URL, ASR.GEMINI_CHUNK_SECONDS]):
enabled_engines.append("gemini")
@@ -74,7 +74,7 @@ def auto_choose_asr_engine(duration: float, engine: str) -> str:
return random.choice(engines) if engines else fallback_engine
-async def downsampe_audio(path: str | Path, ext: str = "opus", codec: str = "libopus", sample_rate: int = 16000, channel: int = 1, **kwargs) -> Path:
+async def downsampe_audio(path: str | Path, ext: str = "wav", codec: str = "pcm_s16le", sample_rate: int = 16000, channel: int = 1, **kwargs) -> Path:
path = Path(path).expanduser().resolve()
if not path.is_file():
return path
@@ -96,11 +96,11 @@ async def get_audio_channel(path: str | Path) -> int:
return -1
-async def convert_single_channel(path: str | Path) -> Path:
+async def convert_single_channel(path: str | Path, **kwargs) -> Path:
path = Path(path).expanduser().resolve()
num_channel = await get_audio_channel(path)
if num_channel != 1:
- return await downsampe_audio(path, ext="opus", codec="libopus", channel=1)
+ return await downsampe_audio(path, **kwargs)
return path
@@ -142,14 +142,14 @@ def load_audio(path: Path | str) -> tuple[ndarray, float, int]:
return ndarray([]), 0, 0
-async def audio_chunk_to_bytes(chunk: ndarray, samplerate: int, fmt: str = "ogg", subtype: str = "OPUS") -> bytes:
+async def audio_chunk_to_bytes(chunk: ndarray, samplerate: int, fmt: str = "WAV", subtype: str = "PCM_16") -> bytes:
buffer = io.BytesIO()
await asyncio.to_thread(sf.write, buffer, chunk, samplerate, format=fmt, subtype=subtype)
buffer.seek(0) # move cursor to beginning
return buffer.getvalue()
-async def audio_chunk_to_path(chunk: ndarray, samplerate: int, path: str | Path, fmt: str = "ogg", subtype: str = "OPUS"):
+async def audio_chunk_to_path(chunk: ndarray, samplerate: int, path: str | Path, fmt: str = "WAV", subtype: str = "PCM_16"):
out_path = Path(path).expanduser().resolve()
out_path.parent.mkdir(exist_ok=True, parents=True)
await asyncio.to_thread(sf.write, out_path.as_posix(), chunk, samplerate, format=fmt, subtype=subtype)
src/tts/gemini.py
@@ -15,7 +15,6 @@ from pyrogram.types import Message
from config import CAPTION_LENGTH, DOWNLOAD_DIR, GEMINI, TTS
from llm.hooks import hook_gemini_httpoptions
from messages.utils import blockquote, smart_split
-from multimedia import convert_to_audio
from utils import markdown_to_text, rand_string, strings_list
@@ -36,12 +35,11 @@ async def gemini_tts(message: Message, texts: str, model: str = "", voice_name:
# split
text_list = await smart_split(texts, chars_per_string=TTS.GEMINI_SPLIT_LENGTH, mode=ParseMode.DISABLED)
resp = await asyncio.gather(*[gemini_tts_real(message, text, model, voice_name) for text in text_list])
- save_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
+ wav_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
combined_data = b"".join([r["voice"] for r in resp])
- save_wave_file(save_path, combined_data)
- ogg = await convert_to_audio(save_path, ext="ogg", codec="libopus")
+ save_wave_file(wav_path, combined_data)
caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
- return {"voice": ogg, "duration": calculate_duration(combined_data), "caption": caption}
+ return {"voice": wav_path, "duration": calculate_duration(combined_data), "caption": caption}
async def gemini_tts_real(message: Message, texts: str, model: str, voice_name: str, *, return_bytes: bool = True) -> dict:
@@ -75,10 +73,9 @@ async def gemini_tts_real(message: Message, texts: str, model: str, voice_name:
caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
if return_bytes:
return {"voice": data, "duration": calculate_duration(data), "caption": caption}
- save_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
- save_wave_file(save_path, data)
- ogg = await convert_to_audio(save_path, ext="ogg", codec="libopus")
- return {"voice": ogg, "duration": calculate_duration(data), "caption": caption}
+ wav_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
+ save_wave_file(wav_path, data)
+ return {"voice": wav_path, "duration": calculate_duration(data), "caption": caption}
except Exception as e:
logger.error(e)
return {}
src/tts/qwen.py
@@ -11,7 +11,6 @@ from pyrogram.enums import ParseMode
from config import CAPTION_LENGTH, DOWNLOAD_DIR, TTS
from messages.utils import blockquote, smart_split
-from multimedia import convert_to_audio
from networking import download_file, hx_req
from utils import markdown_to_text, rand_string, strings_list
@@ -29,18 +28,17 @@ async def qwen_tts(texts: str, model: str = "", voice_name: str = "") -> dict:
raw_texts = markdown_to_text(texts)
num_token = count_token(raw_texts, model)
if num_token < TTS.QWEN_INPUT_TOKEN_LIMIT:
- return await qwen_tts_real(texts, model, voice_name, convert_ogg=True)
+ return await qwen_tts_real(texts, model, voice_name)
# split
text_list = await smart_split(texts, chars_per_string=TTS.QWEN_SPLIT_LENGTH, mode=ParseMode.DISABLED)
resp = await asyncio.gather(*[qwen_tts_real(text, model, voice_name) for text in text_list])
- save_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
- merge_wav([r["voice"] for r in resp], save_path)
- ogg = await convert_to_audio(save_path, ext="ogg", codec="libopus")
+ wav_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
+ merge_wav([r["voice"] for r in resp], wav_path)
caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
- return {"voice": ogg, "duration": sum([r["duration"] for r in resp]), "caption": caption}
+ return {"voice": wav_path, "duration": sum([r["duration"] for r in resp]), "caption": caption}
-async def qwen_tts_real(texts: str, model: str, voice_name: str, *, convert_ogg: bool = False) -> dict:
+async def qwen_tts_real(texts: str, model: str, voice_name: str) -> dict:
"""Qwen TTS.
Args:
@@ -67,8 +65,6 @@ async def qwen_tts_real(texts: str, model: str, voice_name: str, *, convert_ogg:
save_path = await download_file(url, proxy=TTS.ALI_PROXY)
duration = glom(response, "usage.output_tokens", default=0) / 50 # 1s = 50 tokens
caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
- if convert_ogg:
- save_path = await convert_to_audio(save_path, ext="ogg", codec="libopus")
except Exception as e:
logger.error(e)
return {"voice": save_path, "duration": duration, "caption": caption}
src/tts/sambert.py
@@ -13,7 +13,6 @@ from pyrogram.enums import ParseMode
from config import CAPTION_LENGTH, DOWNLOAD_DIR, TTS
from messages.utils import blockquote, smart_split
-from multimedia import convert_to_audio
from tts.engines import LIMIT_FOR_MODEL, get_random_one
from utils import markdown_to_text, rand_string, strings_list
@@ -33,18 +32,17 @@ async def sambert_tts(texts: str, model: str = "", voice_name: str = "") -> dict
raw_texts = markdown_to_text(texts)
if len(raw_texts) < TTS.SAMBERT_LENGTH_LIMIT:
- return await sambert_tts_real(texts, model, voice_name, convert_ogg=True)
+ return await sambert_tts_real(texts, model, voice_name)
# split
text_list = await smart_split(texts, chars_per_string=TTS.SAMBERT_LENGTH_LIMIT, mode=ParseMode.DISABLED)
resp = await asyncio.gather(*[sambert_tts_real(text, model, voice_name) for text in text_list])
- save_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
- merge_wav([r["voice"] for r in resp], save_path)
- ogg = await convert_to_audio(save_path, ext="ogg", codec="libopus")
+ wav_path = Path(DOWNLOAD_DIR) / f"{rand_string(16)}.wav"
+ merge_wav([r["voice"] for r in resp], wav_path)
caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
- return {"voice": ogg, "duration": sum([r["duration"] for r in resp]), "caption": caption}
+ return {"voice": wav_path, "duration": sum([r["duration"] for r in resp]), "caption": caption}
-async def sambert_tts_real(texts: str, model: str, voice_name: str, *, convert_ogg: bool = False) -> dict:
+async def sambert_tts_real(texts: str, model: str, voice_name: str) -> dict:
"""Sambert TTS.
Args:
@@ -68,8 +66,6 @@ async def sambert_tts_real(texts: str, model: str, voice_name: str, *, convert_o
if timestamps := response.get_timestamps():
duration = glom(timestamps, "-1.end_time", default=0) / 1000
caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
- if convert_ogg:
- save_path = await convert_to_audio(save_path, ext="ogg", codec="libopus")
except Exception as e:
logger.error(e)
return {"voice": save_path, "duration": duration, "caption": caption}
src/config.py
@@ -287,8 +287,7 @@ class ASR:
ALI_FS_ENGINE = os.getenv("ASR_ALI_FS_ENGINE", "local") # local, uguu or alist.
DEEPGRAM_API = os.getenv("ASR_DEEPGRAM_API", "") # comma separated keys for load balance. e.g. "key1,key2,key3"
CLOUDFLARE_MODEL = os.getenv("ASR_CLOUDFLARE_MODEL", "@cf/openai/whisper-large-v3-turbo")
- CLOUDFLARE_MAX_BYTES = int(os.getenv("ASR_CLOUDFLARE_MAX_BYTES", "26214400")) # 25MB (max file bytes for single file)
- CLOUDFLARE_CHUNK_SECONDS = float(os.getenv("ASR_CLOUDFLARE_CHUNK_SECONDS", "600")) # split long audio file into chunks
+ CLOUDFLARE_CHUNK_SECONDS = float(os.getenv("ASR_CLOUDFLARE_CHUNK_SECONDS", "180")) # split long audio file into chunks
CLOUDFLARE_OVERLAP_SECONDS = float(os.getenv("ASR_CLOUDFLARE_OVERLAP_SECONDS", "5")) # overlap seconds between chunks
CLOUDFLARE_KEYS = os.getenv("ASR_CLOUDFLARE_KEYS", "") # comma separated keys for load balance. e.g. "AccountID:API_TOKEN, AccountID:API_TOKEN, ..."
CLOUDFLARE_PROXY = os.getenv("ASR_CLOUDFLARE_PROXY", None)
@@ -297,7 +296,7 @@ class ASR:
GEMINI_OVERLAP_SECONDS = float(os.getenv("ASR_GEMINI_OVERLAP_SECONDS", "5")) # overlap seconds between chunks
GROQ_PROXY = os.getenv("ASR_GROQ_PROXY", None) # Ban CN & HK IP
GROQ_MAX_BYTES = int(os.getenv("ASR_GROQ_MAX_BYTES", "26214400")) # 25MB (max file bytes for single file)
- GROQ_CHUNK_SECONDS = float(os.getenv("ASR_GROQ_CHUNK_SECONDS", "600")) # split long audio file into chunks
+ GROQ_CHUNK_SECONDS = float(os.getenv("ASR_GROQ_CHUNK_SECONDS", "180")) # split long audio file into chunks
GROQ_OVERLAP_SECONDS = float(os.getenv("ASR_GROQ_OVERLAP_SECONDS", "5")) # overlap seconds between chunks
GROQ_KEYS = os.getenv("ASR_GROQ_KEYS", "") # comma separated keys for load balance.
GROQ_MODELS = os.getenv("ASR_GROQ_MODELS", "whisper-large-v3") # comma separated model names.