Commit 332381e

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-10 03:22:55
feat(asr): support custom ASR engine based on duration
1 parent 6f0160a
Changed files (3)
src/asr/utils.py
@@ -7,9 +7,9 @@ from config import ASR, FILE_SERVER
 
 def get_asr_method(duration: float, file_size: int, force_engine: str = "") -> tuple[str, list[str]]:
     """Get ASR method and supported file types."""
-    if duration < 60:
+    if duration < ASR.SHORT_DURATION:
         asr_engine = random.choice([x.strip() for x in ASR.SHORT_ENGINE.split(",") if x.strip()])
-    elif 60 <= duration <= 300:
+    elif ASR.SHORT_DURATION <= duration <= ASR.MIDDLE_DURATION:
         asr_engine = random.choice([x.strip() for x in ASR.MIDDLE_ENGINE.split(",") if x.strip()])
     else:
         asr_engine = random.choice([x.strip() for x in ASR.LONG_ENGINE.split(",") if x.strip()])
@@ -49,8 +49,6 @@ def get_ali_asr_method(file_size: int) -> tuple[str, list[str]]:
 
 
 def get_tencent_asr_method(duration: float, file_size: int) -> tuple[str, list[str]]:
-    if duration > ASR.TENCENT_MAX_DURATION:
-        return f"无法识别时长超过{ASR.TENCENT_MAX_DURATION}秒的音频, 当前音频时长: {duration}秒", []
     if not all([ASR.TENCENT_APPID, ASR.TENCENT_SECRET_ID, ASR.TENCENT_SECRET_KEY]):
         return "请设置Tencent ASR相关环境变量", []
 
src/preview/ytdlp.py
@@ -65,7 +65,7 @@ async def preview_ytdlp(
     youtube_comments_provider: str = PROVIDER.YOUTUBE_COMMENTS,
     proxy: str | None = None,
     append_transcription: bool = True,
-    ytdlp_transcription_engine: str = "gemini",
+    ytdlp_transcription_engine: str = "",
     transcription_only: bool = False,
     transcription_force_file: bool = False,
     to_telegraph: bool = True,
src/config.py
@@ -248,10 +248,13 @@ class DB:
 
 
 class ASR:
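+    # Engines are chosen by audio duration (seconds): SHORT_ENGINE if duration < SHORT_DURATION, MIDDLE_ENGINE if duration <= MIDDLE_DURATION, otherwise LONG_ENGINE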
     # support ali, tencent, gemini engines
-    SHORT_ENGINE = os.getenv("ASR_SHORT_ENGINE", "tencent,ali")  # duration < 60s
-    MIDDLE_ENGINE = os.getenv("ASR_MIDDLE_ENGINE", "tencent,ali")  # 60s <= duration <= 300s
-    LONG_ENGINE = os.getenv("ASR_LONG_ENGINE", "gemini")  # duration > 300s
+    SHORT_ENGINE = os.getenv("ASR_SHORT_ENGINE", "tencent")
+    SHORT_DURATION = int(os.getenv("ASR_SHORT_DURATION", "60"))
+    MIDDLE_ENGINE = os.getenv("ASR_MIDDLE_ENGINE", "tencent,ali")
+    MIDDLE_DURATION = int(os.getenv("ASR_MIDDLE_DURATION", "600"))
+    LONG_ENGINE = os.getenv("ASR_LONG_ENGINE", "gemini")
     GEMINI_BASR_URL = os.getenv("ASR_GEMINI_BASR_URL", "https://generativelanguage.googleapis.com/")
     GEMINI_API_KEY = os.getenv("ASR_GEMINI_API_KEY", "")  # comma separated keys for load balance. e.g. "key1,key2,key3"
     GEMINI_MAX_DURATION = int(os.getenv("ASR_GEMINI_MAX_DURATION", "34200"))  # 9.5 hour
@@ -260,7 +263,6 @@ class ASR:
     GEMINI_THINKING_BUDGET = os.getenv("ASR_GEMINI_THINKING_BUDGET", None)  # 0 to disable thinking. DO NOT set this if the model is not a thinking model
     GEMINI_CONFIG = os.getenv("ASR_GEMINI_CONFIG", "{}")  # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
     TENCENT_APPID = os.getenv("ASR_TENCENT_APPID", "")
-    TENCENT_MAX_DURATION = int(os.getenv("ASR_TENCENT_MAX_DURATION", "3600"))  # 1 hour
     TENCENT_PROXY = os.getenv("ASR_TENCENT_PROXY", None)  # Banned oversea IP, need a back to China proxy
     TENCENT_SECRET_ID = os.getenv("ASR_TENCENT_SECRET_ID", "")
     TENCENT_SECRET_KEY = os.getenv("ASR_TENCENT_SECRET_KEY", "")