Commit 332381e
src/asr/utils.py
@@ -7,9 +7,9 @@ from config import ASR, FILE_SERVER
def get_asr_method(duration: float, file_size: int, force_engine: str = "") -> tuple[str, list[str]]:
"""Get ASR method and supported file types."""
- if duration < 60:
+ if duration < ASR.SHORT_DURATION:
asr_engine = random.choice([x.strip() for x in ASR.SHORT_ENGINE.split(",") if x.strip()])
- elif 60 <= duration <= 300:
+ elif ASR.SHORT_DURATION <= duration <= ASR.MIDDLE_DURATION:
asr_engine = random.choice([x.strip() for x in ASR.MIDDLE_ENGINE.split(",") if x.strip()])
else:
asr_engine = random.choice([x.strip() for x in ASR.LONG_ENGINE.split(",") if x.strip()])
@@ -49,8 +49,6 @@ def get_ali_asr_method(file_size: int) -> tuple[str, list[str]]:
def get_tencent_asr_method(duration: float, file_size: int) -> tuple[str, list[str]]:
- if duration > ASR.TENCENT_MAX_DURATION:
- return f"无法识别时长超过{ASR.TENCENT_MAX_DURATION}秒的音频, 当前音频时长: {duration}秒", []
if not all([ASR.TENCENT_APPID, ASR.TENCENT_SECRET_ID, ASR.TENCENT_SECRET_KEY]):
return "请设置Tencent ASR相关环境变量", []
src/preview/ytdlp.py
@@ -65,7 +65,7 @@ async def preview_ytdlp(
youtube_comments_provider: str = PROVIDER.YOUTUBE_COMMENTS,
proxy: str | None = None,
append_transcription: bool = True,
- ytdlp_transcription_engine: str = "gemini",
+ ytdlp_transcription_engine: str = "",
transcription_only: bool = False,
transcription_force_file: bool = False,
to_telegraph: bool = True,
src/config.py
@@ -248,10 +248,13 @@ class DB:
class ASR:
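+ # pick the ASR engine by audio duration in seconds: < SHORT_DURATION -> SHORT_ENGINE, <= MIDDLE_DURATION -> MIDDLE_ENGINE, otherwise LONG_ENGINE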
# support ali, tencent, gemini engines
- SHORT_ENGINE = os.getenv("ASR_SHORT_ENGINE", "tencent,ali") # duration < 60s
- MIDDLE_ENGINE = os.getenv("ASR_MIDDLE_ENGINE", "tencent,ali") # 60s <= duration <= 300s
- LONG_ENGINE = os.getenv("ASR_LONG_ENGINE", "gemini") # duration > 300s
+ SHORT_ENGINE = os.getenv("ASR_SHORT_ENGINE", "tencent")
+ SHORT_DURATION = int(os.getenv("ASR_SHORT_DURATION", "60"))
+ MIDDLE_ENGINE = os.getenv("ASR_MIDDLE_ENGINE", "tencent,ali")
+ MIDDLE_DURATION = int(os.getenv("ASR_MIDDLE_DURATION", "600"))
+ LONG_ENGINE = os.getenv("ASR_LONG_ENGINE", "gemini")
GEMINI_BASR_URL = os.getenv("ASR_GEMINI_BASR_URL", "https://generativelanguage.googleapis.com/")
GEMINI_API_KEY = os.getenv("ASR_GEMINI_API_KEY", "") # comma separated keys for load balance. e.g. "key1,key2,key3"
GEMINI_MAX_DURATION = int(os.getenv("ASR_GEMINI_MAX_DURATION", "34200")) # 9.5 hour
@@ -260,7 +263,6 @@ class ASR:
GEMINI_THINKING_BUDGET = os.getenv("ASR_GEMINI_THINKING_BUDGET", None) # 0 to disable thinking. DO NOT set this if the model is not a thinking model
GEMINI_CONFIG = os.getenv("ASR_GEMINI_CONFIG", "{}") # default config passed to GenerateContentConfig. Should be a json string: '{"key": "value"}'
TENCENT_APPID = os.getenv("ASR_TENCENT_APPID", "")
- TENCENT_MAX_DURATION = int(os.getenv("ASR_TENCENT_MAX_DURATION", "3600")) # 1 hour
TENCENT_PROXY = os.getenv("ASR_TENCENT_PROXY", None) # Banned oversea IP, need a back to China proxy
TENCENT_SECRET_ID = os.getenv("ASR_TENCENT_SECRET_ID", "")
TENCENT_SECRET_KEY = os.getenv("ASR_TENCENT_SECRET_KEY", "")