Commit d3a1e01

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-07-11 09:01:56
fix(asr): more reliable audio duration calculation
1 parent ec0bdcd
Changed files (1)
src
src/asr/utils.py
@@ -1,10 +1,13 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import contextlib
+import json
 import random
 import re
 from pathlib import Path
 
 import soundfile as sf
+from ffmpeg import FFmpeg
 from soundfile import LibsndfileError
 
 from config import ASR, GEMINI
@@ -107,7 +110,15 @@ async def convert_single_channel(path: str | Path) -> Path:
 
 
 def audio_duration(path: str | Path) -> float:
-    with sf.SoundFile(path, "r") as f:
-        sr = f.samplerate
-        audio = f.read(dtype="float32")
-        return len(audio) / sr
+    with contextlib.suppress(LibsndfileError), sf.SoundFile(path) as f:
+        samplerate = f.samplerate
+        frames = f.frames
+        return frames / samplerate
+    with contextlib.suppress(Exception):
+        ffprobe = FFmpeg(executable="ffprobe").input(Path(path).as_posix(), print_format="json", show_streams=None)
+        metadata = json.loads(ffprobe.execute())
+        streams = metadata.get("streams", [])
+        durations = [x.get("duration", 0) for x in streams]  # per-stream durations (some files embed the duration in the subtitle stream)
+        return max(map(float, durations))
+
+    return 0.0