Commit b66f56d

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-04-27 18:06:24
style(subtitle): use transcription format instead of WebVTT
1 parent a46a7bc
Changed files (1)
src
src/others/subtitle.py
@@ -124,11 +124,45 @@ async def fetch_subtitle(video_id: str, provider: str) -> dict:
     except Exception as e:
         logger.error(f"Failed to get subtitle: {e}")
         return {"error": str(e)}
-    return to_webvtt(subtitles)
+    return to_transcription(subtitles)
+
+
+def to_transcription(subtitles: list[dict]) -> dict:
+    """Converts subtitles to one "[minute:second] text" line per entry.
+
+    sample subtitles = [
+        {'text': 'hello', 'start': 0.056, 'duration': 2.88},
+        {'text': 'world!', 'start': 2.983, 'duration': 3.244},
+    ]
+
+    Returns:
+        dict: {} when `subtitles` is empty, otherwise {
+            "subtitle": "[0:00] hello\\n[0:02] world!",
+            "num_chars": 11,
+            "reading_minutes": 11 / READING_SPEED,
+            }
+    """
+    if not subtitles:
+        return {}
+
+    res = []
+    num_chars = 0
+
+    for subtitle in subtitles:
+        minutes = int(float(subtitle["start"]) // 60)  # total minutes; not wrapped at 60 (no hour field)
+        seconds = int(float(subtitle["start"]) % 60)  # fractional seconds are truncated, not rounded
+        res.append(f"[{minutes}:{seconds:02d}] {subtitle['text']}")
+        num_chars += len(subtitle["text"])  # counts caption text only, not the "[m:ss] " prefix
+
+    return {
+        "subtitle": "\n".join(res),
+        "num_chars": num_chars,
+        "reading_minutes": num_chars / READING_SPEED,  # READING_SPEED is defined outside this hunk; presumably chars per minute - confirm
+    }
 
 
 def to_webvtt(subtitles: list[dict]) -> dict:
-    """Converts subtitles to WebVTT format.
+    """(Deprecated, use `to_transcription`) Converts subtitles to WebVTT format.
 
     sample subtitles = [
         {'text': 'hello', 'start': 0.056, 'duration': 2.88},