Commit b66f56d
Changed files (1)
src
others
src/others/subtitle.py
@@ -124,11 +124,45 @@ async def fetch_subtitle(video_id: str, provider: str) -> dict:
except Exception as e:
logger.error(f"Failed to get subtitle: {e}")
return {"error": str(e)}
- return to_webvtt(subtitles)
+ return to_transcription(subtitles)
+
+
def to_transcription(subtitles: list[dict]) -> dict:
    """Convert subtitle entries to "[minute:second] transcription" lines.

    Each entry becomes one line of the form ``[M:SS] text``. The timestamp is
    the entry's ``start`` offset, interpreted as seconds and truncated to
    whole seconds. Minutes are not wrapped into hours, so an offset of
    3725 seconds renders as ``[62:05]``.

    Sample input:
        subtitles = [
            {'text': 'hello', 'start': 0.056, 'duration': 2.88},
            {'text': 'world!', 'start': 2.983, 'duration': 3.244},
        ]

    Args:
        subtitles: Entries providing a ``text`` string and a ``start``
            offset (any value ``float()`` accepts). Other keys, such as
            ``duration``, are ignored.

    Returns:
        dict: An empty dict when ``subtitles`` is empty. Otherwise:
            "subtitle": the rendered lines joined with newlines; for the
                sample input these are "[0:00] hello" and "[0:02] world!".
            "num_chars": total length of all ``text`` values, excluding the
                timestamps and separators (11 for the sample input).
            "reading_minutes": ``num_chars / READING_SPEED``.
    """
    if not subtitles:
        return {}

    lines = []
    num_chars = 0

    for subtitle in subtitles:
        # Parse the offset once; divmod yields the same floor quotient and
        # remainder as separate `// 60` and `% 60` operations.
        minutes, seconds = divmod(float(subtitle["start"]), 60)
        text = subtitle["text"]
        lines.append(f"[{int(minutes)}:{int(seconds):02d}] {text}")
        num_chars += len(text)

    return {
        "subtitle": "\n".join(lines),
        "num_chars": num_chars,
        # READING_SPEED is a module-level constant defined outside this
        # function; presumably characters per minute -- TODO confirm.
        "reading_minutes": num_chars / READING_SPEED,
    }
def to_webvtt(subtitles: list[dict]) -> dict:
- """Converts subtitles to WebVTT format.
+ """(Deprecated, use `to_transcription`) Converts subtitles to WebVTT format.
sample subtitles = [
{'text': 'hello', 'start': 0.056, 'duration': 2.88},