Commit f360e11

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-03-24 02:44:24
feat(subtitle): add video title
1 parent 03788bb
Changed files (1)
src
src/others/subtitle.py
@@ -2,18 +2,17 @@
 # -*- coding: utf-8 -*-
 
 
-import asyncio
 import io
 from datetime import timedelta
 
+from glom import glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
-from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api import YouTubeTranscriptApi  # type: ignore
 
-from config import API, PREFIX, PROXY, TOKEN
+from config import API, PREFIX, PROXY, READING_SPEED, TOKEN
 from database import cache
-from llm.utils import count_tokens
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
@@ -51,22 +50,26 @@ async def get_subtitle(client: Client, message: Message, **kwargs):
             return
         cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)
 
-    if res := await fetch_subtitle(vid):
-        logger.success(res)
-        if subtitles := res.get("subtitle", ""):
-            with io.BytesIO(subtitles.encode("utf-8")) as f:
-                await client.send_document(
-                    to_int(target_chat),
-                    f,
-                    file_name=f"{vid}字幕.txt",
-                    caption=f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\nToken: {res['num_tokens']}",
-                )
-        elif error := res.get("error", ""):
-            await modify_progress(text=error, force_update=True, **kwargs)
-            await asyncio.sleep(3)
-    else:
+    res = await fetch_subtitle(vid)
+    if not res:
         await modify_progress(text="❌获取字幕失败", force_update=True, **kwargs)
-        await asyncio.sleep(3)
+        return
+    if error := res.get("error", ""):
+        await modify_progress(text=error, force_update=True, **kwargs)
+        return
+    if not res.get("subtitle", ""):
+        return
+    subtitles = res.get("subtitle", "")
+    logger.success(subtitles)
+    if vinfo := await fetch_youtube_video_info(vid):
+        caption = f"[{vinfo['title']}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+        with io.BytesIO(subtitles.encode("utf-8")) as f:
+            await client.send_document(to_int(target_chat), f, file_name="字幕文件.txt", caption=caption)
+    else:
+        caption = f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+        with io.BytesIO(subtitles.encode("utf-8")) as f:
+            await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)
+
     await modify_progress(del_status=True, **kwargs)
 
 
@@ -163,8 +166,26 @@ def to_webvtt(subtitles: list[dict]) -> dict:
             vtt_output.append(f"{start} --> {end}")
             vtt_output.append(text)
             vtt_output.append("")  # Add blank line between subtitles
-        num_tokens = count_tokens("\n".join(vtt_output))
-        return {"subtitle": "\n".join(vtt_output), "num_chars": num_chars, "num_tokens": num_tokens}
+        # num_tokens = count_tokens("\n".join(vtt_output))
+        reading_minutes = num_chars / READING_SPEED  # minutes
+        return {"subtitle": "\n".join(vtt_output), "num_chars": num_chars, "reading_minutes": reading_minutes}
     except Exception as e:
         logger.error(f"Failed to convert subtitles to WebVTT: {e}")
         return {"error": str(e)}
+
+
+async def fetch_youtube_video_info(video_id: str) -> dict[str, str]:
+    """Fetch the title and description of a YouTube video.
+
+    Calls the YouTube Data API v3 ``videos`` endpoint for the ``snippet``
+    part with ``hl=zh-CN``, routed through ``PROXY.SUBTITLE`` and without
+    retries (``max_retry=0``).
+
+    Args:
+        video_id: The YouTube video ID to look up.
+
+    Returns:
+        ``{"title": ..., "description": ...}`` on success; ``description``
+        falls back to ``""`` when the response does not carry one.
+        An empty dict when the request reports ``hx_error``, when the title
+        is missing or empty, or when an exception is raised during the
+        lookup, so callers can test the result for truthiness.
+    """
+    try:
+        logger.info(f"Fetch Video info for {video_id=}, proxy={PROXY.SUBTITLE}")
+        api = "https://www.googleapis.com/youtube/v3/videos"
+        params = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet", "id": video_id, "hl": "zh-CN"}
+        resp = await hx_req(api, proxy=PROXY.SUBTITLE, params=params, check_keys=["items"], max_retry=0)
+        if resp.get("hx_error"):
+            logger.warning(f"YouTube Videos API failed: {resp['hx_error']}")
+            return {}
+        # The title is required: a missing path (e.g. an empty ``items`` list)
+        # raises here and is handled by the ``except`` below.
+        title = glom(resp, "items.0.snippet.title")
+        # The description is optional: do not discard a valid title just
+        # because the snippet has no description.
+        desc = glom(resp, "items.0.snippet.description", default="")
+    except Exception as e:
+        logger.error(f"Failed to get video info: {e}")
+        return {}
+    if not title:
+        # An empty title would still make the result dict truthy and render an
+        # empty link label in the caption; report "no info" instead.
+        logger.warning(f"YouTube Videos API returned no title for {video_id=}")
+        return {}
+    return {"title": title, "description": desc}