Commit f360e11
Changed files (1)
src
others
src/others/subtitle.py
@@ -2,18 +2,17 @@
# -*- coding: utf-8 -*-
-import asyncio
import io
from datetime import timedelta
+from glom import glom
from loguru import logger
from pyrogram.client import Client
from pyrogram.types import Message
-from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api import YouTubeTranscriptApi # type: ignore
-from config import API, PREFIX, PROXY, TOKEN
+from config import API, PREFIX, PROXY, READING_SPEED, TOKEN
from database import cache
-from llm.utils import count_tokens
from messages.parser import parse_msg
from messages.progress import modify_progress
from messages.sender import send2tg
@@ -51,22 +50,26 @@ async def get_subtitle(client: Client, message: Message, **kwargs):
return
cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)
- if res := await fetch_subtitle(vid):
- logger.success(res)
- if subtitles := res.get("subtitle", ""):
- with io.BytesIO(subtitles.encode("utf-8")) as f:
- await client.send_document(
- to_int(target_chat),
- f,
- file_name=f"{vid}字幕.txt",
- caption=f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\nToken: {res['num_tokens']}",
- )
- elif error := res.get("error", ""):
- await modify_progress(text=error, force_update=True, **kwargs)
- await asyncio.sleep(3)
- else:
+ res = await fetch_subtitle(vid)
+ if not res:
await modify_progress(text="❌获取字幕失败", force_update=True, **kwargs)
- await asyncio.sleep(3)
+ return
+ if error := res.get("error", ""):
+ await modify_progress(text=error, force_update=True, **kwargs)
+ return
+ if not res.get("subtitle", ""):
+ return
+ subtitles = res.get("subtitle", "")
+ logger.success(subtitles)
+ if vinfo := await fetch_youtube_video_info(vid):
+ caption = f"[{vinfo['title']}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+ with io.BytesIO(subtitles.encode("utf-8")) as f:
+ await client.send_document(to_int(target_chat), f, file_name="字幕文件.txt", caption=caption)
+ else:
+ caption = f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+ with io.BytesIO(subtitles.encode("utf-8")) as f:
+ await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)
+
await modify_progress(del_status=True, **kwargs)
@@ -163,8 +166,26 @@ def to_webvtt(subtitles: list[dict]) -> dict:
vtt_output.append(f"{start} --> {end}")
vtt_output.append(text)
vtt_output.append("") # Add blank line between subtitles
- num_tokens = count_tokens("\n".join(vtt_output))
- return {"subtitle": "\n".join(vtt_output), "num_chars": num_chars, "num_tokens": num_tokens}
+ # num_tokens = count_tokens("\n".join(vtt_output))
+ reading_minutes = num_chars / READING_SPEED # minutes
+ return {"subtitle": "\n".join(vtt_output), "num_chars": num_chars, "reading_minutes": reading_minutes}
except Exception as e:
logger.error(f"Failed to convert subtitles to WebVTT: {e}")
return {"error": str(e)}
+
+
+async def fetch_youtube_video_info(video_id: str) -> dict:
+    """Fetch the title and description of a YouTube video.
+
+    Sends one request (no retries) to the YouTube Data API v3 ``videos``
+    endpoint with ``part=snippet`` and ``hl=zh-CN``, routed through
+    ``PROXY.SUBTITLE`` and authenticated with ``TOKEN.YOUTUBE_API_KEY``.
+
+    Args:
+        video_id: ID of the video to look up.
+
+    Returns:
+        ``{"title": ..., "description": ...}`` when a title was found,
+        otherwise an empty dict. Request errors and unexpected exceptions
+        are logged and also produce an empty dict, so callers can simply
+        test the result for truthiness.
+    """
+    try:
+        logger.info(f"Fetch Video info for {video_id=}, proxy={PROXY.SUBTITLE}")
+        api = "https://www.googleapis.com/youtube/v3/videos"
+        params = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet", "id": video_id, "hl": "zh-CN"}
+        resp = await hx_req(api, proxy=PROXY.SUBTITLE, params=params, check_keys=["items"], max_retry=0)
+        if resp.get("hx_error"):
+            logger.warning(f"YouTube Videos API failed: {resp['hx_error']}")
+            return {}
+        # The API may answer with an empty ``items`` list when no video matches
+        # the ID. Treat that as an expected miss instead of letting glom raise
+        # into the generic error handler below.
+        title = glom(resp, "items.0.snippet.title", default="")
+        if not title:
+            logger.warning(f"No video info found for {video_id=}")
+            return {}
+        # A missing description should not discard an otherwise valid title.
+        desc = glom(resp, "items.0.snippet.description", default="")
+    except Exception as e:
+        # Best-effort lookup: the caller falls back to a caption without a title.
+        logger.error(f"Failed to get video info: {e}")
+        return {}
+    return {"title": title, "description": desc}