Commit b821511

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-04 09:53:25
feat(subtitle): use ytdlp to download audio and ASR to get subtitles
1 parent 6b8dc05
Changed files (4)
src/preview/ytdlp.py
@@ -15,24 +15,39 @@ from glom import glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
-from pyrogram.types import Message
+from pyrogram.types import Message, ReplyParameters
 from yt_dlp import YoutubeDL
 from yt_dlp.utils import DownloadError, ExtractorError, YoutubeDLError
 
 from asr.voice_recognition import asr_file
-from config import API, CAPTION_LENGTH, DB, DOWNLOAD_DIR, MAX_FILE_BYTES, PROVIDER, PROXY, READING_SPEED, TID, TOKEN, YTDLP_DOWNLOAD_MAX_FILE_BYTES, YTDLP_RE_ENCODING_MAX_FILE_BYTES, cache
+from config import (
+    API,
+    CAPTION_LENGTH,
+    DB,
+    DOWNLOAD_DIR,
+    MAX_FILE_BYTES,
+    PROVIDER,
+    PROXY,
+    READING_SPEED,
+    TEXT_LENGTH,
+    TID,
+    TOKEN,
+    YTDLP_DOWNLOAD_MAX_FILE_BYTES,
+    YTDLP_RE_ENCODING_MAX_FILE_BYTES,
+    cache,
+)
 from cookies import cookie_cloud_bilibili
 from database import get_db
 from messages.database import copy_messages_from_db, save_messages
 from messages.preprocess import preprocess_media
 from messages.progress import modify_progress, telegram_uploading
 from messages.sender import send2tg
-from messages.utils import count_without_entities, get_reply_to, smart_split, warp_comments
+from messages.utils import blockquote, count_without_entities, get_reply_to, smart_split, warp_comments
 from multimedia import convert_to_h264, generate_cover
 from networking import hx_req
 from others.emoji import emojify
-from others.subtitle import fetch_subtitle
 from preview.utils import bv2av, make_bvid_clickable
+from subtitles.base import fetch_subtitle
 from utils import publish_telegraph, readable_size, readable_time, remove_none_values, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
 
 
@@ -45,13 +60,13 @@ async def preview_ytdlp(
     message: Message,
     url: str = "",
     *,
+    use_db: bool = True,
     ytdlp_audio_only: bool = False,
     ytdlp_send_video: bool = True,
     ytdlp_send_audio: bool = False,
     bilibili_comments_provider: str = PROVIDER.BILIBILI_COMMENTS,
     youtube_comments_provider: str = PROVIDER.YOUTUBE_COMMENTS,
     proxy: str | None = None,
-    append_youtube_subtitle: bool = True,
     append_transcription: bool = True,
     ytdlp_transcription_engine: str = "gemini",
     to_telegraph: bool = True,
@@ -63,23 +78,24 @@ async def preview_ytdlp(
         client (Client): The Pyrogram client.
         message (Message): The trigger message object.
         url (str, optional): ytdlp link.
+        use_db (bool, optional): Whether to use database to cache the result. Defaults to True.
         ytdlp_audio_only (bool, optional): Download audio only. Defaults to True.
         ytdlp_send_video (bool, optional): Send video. Defaults to True.
         ytdlp_send_audio (bool, optional): Send audio. Defaults to False.
         bilibili_comments_provider (str, optional): The bilibili comments extractor: "free", "tikhub" or "false"
         youtube_comments_provider (str, optional): The youtube comments extractor: "free" or "false".
         proxy (str, optional): Proxy to use. Defaults to None.
-        append_youtube_subtitle (bool, optional): Also send youtube subtitle.
         append_transcription (bool, optional): Also append transcription.
-        ytdlp_transcription_method (str, optional): Method to get transcription.
+        ytdlp_transcription_engine (str, optional): Method to get transcription.
         to_telegraph (bool, optional): Whether to publish the subtitle or transcription to telegraph.
+        delete_files (bool, optional): Whether to delete video & audio after uploading.
     """
     logger.trace(f"{url=} {kwargs=}")
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析链接\n{url}", **kwargs)
         kwargs["progress"] = res[0]
     db_key = url
-    if kv := await get_db(db_key):
+    if use_db and (kv := await get_db(db_key)):
         logger.debug(f"YT-DLP preview {DB.ENGINE} cache hit for key={db_key}")
         if await copy_messages_from_db(client, message, key=db_key, kv=kv, **kwargs):
             return
@@ -134,7 +150,7 @@ async def preview_ytdlp(
     video_path = info.get("video_path", Path(""))
     audio_path = info.get("audio_path", Path(""))
     # only save messages when both video and audio are uploaded
-    save_to_db = bool(video_path.is_file() and audio_path.is_file())
+    save_to_db = bool(use_db and video_path.is_file() and audio_path.is_file())
     msg = f"✅下载成功:\n{info['summary']}"
     logger.success(f"{msg!r}")
     await modify_progress(text=msg.strip(), **kwargs)
@@ -182,6 +198,7 @@ async def preview_ytdlp(
     texts = texts.strip()
     sent_messages: list[Message | None] = []  # 把发送的消息都记录下来
     target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id
+    target_chat = to_int(target_chat)
     reply_msg_id = kwargs.get("reply_msg_id", 0)
     reply_parameters = get_reply_to(message.id, reply_msg_id)
     thumb = generate_cover(video_path) if video_path.is_file() else generate_cover(audio_path)
@@ -200,7 +217,7 @@ async def preview_ytdlp(
             await modify_progress(text=f"🎬视频上传中-P{idx + 1}: {readable_size(path=video['video'])}", force_update=True, **kwargs)
             sent_messages.append(
                 await client.send_video(
-                    chat_id=to_int(target_chat),
+                    chat_id=target_chat,
                     caption=warp_comments(caption),
                     reply_parameters=reply_parameters,
                     progress=telegram_uploading,
@@ -233,29 +250,25 @@ async def preview_ytdlp(
             if v := locals().get(k):
                 metadata[k] = unicode_to_ascii(v)
         await save_messages(messages=sent_messages, key=url, metadata=metadata)
-    if "youtube" in info["extractor"] and append_youtube_subtitle and (video_path.is_file() or audio_path.is_file()):
-        res = await fetch_subtitle(video_id=info["id"], provider="free")
-        if subtitles := res.get("subtitle"):
-            caption = f"{emoji}[{info['author']}]({info['author_url']})\n🕒{create_time}\n📝[{info['title']}]({url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
-            if to_telegraph:
-                html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-                if telegraph_url := await publish_telegraph(title=info["title"], html=html, author=info["author"], url=url):
-                    caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
-            with io.BytesIO(subtitles.encode("utf-8")) as f:
-                await client.send_document(to_int(target_chat), f, file_name=f"{info['title']}.txt", caption=caption)
-                append_transcription = False  # disable asr transcription
-
-    if any(x in info["extractor"] for x in ["youtube", "bilibili"]) and append_transcription and audio_path.is_file():
-        asr_res = await asr_file(audio_path, ytdlp_transcription_engine, duration, client=client, message=message, slient=True)
-        if texts := asr_res.get("texts"):
-            caption = f"{emoji}[{info['author']}]({info['author_url']})\n🕒{create_time}\n📝[{info['title']}]({url})\n字符数: {len(texts)}\n阅读时长: {len(texts) / READING_SPEED:.1f}分钟"
-            if to_telegraph:
-                html = "\n".join([f"<p>{s}</p>" for s in texts.split("\n")])
-                if telegraph_url := await publish_telegraph(title=info["title"], html=html, author=info["author"], url=url):
-                    caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
-            with io.BytesIO(texts.encode("utf-8")) as f:
-                await client.send_document(to_int(target_chat), f, file_name=f"{info['title']}.txt", caption=caption)
-        [await modify_progress(msg, del_status=True) for msg in asr_res.get("sent_messages", [])]
+    if any(x in info["extractor"] for x in ["youtube", "bilibili"]) and append_transcription and (video_path.is_file() or audio_path.is_file()):
+        res = await fetch_subtitle(video_id=info["id"], provider="free") if info["extractor"] == "youtube" else {}
+        subtitles = res.get("subtitle", "")
+        if not subtitles:
+            res = await asr_file(audio_path, ytdlp_transcription_engine, duration, client=client, message=message, slient=True)
+            subtitles = res.get("texts", "")
+        if subtitles:
+            if len(subtitles) > TEXT_LENGTH:
+                caption = f"{emoji}[{info['author']}]({info['author_url']})\n🕒{create_time}\n📝[{info['title']}]({url})\n字符数: {len(subtitles)}\n阅读时长: {len(subtitles) / READING_SPEED:.1f}分钟"
+                if to_telegraph:
+                    html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
+                    if telegraph_url := await publish_telegraph(title=info["title"], html=html, author=info["author"], url=url):
+                        caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
+                with io.BytesIO(subtitles.encode("utf-8")) as f:
+                    await client.send_document(to_int(target_chat), f, file_name=f"{info['title']}.txt", caption=caption)
+            else:
+                first_msg: Message = sent_messages[0] if sent_messages else message  # type: ignore
+                await client.send_message(first_msg.chat.id, blockquote(subtitles), reply_parameters=ReplyParameters(message_id=first_msg.id))
+        [await modify_progress(msg, del_status=True) for msg in res.get("sent_messages", [])]
 
     Path(json_file).unlink(missing_ok=True)
     cleanup_ytdlp(info["id"])
src/others/subtitle.py → src/subtitles/base.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import io
 from datetime import UTC, datetime, timedelta
 from zoneinfo import ZoneInfo
 
@@ -10,96 +9,14 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 from youtube_transcript_api import YouTubeTranscriptApi  # type: ignore
 
-from asr.voice_recognition import asr_file
-from config import API, PREFIX, PROVIDER, PROXY, READING_SPEED, TOKEN, TZ
-from database import cache
+from config import API, PREFIX, PROXY, READING_SPEED, TOKEN, TZ
 from messages.parser import parse_msg
-from messages.progress import modify_progress
-from messages.sender import send2tg
-from messages.utils import equal_prefix, startswith_prefix
+from messages.utils import startswith_prefix
 from networking import hx_req, match_social_media_link
-from utils import publish_telegraph, to_int
-
-HELP = f"""📃**提取字幕**
-使用说明:
-1. `{PREFIX.SUBTITLE} URL` 下载该链接的字幕
-2. 以 `{PREFIX.SUBTITLE}` 回复消息可下载消息中链接的字幕
-
-当前只支持YouTube
-"""
-
-
-async def get_subtitle(client: Client, message: Message, youtube_subtitle_provider: str = PROVIDER.YOUTUBE_SUBTITLE, *, to_telegraph: bool = True, **kwargs):
-    """Get YouTube Subtitle."""
-    target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id
-    # send docs if message == "/subtitle", without reply
-    if equal_prefix(message.text, prefix=[PREFIX.SUBTITLE]) and not message.reply_to_message:
-        await send2tg(client, message, texts=HELP, **kwargs)
-        return
-    if not (vid := await find_yt_vid(client, message)):
-        return
-
-    yt_url = f"https://www.youtube.com/watch?v={vid}"
-    msg = f"🔍**正在获取字幕**\n{yt_url}"
-    if kwargs.get("show_progress"):
-        res = await send2tg(client, message, texts=msg, **kwargs)
-        kwargs["progress"] = res[0]
-
-    # cache media_group message
-    if media_group_id := message.media_group_id:
-        if cache.get(f"subtitle-{message.chat.id}-{media_group_id}"):
-            return
-        cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)
-
-    this_info = parse_msg(message, silent=True)
-    reply_info = parse_msg(message.reply_to_message, silent=True) if message.reply_to_message else {}
-
-    res = await fetch_subtitle(vid, youtube_subtitle_provider)
-    if error := res.get("error", ""):
-        if "Subtitles are disabled for this video" in error:
-            error = "❌该视频没有提供字幕选项"
-        if this_info["mtype"] in ["audio", "video"] or reply_info.get("mtype", "") in ["audio", "video"]:
-            error += "\n🔄尝试使用语音转文字获取字幕"
-            await modify_progress(text=error, force_update=True, **kwargs)
-            msg = message if this_info["mtype"] in ["audio", "video"] else message.reply_to_message
-            fpath: str = await msg.download()  # type: ignore
-            asr_res = await asr_file(fpath, engine="gemini", client=client, message=message, **kwargs)
-            if asr_res.get("error"):
-                await modify_progress(text=asr_res["error"], force_update=True, **kwargs)
-                return
-            res = {"subtitles": asr_res["texts"], "num_chars": len(asr_res["texts"]), "reading_minutes": len(asr_res["texts"]) / READING_SPEED}
-            if asr_res.get("telegraph"):
-                res["telegraph"] = asr_res["telegraph"]
-        else:
-            await modify_progress(text=error, force_update=True, **kwargs)
-            return
-    subtitles = res.get("subtitles", "")
-    if not subtitles:
-        return
-    logger.success(subtitles)
-    if vinfo := await fetch_youtube_video_info(vid):
-        caption = f"🔴[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
-        caption += f"📝[{vinfo['title']}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
-        if to_telegraph:
-            html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-            if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=yt_url):
-                caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
-        with io.BytesIO(subtitles.encode("utf-8")) as f:
-            await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
-    else:
-        caption = f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
-        if to_telegraph:
-            html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-            if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=yt_url):
-                caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
-        with io.BytesIO(subtitles.encode("utf-8")) as f:
-            await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)
-
-    [await modify_progress(msg, del_status=True) for msg in res.get("sent_messages", [])]
-    await modify_progress(del_status=True, **kwargs)
 
 
 async def find_yt_vid(client: Client, message: Message) -> str:
+    """Find YouTube video ID from message."""
     info = parse_msg(message)
     if not startswith_prefix(info["text"], prefix=[PREFIX.SUBTITLE]):
         return ""
@@ -131,6 +48,7 @@ async def find_yt_vid(client: Client, message: Message) -> str:
 
 
 async def fetch_subtitle(video_id: str, provider: str) -> dict:
+    """Fetch subtitles from YouTube."""
     succ = False
     subtitles = []
     try:
@@ -236,6 +154,7 @@ def to_webvtt(subtitles: list[dict]) -> dict:
 
 
 async def fetch_youtube_video_info(video_id: str) -> dict:
+    """Fetch YouTube video info."""
     try:
         logger.info(f"Fetch Video info for {video_id=}, proxy={PROXY.SUBTITLE}")
         api = "https://www.googleapis.com/youtube/v3/videos"
src/subtitles/subtitle.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import io
+
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from asr.voice_recognition import asr_file
+from config import PREFIX, PROVIDER, READING_SPEED
+from database import cache
+from messages.parser import parse_msg
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import equal_prefix
+from preview.ytdlp import preview_ytdlp
+from subtitles.base import fetch_subtitle, fetch_youtube_video_info, find_yt_vid
+from utils import publish_telegraph, to_int
+
+HELP = f"""📃**提取字幕**
+使用说明:
+1. `{PREFIX.SUBTITLE} URL` 下载该链接的字幕
+2. 以 `{PREFIX.SUBTITLE}` 回复消息可下载消息中链接的字幕
+
+当前只支持YouTube
+"""  # usage text; get_subtitle sends it when equal_prefix matches the message text against PREFIX.SUBTITLE and there is no replied-to message
+
+
+async def get_subtitle(client: Client, message: Message, youtube_subtitle_provider: str = PROVIDER.YOUTUBE_SUBTITLE, *, to_telegraph: bool = True, **kwargs):
+    """Get YouTube Subtitle."""  # fetches the subtitle text for the video referenced by `message` and sends it as a .txt document; **kwargs are forwarded to send2tg / modify_progress / asr_file / preview_ytdlp
+    target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id  # a truthy kwargs["target_chat"] overrides the chat the command came from
+    # the text matches the bare prefix (per equal_prefix) and there is no replied-to message: send HELP and stop
+    if equal_prefix(message.text, prefix=[PREFIX.SUBTITLE]) and not message.reply_to_message:
+        await send2tg(client, message, texts=HELP, **kwargs)
+        return
+    if not (vid := await find_yt_vid(client, message)):  # find_yt_vid returned an empty value: nothing to fetch
+        return
+
+    yt_url = f"https://www.youtube.com/watch?v={vid}"
+    msg = f"🔍**正在获取字幕**\n{yt_url}"
+    if kwargs.get("show_progress"):
+        res = await send2tg(client, message, texts=msg, **kwargs)
+        kwargs["progress"] = res[0]  # first item returned by send2tg is stored under "progress" and travels with **kwargs to every later call
+
+    # de-duplicate media groups: the first message of a (chat id, media_group_id) pair sets a cache marker, later ones return early while it exists
+    if media_group_id := message.media_group_id:
+        if cache.get(f"subtitle-{message.chat.id}-{media_group_id}"):
+            return  # NOTE(review): this return happens after the progress message above may have been sent, and nothing deletes it here; confirm that is intended
+        cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)  # ttl presumably in seconds; verify against the cache implementation
+
+    this_info = parse_msg(message, silent=True)
+    reply_info = parse_msg(message.reply_to_message, silent=True) if message.reply_to_message else {}  # empty dict when the command is not a reply
+
+    res = await fetch_subtitle(vid, youtube_subtitle_provider)
+    if error := res.get("error", ""):  # any non-empty error leads to one of the two fallbacks below
+        if "Subtitles are disabled for this video" in error:
+            error = "❌该视频没有提供字幕选项\n🔄尝试使用语音转文字获取字幕"
+            await modify_progress(text=error, force_update=True, **kwargs)  # only this specific error text is shown to the user; other errors proceed to the fallbacks without a status update
+        if this_info["mtype"] in ["audio", "video"] or reply_info.get("mtype", "") in ["audio", "video"]:  # fallback 1: the command message or the replied-to message is audio/video
+            msg = message if this_info["mtype"] in ["audio", "video"] else message.reply_to_message  # rebinds `msg` (the progress text above) to the media-bearing Message
+            fpath: str = await msg.download()  # type: ignore
+            asr_res = await asr_file(fpath, engine="gemini", client=client, message=message, **kwargs)  # NOTE(review): no cleanup of the downloaded `fpath` is visible in this function; confirm asr_file removes it
+            if asr_res.get("error"):
+                await modify_progress(text=asr_res["error"], force_update=True, **kwargs)
+                return
+            res = {"subtitles": asr_res["texts"], "num_chars": len(asr_res["texts"]), "reading_minutes": len(asr_res["texts"]) / READING_SPEED}  # rebuilt with the keys read by the sending code below
+            if asr_res.get("telegraph"):
+                res["telegraph"] = asr_res["telegraph"]
+        else:  # fallback 2: no media to transcribe, so delegate to preview_ytdlp with the overrides below and return without sending anything from here
+            kwargs |= {
+                "show_progress": False,
+                "url": yt_url,
+                "append_transcription": True,
+                "ytdlp_audio_only": True,
+                "youtube_comments_provider": False,
+                "proxy": None,
+                "use_db": False,
+            }
+            await preview_ytdlp(client=client, message=message, **kwargs)
+            await modify_progress(del_status=True, **kwargs)  # kwargs still carries the overrides merged above
+            return
+    subtitles = res.get("subtitles", "")  # NOTE(review): preview_ytdlp reads the key "subtitle" from fetch_subtitle's result while this reads "subtitles"; confirm which key fetch_subtitle actually returns
+    if not subtitles:
+        return
+    logger.success(subtitles)
+    if vinfo := await fetch_youtube_video_info(vid):  # metadata available: caption carries author, date and title, file is named after the title
+        caption = f"🔴[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
+        caption += f"📝[{vinfo['title']}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+        if to_telegraph:
+            html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])  # one <p> element per subtitle line
+            if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=yt_url):  # link is appended only when publish_telegraph returns a truthy URL
+                caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
+        with io.BytesIO(subtitles.encode("utf-8")) as f:  # in-memory upload, no temporary file on disk
+            await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
+    else:  # no metadata: caption, Telegraph title and file name fall back to the bare video id
+        caption = f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+        if to_telegraph:
+            html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
+            if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=yt_url):
+                caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
+        with io.BytesIO(subtitles.encode("utf-8")) as f:
+            await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)
+
+    [await modify_progress(msg, del_status=True) for msg in res.get("sent_messages", [])]  # comprehension used only for its side effects; iterates nothing when "sent_messages" is absent
+    await modify_progress(del_status=True, **kwargs)  # final modify_progress call with del_status=True for the message stored in kwargs
src/handler.py
@@ -21,7 +21,6 @@ from others.extract_audio import extract_audio_file
 from others.raw_img_file import convert_raw_img_file
 from others.search_google import search_google
 from others.search_ytb import search_youtube
-from others.subtitle import get_subtitle
 from permission import check_service
 from preview.bilibili import preview_bilibili
 from preview.douyin import preview_douyin
@@ -33,6 +32,7 @@ from preview.weibo import preview_weibo
 from preview.xiaohongshu import preview_xhs
 from preview.ytdlp import ProxyError, preview_ytdlp
 from price.entrypoint import get_asset_price
+from subtitles.subtitle import get_subtitle
 from utils import to_int, true