Commit b73d77a

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-05 07:35:40
feat(subtitle): support bilibili
1 parent 8b29251
Changed files (3)
src
preview
subtitles
src/preview/ytdlp.py
@@ -251,7 +251,7 @@ async def preview_ytdlp(
                 metadata[k] = unicode_to_ascii(v)
         await save_messages(messages=sent_messages, key=url, metadata=metadata)
     if any(x in info["extractor"] for x in ["youtube", "bilibili"]) and append_transcription and (video_path.is_file() or audio_path.is_file()):
-        res = await fetch_subtitle(video_id=info["id"], provider="free") if info["extractor"] == "youtube" else {}
+        res = await fetch_subtitle(url=url, provider="free")
         subtitles = res.get("subtitle", "")
         if not subtitles:
             res = await asr_file(audio_path, ytdlp_transcription_engine, duration, client=client, message=message, slient=True)
src/subtitles/base.py
@@ -15,8 +15,8 @@ from messages.utils import startswith_prefix
 from networking import hx_req, match_social_media_link
 
 
-async def find_yt_vid(client: Client, message: Message) -> str:
-    """Find YouTube video ID from message."""
+async def match_url(client: Client, message: Message) -> str:
+    """Find a supported video URL (YouTube or Bilibili) in the message."""
     info = parse_msg(message)
     if not startswith_prefix(info["text"], prefix=[PREFIX.SUBTITLE]):
         return ""
@@ -24,10 +24,10 @@ async def find_yt_vid(client: Client, message: Message) -> str:
     matched = await match_social_media_link(info["text"], flatten_first=True)
     for entity_url in info["entity_urls"]:
         matched = await match_social_media_link(entity_url, flatten_first=True)
-        if matched["platform"] == "youtube":
-            return matched["vid"]
-    if matched["platform"] == "youtube":
-        return matched["vid"]
+        if matched["platform"] in ["youtube", "bilibili"]:
+            return matched["url"]
+    if matched["platform"] in ["youtube", "bilibili"]:
+        return matched["url"]
 
     # is replying to message?
     if not message.reply_to_message:
@@ -40,17 +40,22 @@ async def find_yt_vid(client: Client, message: Message) -> str:
         matched = await match_social_media_link(info["text"], flatten_first=True)
         for entity_url in info["entity_urls"]:
             matched = await match_social_media_link(entity_url, flatten_first=True)
-            if matched["platform"] == "youtube":
-                return matched["vid"]
-        if matched["platform"] == "youtube":
-            return matched["vid"]
+            if matched["platform"] in ["youtube", "bilibili"]:
+                return matched["url"]
+        if matched["platform"] in ["youtube", "bilibili"]:
+            return matched["url"]
     return ""
 
 
-async def fetch_subtitle(video_id: str, provider: str) -> dict:
+async def fetch_subtitle(url: str, provider: str) -> dict:
     """Fetch subtitles from YouTube."""
     succ = False
+    error = "❌下载内嵌字幕失败\n🔄尝试使用语音转文字获取字幕"
     subtitles = []
+    matched = await match_social_media_link(url, flatten_first=True)
+    if matched["platform"] != "youtube":
+        return {"error": error}
+    video_id = matched["vid"]
     try:
         if "free" in provider:
             proxy = {"http": PROXY.SUBTITLE, "https": PROXY.SUBTITLE} if PROXY.SUBTITLE else None
@@ -70,7 +75,7 @@ async def fetch_subtitle(video_id: str, provider: str) -> dict:
             subtitles = resp["data"].get("subtitles", [])
     except Exception as e:
         logger.error(f"Failed to get subtitle: {e}")
-        return {"error": str(e)}
+        return {"error": error}
     return await to_transcription(subtitles)
 
 
src/subtitles/subtitle.py
@@ -13,8 +13,9 @@ from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
 from messages.utils import equal_prefix
+from networking import match_social_media_link
 from preview.ytdlp import preview_ytdlp
-from subtitles.base import fetch_subtitle, fetch_youtube_video_info, find_yt_vid
+from subtitles.base import fetch_subtitle, fetch_youtube_video_info, match_url
 from utils import publish_telegraph, to_int
 
 HELP = f"""📃**提取字幕**
@@ -22,7 +23,9 @@ HELP = f"""📃**提取字幕**
 1. `{PREFIX.SUBTITLE} URL` 下载该链接的字幕
 2. 以 `{PREFIX.SUBTITLE}` 回复消息可下载消息中链接的字幕
 
-当前只支持YouTube
+⚙️站点支持
+1. Bilibili: 下载音频后通过语音转文字获取字幕
+2. YouTube: 首先尝试下载内嵌字幕, 失败后使用语音转文字获取字幕
 """
 
 
@@ -33,11 +36,10 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
     if equal_prefix(message.text, prefix=[PREFIX.SUBTITLE]) and not message.reply_to_message:
         await send2tg(client, message, texts=HELP, **kwargs)
         return
-    if not (vid := await find_yt_vid(client, message)):
+    if not (url := await match_url(client, message)):
         return
 
-    yt_url = f"https://www.youtube.com/watch?v={vid}"
-    msg = f"🔍**正在获取字幕**\n{yt_url}"
+    msg = f"🔍**正在获取字幕**\n{url}"
     if kwargs.get("show_progress"):
         res = await send2tg(client, message, texts=msg, **kwargs)
         kwargs["progress"] = res[0]
@@ -51,11 +53,9 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
     this_info = parse_msg(message, silent=True)
     reply_info = parse_msg(message.reply_to_message, silent=True) if message.reply_to_message else {}
 
-    res = await fetch_subtitle(vid, youtube_subtitle_provider)
+    res = await fetch_subtitle(url, youtube_subtitle_provider)  # returns {"error": ...} (does not raise) when the URL is not a YouTube link
     if error := res.get("error", ""):
-        if "Subtitles are disabled for this video" in error:
-            error = "❌该视频没有提供字幕选项\n🔄尝试使用语音转文字获取字幕"
-            await modify_progress(text=error, force_update=True, **kwargs)
+        await modify_progress(text=error, force_update=True, **kwargs)
         if this_info["mtype"] in ["audio", "video"] or reply_info.get("mtype", "") in ["audio", "video"]:
             msg = message if this_info["mtype"] in ["audio", "video"] else message.reply_to_message
             fpath: str = await msg.download()  # type: ignore
@@ -69,10 +69,11 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
         else:
             kwargs |= {
                 "show_progress": False,
-                "url": yt_url,
+                "url": url,
                 "append_transcription": True,
                 "ytdlp_audio_only": True,
                 "youtube_comments_provider": False,
+                "bilibili_comments_provider": False,
                 "proxy": None,
                 "use_db": False,
             }
@@ -83,20 +84,22 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
     if not subtitles:
         return
     logger.success(subtitles)
+    matched = await match_social_media_link(url)
+    vid = matched["vid"]
     if vinfo := await fetch_youtube_video_info(vid):
         caption = f"🔴[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
-        caption += f"📝[{vinfo['title']}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+        caption += f"📝[{vinfo['title']}]({url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
         if to_telegraph:
             html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-            if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=yt_url):
+            if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
                 caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
         with io.BytesIO(subtitles.encode("utf-8")) as f:
             await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
     else:
-        caption = f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+        caption = f"原视频: [YouTube链接]({url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
         if to_telegraph:
             html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-            if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=yt_url):
+            if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=url):
                 caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
         with io.BytesIO(subtitles.encode("utf-8")) as f:
             await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)