Commit b73d77a
Changed files (3)
src
preview
subtitles
src/preview/ytdlp.py
@@ -251,7 +251,7 @@ async def preview_ytdlp(
metadata[k] = unicode_to_ascii(v)
await save_messages(messages=sent_messages, key=url, metadata=metadata)
if any(x in info["extractor"] for x in ["youtube", "bilibili"]) and append_transcription and (video_path.is_file() or audio_path.is_file()):
- res = await fetch_subtitle(video_id=info["id"], provider="free") if info["extractor"] == "youtube" else {}
+ res = await fetch_subtitle(url=url, provider="free")
subtitles = res.get("subtitle", "")
if not subtitles:
res = await asr_file(audio_path, ytdlp_transcription_engine, duration, client=client, message=message, slient=True)
src/subtitles/base.py
@@ -15,8 +15,8 @@ from messages.utils import startswith_prefix
from networking import hx_req, match_social_media_link
-async def find_yt_vid(client: Client, message: Message) -> str:
- """Find YouTube video ID from message."""
+async def match_url(client: Client, message: Message) -> str:
+ """Find valid url from message."""
info = parse_msg(message)
if not startswith_prefix(info["text"], prefix=[PREFIX.SUBTITLE]):
return ""
@@ -24,10 +24,10 @@ async def find_yt_vid(client: Client, message: Message) -> str:
matched = await match_social_media_link(info["text"], flatten_first=True)
for entity_url in info["entity_urls"]:
matched = await match_social_media_link(entity_url, flatten_first=True)
- if matched["platform"] == "youtube":
- return matched["vid"]
- if matched["platform"] == "youtube":
- return matched["vid"]
+ if matched["platform"] in ["youtube", "bilibili"]:
+ return matched["url"]
+ if matched["platform"] in ["youtube", "bilibili"]:
+ return matched["url"]
# is replying to message?
if not message.reply_to_message:
@@ -40,17 +40,22 @@ async def find_yt_vid(client: Client, message: Message) -> str:
matched = await match_social_media_link(info["text"], flatten_first=True)
for entity_url in info["entity_urls"]:
matched = await match_social_media_link(entity_url, flatten_first=True)
- if matched["platform"] == "youtube":
- return matched["vid"]
- if matched["platform"] == "youtube":
- return matched["vid"]
+ if matched["platform"] in ["youtube", "bilibili"]:
+ return matched["url"]
+ if matched["platform"] in ["youtube", "bilibili"]:
+ return matched["url"]
return ""
-async def fetch_subtitle(video_id: str, provider: str) -> dict:
+async def fetch_subtitle(url: str, provider: str) -> dict:
"""Fetch subtitles from YouTube."""
succ = False
+ error = "❌下载内嵌字幕失败\n🔄尝试使用语音转文字获取字幕"
subtitles = []
+ matched = await match_social_media_link(url, flatten_first=True)
+ if matched["platform"] != "youtube":
+ return {"error": error}
+ video_id = matched["vid"]
try:
if "free" in provider:
proxy = {"http": PROXY.SUBTITLE, "https": PROXY.SUBTITLE} if PROXY.SUBTITLE else None
@@ -70,7 +75,7 @@ async def fetch_subtitle(video_id: str, provider: str) -> dict:
subtitles = resp["data"].get("subtitles", [])
except Exception as e:
logger.error(f"Failed to get subtitle: {e}")
- return {"error": str(e)}
+ return {"error": error}
return await to_transcription(subtitles)
src/subtitles/subtitle.py
@@ -13,8 +13,9 @@ from messages.parser import parse_msg
from messages.progress import modify_progress
from messages.sender import send2tg
from messages.utils import equal_prefix
+from networking import match_social_media_link
from preview.ytdlp import preview_ytdlp
-from subtitles.base import fetch_subtitle, fetch_youtube_video_info, find_yt_vid
+from subtitles.base import fetch_subtitle, fetch_youtube_video_info, match_url
from utils import publish_telegraph, to_int
HELP = f"""📃**提取字幕**
@@ -22,7 +23,9 @@ HELP = f"""📃**提取字幕**
1. `{PREFIX.SUBTITLE} URL` 下载该链接的字幕
2. 以 `{PREFIX.SUBTITLE}` 回复消息可下载消息中链接的字幕
-当前只支持YouTube
+⚙️站点支持
+1. Bilibili: 下载音频后通过语音转文字获取字幕
+2. YouTube: 首先尝试下载内嵌字幕, 失败后使用语音转文字获取字幕
"""
@@ -33,11 +36,10 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
if equal_prefix(message.text, prefix=[PREFIX.SUBTITLE]) and not message.reply_to_message:
await send2tg(client, message, texts=HELP, **kwargs)
return
- if not (vid := await find_yt_vid(client, message)):
+ if not (url := await match_url(client, message)):
return
- yt_url = f"https://www.youtube.com/watch?v={vid}"
- msg = f"🔍**正在获取字幕**\n{yt_url}"
+ msg = f"🔍**正在获取字幕**\n{url}"
if kwargs.get("show_progress"):
res = await send2tg(client, message, texts=msg, **kwargs)
kwargs["progress"] = res[0]
@@ -51,11 +53,9 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
this_info = parse_msg(message, silent=True)
reply_info = parse_msg(message.reply_to_message, silent=True) if message.reply_to_message else {}
- res = await fetch_subtitle(vid, youtube_subtitle_provider)
+ res = await fetch_subtitle(url, youtube_subtitle_provider) # returns {"error": ...} (does not raise) if not a YouTube url
if error := res.get("error", ""):
- if "Subtitles are disabled for this video" in error:
- error = "❌该视频没有提供字幕选项\n🔄尝试使用语音转文字获取字幕"
- await modify_progress(text=error, force_update=True, **kwargs)
+ await modify_progress(text=error, force_update=True, **kwargs)
if this_info["mtype"] in ["audio", "video"] or reply_info.get("mtype", "") in ["audio", "video"]:
msg = message if this_info["mtype"] in ["audio", "video"] else message.reply_to_message
fpath: str = await msg.download() # type: ignore
@@ -69,10 +69,11 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
else:
kwargs |= {
"show_progress": False,
- "url": yt_url,
+ "url": url,
"append_transcription": True,
"ytdlp_audio_only": True,
"youtube_comments_provider": False,
+ "bilibili_comments_provider": False,
"proxy": None,
"use_db": False,
}
@@ -83,20 +84,22 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
if not subtitles:
return
logger.success(subtitles)
+ matched = await match_social_media_link(url)
+ vid = matched["vid"]
if vinfo := await fetch_youtube_video_info(vid):
caption = f"🔴[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
- caption += f"📝[{vinfo['title']}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+ caption += f"📝[{vinfo['title']}]({url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
if to_telegraph:
html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
- if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=yt_url):
+ if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
with io.BytesIO(subtitles.encode("utf-8")) as f:
await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
else:
- caption = f"原视频: [{vid}]({yt_url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
+ caption = f"原视频: [YouTube链接]({url})\n字符数: {res['num_chars']}\n阅读时长: {res['reading_minutes']:.1f}分钟"
if to_telegraph:
html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
- if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=yt_url):
+ if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=url):
caption += f"\n**⚡️[Telegraph即时预览]({telegraph_url})**"
with io.BytesIO(subtitles.encode("utf-8")) as f:
await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)