Commit ca6bf84

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-09-10 18:12:41
feat(summary): add video summary for `/summary` command
1 parent 08800c5
Changed files (2)
src
llm
subtitles
src/llm/summary.py
@@ -6,26 +6,32 @@ import re
 from datetime import datetime, timedelta
 from zoneinfo import ZoneInfo
 
+from glom import Coalesce, glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Chat, Message
 from pyrogram.types.messages_and_media.message import Str
 
-from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ
+from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ, cache
 from llm.gpt import gpt_response
 from llm.utils import BOT_TIPS, count_tokens
 from messages.chat_history import get_history_info_list
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
-from messages.utils import equal_prefix, startswith_prefix, to_int
+from messages.utils import equal_prefix, remove_prefix, startswith_prefix, to_int
+from networking import match_social_media_link
+from subtitles.subtitle import get_subtitle
 from utils import nowdt, rand_number, strings_list
 
 HELP = f"""🤖**AI总结历史消息** (最多{MAX_MESSAGE_SUMMARY}条)
-⚠️使用`{PREFIX.COMBINATION}`命令只生成聊天记录文件, 不进行AI总结
-使用说明:
-# 后跟消息数量或时间范围
-@ 后跟用户名 (可多次使用@)
+⚠️使用`{PREFIX.AI_SUMMARY}`命令生成聊天记录文件 + 聊天记录AI总结
+⚠️使用`{PREFIX.COMBINATION}`命令只生成聊天记录文件, 不对聊天记录AI总结
+⚠️额外功能: 使用`{PREFIX.AI_SUMMARY} + 油管或B站链接`对视频内容进行AI总结
+
+{PREFIX.AI_SUMMARY}使用说明:
+- # 后跟消息数量或时间范围
+- @ 后跟用户名 (可多次使用@)
 
 **1️⃣指定条目数**
 - `{PREFIX.AI_SUMMARY} #N`: 总结最近的N条历史消息
@@ -101,9 +107,26 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
         summary_prefix (str | None): Prefix string of the response message.
     """
     # send docs if message == "/summary"
-    if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]):
+    if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]) and not message.reply_to_message:
         await send2tg(client, message, texts=HELP, **kwargs)
         return
+    if not startswith_prefix(message.content, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]):
+        return
+    # summarize YouTube & Bilibili videos (skipped when summarizing chat history)
+    if startswith_prefix(message.text, prefix=PREFIX.AI_SUMMARY) and not remove_prefix(message.text, prefix=PREFIX.AI_SUMMARY).strip().startswith("#"):
+        # YouTube & Bilibili links come from the message's content (falling back to reply_to_message's content) plus reply_to_message's entity_urls
+        links_to_check = [glom(message, Coalesce("content", "reply_to_message.content"), default="")]
+        if message.reply_to_message:
+            reply_info = parse_msg(message.reply_to_message, use_cache=False, silent=True)
+            links_to_check.extend(reply_info["entity_urls"])
+        for link in links_to_check:
+            matched = await match_social_media_link(link)
+            if matched["platform"] in ["youtube", "bilibili"]:
+                cache.delete(f"parse_msg-{message.chat.id}-{message.id}")
+                msg = Message(id=glom(message, Coalesce("reply_to_message.id", "id")), chat=message.chat, text=Str(f"{PREFIX.SUBTITLE} {matched['url']}"))
+                kwargs |= {"ai_summary": True, "send_subtitle_as": "none"}
+                await get_subtitle(client, msg, **kwargs)
+                return
 
     info = parse_msg(message, silent=True)
     need_summay = startswith_prefix(info["text"], prefix=[PREFIX.AI_SUMMARY])
src/subtitles/subtitle.py
@@ -3,6 +3,7 @@
 import contextlib
 import re
 from io import BytesIO
+from typing import Literal
 
 from glom import Coalesce, glom
 from loguru import logger
@@ -39,7 +40,15 @@ HELP = f"""📃**提取字幕**
 """  # noqa: RUF001
 
 
-async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool = True, ai_summary: bool = True, **kwargs):
+async def get_subtitle(
+    client: Client,
+    message: Message,
+    *,
+    to_telegraph: bool = True,
+    ai_summary: bool = True,
+    send_subtitle_as: Literal["file", "str", "none"] = "file",
+    **kwargs,
+):
     """Get YouTube Subtitle."""
     target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id
     # send docs if message == "/subtitle", without reply
@@ -55,8 +64,15 @@ async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool =
         cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)
     matched = await match_social_media_link(url)
     platform = matched["platform"]
+    if platform not in ["bilibili", "youtube"]:
+        await send2tg(client, message, texts="仅支持Bilibili和YouTube视频链接", **kwargs)
+        return
     vid = glom(matched, Coalesce("vid", "bvid"), default=url)
     vinfo = await get_youtube_vinfo(vid) if platform == "youtube" else await get_bilibili_vinfo(vid)
+    if error := vinfo.get("error_msg"):
+        await send2tg(client, message, texts=error, **kwargs)
+        return
+    url = glom(vinfo, Coalesce("url", "link"), default=url)
     description = glom(vinfo, Coalesce("description", "desc"), default="")
     caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['pubdate']}\n📝[{vinfo['title']}]({url})"
     msg = f"🔍**正在获取字幕:**\n{caption}"[:TEXT_LENGTH]
@@ -96,7 +112,7 @@ async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool =
                 return
             res |= {"subtitles": res["texts"], "num_chars": count_subtitles(res["texts"]), "reading_minutes": count_subtitles(res["texts"]) / READING_SPEED}
 
-    # Send subtitle file
+    # Send subtitle
     subtitles = glom(res, Coalesce("full", "subtitles", "summary"), default="")
     if not subtitles:
         await modify_progress(del_status=True, **kwargs)
@@ -104,12 +120,17 @@ async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool =
     logger.success(subtitles)
     caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['pubdate']}\n"
     caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {readable_time(60 * res['reading_minutes'])}"
-    if to_telegraph:
-        html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-        if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
-            caption += f"\n⚡️[即时预览]({telegraph_url})"
-    with BytesIO(subtitles.encode("utf-8")) as f:
-        subtitle_msg = await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
+    if send_subtitle_as == "file":
+        if to_telegraph:
+            html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
+            if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
+                caption += f"\n⚡️[即时预览]({telegraph_url})"
+        with BytesIO(subtitles.encode("utf-8")) as f:
+            subtitle_msg = await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
+    elif send_subtitle_as == "str":
+        subtitle_msg = (await send2tg(client, message, texts=f"{caption}\n{subtitles}", **kwargs))[0]
+    else:
+        subtitle_msg = message
 
     if ai_summary and isinstance(subtitle_msg, Message):
         # use real subtitle (without AI summary by Bilibili)