Commit ce6fca4

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-20 08:34:12
feat(subtitle): add AI summary
1 parent 74a4126
Changed files (5)
src/llm/gemini.py
@@ -58,7 +58,7 @@ async def gemini_response(client: Client, message: Message, conversations: list[
         extra_config_str = GEMINI.IMG_CONFIG if modality == "image" else GEMINI.TEXT_CONFIG
         genconfig = json.loads(extra_config_str)
     try:
-        msg = f"🤖**{model_name}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"
+        msg = f"🤖**{model_name}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"[:TEXT_LENGTH]
         status_msg = (await send2tg(client, message, texts=msg, **kwargs))[0]
         kwargs["progress"] = status_msg
         contexts = await get_conversation_contexts(client, conversations, ctx_format="gemini")
src/llm/gpt.py
@@ -112,7 +112,7 @@ async def gpt_response(client: Client, message: Message, *, gpt_stream: bool = T
         return await send2tg(client, message, texts=f"⚠️**{config['friendly_name']}** 未配置模型ID, 请尝试其他命令\n\n{HELP}", **kwargs)
 
     config["completions"]["messages"] = await get_conversation_contexts(client, conversations, ctx_format="openai")
-    msg = f"🤖**{config['friendly_name']}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"
+    msg = f"🤖**{config['friendly_name']}**: 思考中...\n👤**[{info['full_name'] or info['ctitle']}](tg://user?id={info['uid']})**: “{clean_cmd_prefix(info['text'])}”"[:TEXT_LENGTH]
     status_msg = (await send2tg(client, message, texts=msg, **kwargs))[0]
     kwargs["progress"] = status_msg
     if context_type.get("error"):
src/preview/utils.py
@@ -183,7 +183,7 @@ async def bilibili_subtitle_and_summary(url_or_vid: int | str) -> dict:
         if summary:
             final["summary"] = summary.strip()
         if summary and subtitles:
-            final["full"] = f"以下为B站视频AI总结:\n{summary}\n\n\n以下为B站视频字幕:\n{subtitles}".strip()
+            final["full"] = f"Bilibili官方生成的AI总结:\n{summary}\n\n\nBilibili原视频提供的字幕:\n{subtitles}".strip()
     except Exception as e:
         logger.error(e)
         return {"error": "下载B站AI总结失败"}
src/preview/ytdlp.py
@@ -272,11 +272,10 @@ async def preview_ytdlp(
                     if telegraph_url := await publish_telegraph(title=info["title"], html=html, author=info["author"], url=url):
                         caption += f"\n⚡️[Telegraph即时预览]({telegraph_url})"
                 with io.BytesIO(subtitles.encode("utf-8")) as f:
-                    await client.send_document(to_int(target_chat), f, file_name=f"{info['title']}.txt", caption=caption)
+                    sent_messages.append(await client.send_document(to_int(target_chat), f, file_name=f"{info['title']}.txt", caption=caption))
             else:
                 first_msg: Message = sent_messages[0] if sent_messages else message  # type: ignore
-                await client.send_message(first_msg.chat.id, blockquote(subtitles), reply_parameters=ReplyParameters(message_id=first_msg.id))
-        [await modify_progress(msg, del_status=True) for msg in res.get("sent_messages", [])]
+                sent_messages.append(await client.send_message(first_msg.chat.id, blockquote(subtitles), reply_parameters=ReplyParameters(message_id=first_msg.id)))
 
     Path(json_file).unlink(missing_ok=True)
     cleanup_ytdlp(info["id"])
src/subtitles/subtitle.py
@@ -1,16 +1,19 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import contextlib
-import io
+import re
+from io import BytesIO
 
 from glom import Coalesce, glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
+from pyrogram.types.messages_and_media.message import Str
 
 from asr.voice_recognition import asr_file
-from config import PREFIX, PROVIDER, READING_SPEED
+from config import PREFIX, PROVIDER, READING_SPEED, TEXT_LENGTH
 from database import cache
+from llm.gpt import gpt_response
 from messages.parser import parse_msg
 from messages.progress import modify_progress
 from messages.sender import send2tg
@@ -35,7 +38,16 @@ HELP = f"""📃**提取字幕**
 """  # noqa: RUF001
 
 
-async def get_subtitle(client: Client, message: Message, youtube_subtitle_provider: str = PROVIDER.YOUTUBE_SUBTITLE, *, to_telegraph: bool = True, force_file: bool = False, **kwargs):
+async def get_subtitle(
+    client: Client,
+    message: Message,
+    youtube_subtitle_provider: str = PROVIDER.YOUTUBE_SUBTITLE,
+    *,
+    to_telegraph: bool = True,
+    ai_summary: bool = True,
+    force_file: bool = True,
+    **kwargs,
+):
     """Get YouTube Subtitle."""
     target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id
     # send docs if message == "/subtitle", without reply
@@ -44,30 +56,38 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
         return
     if not (url := await match_url(client, message)):
         return
-    matched = await match_social_media_link(url)
-    platform = matched["platform"]
-    msg = f"🔍**正在获取字幕**\n{url}"
-    if kwargs.get("show_progress"):
-        res = await send2tg(client, message, texts=msg, **kwargs)
-        kwargs["progress"] = res[0]
-
     # Handle each media group only once: skip if this group is already in the cache, otherwise mark it there
     if media_group_id := message.media_group_id:
         if cache.get(f"subtitle-{message.chat.id}-{media_group_id}"):
             return
         cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)
+    matched = await match_social_media_link(url)
+    platform = matched["platform"]
+    vid = glom(matched, Coalesce("vid", "bvid"), default=url)
+    vinfo = await fetch_youtube_video_info(vid) if platform == "youtube" else await get_bilibili_video_info(vid)
+    description = glom(vinfo, Coalesce("description", "desc"), default="")
+    caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n📝[{vinfo['title']}]({url})"
+    msg = f"🔍**正在获取字幕:**\n{caption}"[:TEXT_LENGTH]
+    if kwargs.get("show_progress"):
+        res = await send2tg(client, message, texts=msg, **kwargs)
+        kwargs["progress"] = res[0]
 
     this_info = parse_msg(message, silent=True)
     reply_info = parse_msg(message.reply_to_message, silent=True) if message.reply_to_message else {}
 
+    # Fetch subtitle via API
     res = await fetch_subtitle(url, youtube_subtitle_provider)
+    subtitle_file_sent = False
+
+    # API failed
     if error := res.get("error", ""):
         if this_info["mtype"] in ["audio", "video"] or reply_info.get("mtype", "") in ["audio", "video"]:
             await modify_progress(text=error + "\n正在通过ASR识别字幕", force_update=True, **kwargs)
             msg = message if this_info["mtype"] in ["audio", "video"] else message.reply_to_message
             fpath: str = await client.download_media(msg)  # type: ignore
             engine = "gemini" if platform == "youtube" else ""  # use gemini to bypass censorship
-            res = await asr_file(fpath, engine=engine, client=client, message=message, silent=True, **kwargs)
+            prompt = f"请转录{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目的音频。\n该期节目标题: {vinfo['title']}\n节目简介: {description}"
+            res = await asr_file(fpath, engine=engine, prompt=prompt, client=client, message=message, silent=True, **kwargs)
             if res.get("error"):
                 await modify_progress(text=res["error"], force_update=True, **kwargs)
                 return
@@ -85,32 +105,42 @@ async def get_subtitle(client: Client, message: Message, youtube_subtitle_provid
                 "proxy": None,
                 "use_db": False,
             }
-            await preview_ytdlp(client=client, message=message, **kwargs)
+            # Download and send subtitle file via ytdlp
+            subtitle_msg = (await preview_ytdlp(client=client, message=message, **kwargs))[0]
+            data: BytesIO = await client.download_media(subtitle_msg, in_memory=True)  # type: ignore
+            subtitles = data.getvalue().decode("utf-8")
+            subtitle_file_sent = True
+
+    # Send subtitle file
+    if not subtitle_file_sent:
+        subtitles = glom(res, Coalesce("full", "subtitles", "summary"), default="")
+        if not subtitles:
             await modify_progress(del_status=True, **kwargs)
             return
-    subtitles = glom(res, Coalesce("full", "subtitles", "summary"), default="")
-    if not subtitles:
-        return
-    logger.success(subtitles)
-    vid = matched.get("vid", matched.get("bvid", url))
-    if platform in ["bilibili", "youtube"]:
-        vinfo = await fetch_youtube_video_info(vid) if platform == "youtube" else await get_bilibili_video_info(vid)
+        logger.success(subtitles)
         caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
         caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {res['reading_minutes']:.1f}分钟"
         if to_telegraph:
             html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
             if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
                 caption += f"\n⚡️[Telegraph即时预览]({telegraph_url})"
-        with io.BytesIO(subtitles.encode("utf-8")) as f:
-            await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
-    else:
-        caption = f"🎬来源: [视频链接]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {res['reading_minutes']:.1f}分钟"
-        if to_telegraph:
-            html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
-            if telegraph_url := await publish_telegraph(title=f"{vid}字幕", html=html, url=url):
-                caption += f"\n⚡️[Telegraph即时预览]({telegraph_url})"
-        with io.BytesIO(subtitles.encode("utf-8")) as f:
-            await client.send_document(to_int(target_chat), f, file_name=f"{vid}字幕.txt", caption=caption)
+        with BytesIO(subtitles.encode("utf-8")) as f:
+            subtitle_msg = await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
+
+    if ai_summary and isinstance(subtitle_msg, Message):
+        # Keep only the original subtitle text: strip the Bilibili-generated AI summary that precedes it
+        subtitles = re.sub(r"(.*?)Bilibili原视频提供的字幕:", "", subtitles, flags=re.DOTALL).strip()
+        prompt = f"以下是{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目详情, 请解读本期节目内容。"
+        prompt += "\n注意: 不要复述节目标题、作者、日期、简介等基本信息, 不要进行任何寒暄与问候, 直接输出节目内容解读。"
+        prompt += f"\n节目标题: {vinfo['title']}\n发布日期: {vinfo['date']:%Y-%m-%d %H:%M:%S}\n节目简介: {description}\n节目文字稿:\n{subtitles}"
+        # Construct a message to call GPT
+        ai_msg = Message(
+            id=subtitle_msg.id,
+            chat=subtitle_msg.chat,
+            text=Str(f"{PREFIX.GPT} {prompt}"),
+            from_user=message.from_user,
+        )
+        await gpt_response(client, ai_msg, **kwargs)
     with contextlib.suppress(Exception):
         [await modify_progress(msg, del_status=True) for msg in res.get("sent_messages", [])]
         await modify_progress(del_status=True, **kwargs)