Commit d4985d0

benny-dou <60535774+benny-dou@users.noreply.github.com>
2026-05-13 03:03:57
style(summary): improve transcription summary style
1 parent f82c3c8
Changed files (5)
src/ai/transcription_summary.py
@@ -0,0 +1,86 @@
+#!/venv/bin/python
+# -*- coding: utf-8 -*-
+import json
+from contextlib import suppress
+
+from pyrogram.types import Chat, Message
+from pyrogram.types.messages_and_media.message import Str
+
+from ai.main import ai_text_generation
+from config import PREFIX
+from utils import count_subtitles, rand_number
+
+# JSON Schema describing the structured summary the model must return.
+# It is handed to Gemini as "responseJsonSchema" and to OpenAI as a "json_schema" format with "strict": True.
+# Strict structured output only accepts objects that set "additionalProperties": False and list every property
+# under "required", so the nested section object declares both, just like the top-level object does.
+JSON_SCHEMA = {
+    "title": "Transcription Summary",
+    "type": "object",
+    "properties": {
+        "abstract": {"description": "需涵盖节目核心主题、关键观点和主要结论,用连贯的一段话概括,避免过于简略", "title": "全文概览", "type": "string"},
+        "sections": {
+            "description": "将节目划分为不同片段,每个片段需拟定简洁准确的标题,匹配1个相关emoji,并总结该片段的核心内容",
+            "title": "片段内容",
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "title": {"type": "string", "description": "该片段的标题"},
+                    "emoji": {"type": "string", "description": "匹配该片段的emoji,例如💡、💰、⚠️等"},
+                    "summary": {"type": "string", "description": "该片段的总结"},
+                    "start": {"type": "string", "description": "该片段的开始时间, 格式为(HH:MM:SS或MM:SS)"},
+                },
+                # Every field is read when the summary text is rendered, so none of them may be omitted.
+                "required": ["title", "emoji", "summary", "start"],
+                "additionalProperties": False,
+            },
+        },
+    },
+    "required": ["abstract", "sections"],
+    "additionalProperties": False,
+}
+
+
+async def summarize_transcription(transcription: str, reference: str | None = None, model: str = "gemini") -> dict:
+    """Summarize a transcription into an abstract followed by a per-section overview.
+
+    The transcription is sent through ``ai_text_generation`` with a structured-output
+    configuration built from ``JSON_SCHEMA``; the JSON reply is then rendered as text.
+
+    Args:
+        transcription: Transcript text to summarize.
+        reference: Optional extra context appended to the system prompt.
+        model: Model alias placed after ``@`` in the generated command text.
+
+    Returns:
+        The ``ai_text_generation`` result with ``"texts"`` replaced by the rendered
+        summary, or an empty dict when ``count_subtitles(transcription)`` is below 200,
+        the reply is empty, or the reply cannot be parsed into the expected shape.
+    """
+    if count_subtitles(transcription) < 200:  # skip short transcription
+        return {}
+    instructions = system_prompt(reference)  # identical for both backends, so build it once
+    res = await ai_text_generation(
+        "fake-client",  # type: ignore
+        message=Message(
+            id=rand_number(),
+            chat=Chat(id=rand_number()),
+            text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{model} {transcription.strip()}"),
+        ),
+        gemini_generate_content_config={
+            "system_instruction": instructions,
+            "responseMimeType": "application/json",
+            "responseJsonSchema": JSON_SCHEMA,
+        },
+        openai_responses_config={
+            "instructions": instructions,
+            "text": {
+                "format": {
+                    "type": "json_schema",
+                    "name": "TranscriptionSummary",
+                    "strict": True,
+                    "description": "基于提供的转录文稿,提炼出节目的核心内容,生成符合指定JSON格式的内容总结",
+                    "schema": JSON_SCHEMA,
+                }
+            },
+        },
+        gemini_append_grounding=False,
+        openai_enable_tool_call=False,
+        openai_append_tool_results=False,
+        silent=True,
+    )
+    # Best effort: a malformed reply means "no summary" for the caller, never an error.
+    # Only the failures that parsing/rendering can produce are caught, so unrelated bugs stay visible.
+    try:
+        raw = res.get("texts", "")
+        if not raw:
+            return {}
+        summary = json.loads(raw)
+        parts = [f"{summary['abstract'].strip()}\n\n**章节速览**"]
+        for section in summary["sections"]:
+            title = section.get("title", "")
+            body = section.get("summary", "")
+            if not (title or body):  # nothing worth rendering for this section
+                continue
+            # A missing emoji or start time should not cost the user the whole summary.
+            start = section.get("start", "")
+            timestamp = f" [{start}]" if start else ""
+            parts.append(f"\n{section.get('emoji', '')}**{title}**{timestamp}\n{body}")
+        res["texts"] = "".join(parts)
+        return res
+    except (ValueError, KeyError, TypeError, AttributeError):  # json.JSONDecodeError is a ValueError
+        return {}
+
+
+def system_prompt(reference: str | None = None) -> str:
+    """Return the summarizer's system instruction, with *reference* appended on its own line when given."""
+    base = "你是一位专业的节目总结大师,任务是基于提供的转录文稿,提炼出节目的核心内容,生成符合指定JSON格式的内容总结。"
+    # An empty or missing reference leaves the base instruction untouched.
+    combined = f"{base}\n{reference}" if reference else base
+    return combined.strip()
src/podcast/main.py
@@ -29,10 +29,9 @@ from glom import Coalesce, glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Chat, Message
-from pyrogram.types.messages_and_media.message import Str
 
-from ai.main import ai_text_generation
-from config import AI, PODCAST, PREFIX, PROXY
+from ai.transcription_summary import summarize_transcription
+from config import AI, PODCAST, PROXY
 from database.github import gh_clean_assets
 from database.r2 import get_cf_r2, set_cf_r2
 from messages.sender import send2tg
@@ -78,16 +77,9 @@ async def summary_pods(client: Client):
                 caption = f"🎧[{feed_title}]({homepage})\n📝[{entry['title']}]({entry['link']})\n🕒{pubdate}\n⏳{duration} #️⃣字数: {count_subtitles(transcripts)}"
                 markdown_desc = convert_md(html=glom(entry, Coalesce("content.0.value", "summary"), default=""))
                 markdown_desc = remove_consecutive_newlines(markdown_desc, newline_level=2)
-                prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
+                prompt = f"该转录稿对应于播客栏目《{feed_title}》的一期节目,节目详情如下:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
                 prompt += f"\n节目时长: {duration}\n节目简介: {markdown_desc}"
-                prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
-                ai_msg = Message(  # Construct a message for AI
-                    id=rand_number(),
-                    chat=message.chat,
-                    text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{AI.PODCAST_SUMMARY_MODEL_ALIAS} {prompt}"),
-                    reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(transcripts)),
-                )
-                ai_res = await ai_text_generation(client, ai_msg, silent=True)
+                ai_res = await summarize_transcription(transcripts, reference=prompt, model=AI.PODCAST_SUMMARY_MODEL_ALIAS)
                 telegraph_content = ""
                 if ai_res.get("texts"):
                     telegraph_content += f"\n🤖**{ai_res['model_name']}总结**:\n{ai_res['texts']}"
src/subtitles/subtitle.py
@@ -9,9 +9,8 @@ from glom import Coalesce, glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
-from pyrogram.types.messages_and_media.message import Str
 
-from ai.main import ai_text_generation
+from ai.transcription_summary import summarize_transcription
 from asr.voice_recognition import asr_file
 from config import AI, ASR, DOWNLOAD_DIR, PREFIX, READING_SPEED, TEXT_LENGTH, cache
 from messages.parser import parse_msg
@@ -23,7 +22,7 @@ from preview.bilibili import get_bilibili_vinfo
 from preview.youtube import get_youtube_vinfo
 from publish import publish_telegraph
 from subtitles.base import fetch_subtitle, match_url
-from utils import count_subtitles, rand_number, readable_time, to_int
+from utils import count_subtitles, readable_time, to_int
 from ytdlp.download import ytdlp_download
 
 HELP = f"""📃**提取字幕**
@@ -136,20 +135,13 @@ async def get_subtitle(
     if ai_summary and isinstance(subtitle_msg, Message):
         # use real subtitle (without AI summary by Bilibili)
         subtitles = re.sub(r"(.*?)AI总结(B站版):", "", subtitles, flags=re.DOTALL).strip()  # noqa: RUF001
-        prompt = f"以上是{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目的文字稿。该期节目详情如下:\n"
+        prompt = f"该转录稿对应于{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目,节目详情如下:\n"
         prompt += f"节目标题: {vinfo['title']}\n发布日期: {vinfo['pubdate']}\n"
         if description.strip():
-            prompt += f"节目简介: {description}\n"
-        prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
-        ai_msg = Message(  # Construct a message for AI
-            id=subtitle_msg.id,
-            chat=subtitle_msg.chat,
-            text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {prompt}"),
-            reply_to_message=Message(id=rand_number(), chat=subtitle_msg.chat, text=Str(subtitles)),
-        )
-        res = await ai_text_generation(client, ai_msg, silent=True)
+            prompt += f"节目简介: {description}"
+        res = await summarize_transcription(subtitles, reference=prompt, model=summary_model_id)
         if res.get("texts"):
-            await send2tg(client, ai_msg, texts=res["prefix"] + blockquote(res["texts"]), **kwargs)
+            await send2tg(client, subtitle_msg, texts=res["prefix"] + blockquote(res["texts"]), **kwargs)
     with contextlib.suppress(Exception):
         [await delete_message(msg) for msg in res.get("sent_messages", [])]
         await delete_message(kwargs.get("progress"))
src/ytdlp/main.py
@@ -10,10 +10,9 @@ from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
-from pyrogram.types.messages_and_media.message import Str
 
-from ai.main import ai_text_generation
-from config import AI, ASR, CAPTION_LENGTH, DB, MAX_FILE_BYTES, PREFIX, READING_SPEED, YTDLP_RE_ENCODING_MAX_FILE_BYTES
+from ai.transcription_summary import summarize_transcription
+from config import AI, ASR, CAPTION_LENGTH, DB, MAX_FILE_BYTES, READING_SPEED, YTDLP_RE_ENCODING_MAX_FILE_BYTES
 from database.database import get_db
 from messages.database import copy_messages_from_db, save_messages
 from messages.preprocess import preprocess_media
@@ -24,7 +23,7 @@ from multimedia import convert_to_h264
 from preview.bilibili import get_bilibili_comments, get_bilibili_vinfo, make_bvid_clickable
 from preview.youtube import get_youtube_comments, get_youtube_vinfo
 from publish import publish_telegraph
-from utils import count_subtitles, rand_number, readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
+from utils import count_subtitles, readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
 from ytdlp.download import ytdlp_download
 from ytdlp.utils import append_subtitle, cleanup_ytdlp, generate_prompt, get_subtitles, platform_emoji
 
@@ -140,14 +139,7 @@ async def preview_ytdlp(
     # get ai summary
     summary = ""
     if subtitles and true(ytdlp_send_summary):
-        prompt = generate_prompt(info, target="summary")
-        ai_msg = Message(  # Construct a message for AI
-            id=rand_number(),
-            chat=message.chat,
-            text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {prompt}"),
-            reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(subtitles)),
-        )
-        aires = await ai_text_generation(client, ai_msg, silent=True)
+        aires = await summarize_transcription(sub, reference=generate_prompt(info), model=summary_model_id)
         if aires.get("texts"):
             summary = f"🤖<b>{aires['model_name']}总结:</b>\n{markdown.markdown(aires['texts'])}\n"
 
src/ytdlp/utils.py
@@ -194,7 +194,7 @@ async def get_subtitles(audio_path: str | Path, url: str, asr_engine: str, vinfo
     # send subtitles
     subtitles = ""
     matched = await match_social_media_link(url)
-    reference = generate_prompt(vinfo, "correction")
+    reference = generate_prompt(vinfo)
     if matched["platform"] in ["bilibili", "youtube"]:  # get subtitle from API first
         res = await fetch_subtitle(url=url, reference=reference)
         subtitles = res.get("subtitles", "")  # only subtitles, no Bilibili's AI summary
@@ -247,19 +247,18 @@ async def append_subtitle(name: str, sent_messages: dict) -> dict:
     return modified
 
 
-def generate_prompt(info: dict, target: Literal["summary", "correction"]) -> str:
+def generate_prompt(info: dict) -> str:
     """Generate prompt for AI summary or correction."""
-    prompt = f"以上是{info['extractor'].title()}视频" if target == "summary" else f"本次转录稿为{info['extractor'].title()}平台"
+    prompt = f"该转录稿对应于{info['extractor'].title()}平台"
     if author := info.get("author"):
         prompt += f"作者【{author}】"
-    prompt += "的一期节目的文字稿。该期节目详情如下:\n"
+    prompt += "的一期节目,节目详情如下:\n"
     if title := info.get("title"):
         prompt += f"节目标题: {title}\n"
     if pubdate := glom(info, Coalesce("pubdate", "upload_date"), default=""):
         prompt += f"发布日期: {pubdate}\n"
     if desc := info.get("description"):
         prompt += f"节目简介: {desc}\n"
-    prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头" if target == "summary" else ""
     return prompt