Commit d4985d0
Changed files (5)
src
src/ai/transcription_summary.py
@@ -0,0 +1,86 @@
+#!/venv/bin/python
+# -*- coding: utf-8 -*-
+import json
+from contextlib import suppress
+
+from pyrogram.types import Chat, Message
+from pyrogram.types.messages_and_media.message import Str
+
+from ai.main import ai_text_generation
+from config import PREFIX
+from utils import count_subtitles, rand_number
+
# JSON Schema describing the structured summary the model must return.
# The same dict is passed to `ai_text_generation` as Gemini's
# `responseJsonSchema` and as the `schema` of the OpenAI `json_schema` text
# format (which is requested with `strict: True`).
#
# Every object level lists all of its properties in `required` and sets
# `additionalProperties` to False: the formatter in `summarize_transcription`
# reads `title`, `emoji`, `summary` and `start` from each section, so the schema
# has to guarantee that they are present.
JSON_SCHEMA = {
    "title": "Transcription Summary",
    "type": "object",
    "properties": {
        "abstract": {"description": "需涵盖节目核心主题、关键观点和主要结论,用连贯的一段话概括,避免过于简略", "title": "全文概览", "type": "string"},
        "sections": {
            "description": "将节目划分为不同片段,每个片段需拟定简洁准确的标题,匹配1个相关emoji,并总结该片段的核心内容",
            "title": "片段内容",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "title": {"type": "string", "description": "该片段的标题"},
                    "emoji": {"type": "string", "description": "匹配该片段的emoji,例如💡、💰、⚠️等"},
                    "summary": {"type": "string", "description": "该片段的总结"},
                    "start": {"type": "string", "description": "该片段的开始时间, 格式为(HH:MM:SS或MM:SS)"},
                },
                "required": ["title", "emoji", "summary", "start"],
                "additionalProperties": False,
            },
        },
    },
    "required": ["abstract", "sections"],
    "additionalProperties": False,
}
+
+
async def summarize_transcription(transcription: str, reference: str | None = None, model: str = "gemini") -> dict:
    """Ask the AI backend for a structured summary and render it as Markdown.

    The transcription is wrapped in a synthetic pyrogram ``Message`` whose text
    is ``"{PREFIX.AI_TEXT_GENERATION} @{model} {transcription}"`` and passed to
    ``ai_text_generation`` together with Gemini and OpenAI configs that request
    a JSON reply matching ``JSON_SCHEMA``. The JSON reply is then converted to
    Markdown text.

    Args:
        transcription: The transcript to summarize.
        reference: Optional extra context appended to the system prompt.
        model: Model alias placed after ``@`` in the synthetic message text.

    Returns:
        The dict returned by ``ai_text_generation`` with its ``"texts"`` entry
        replaced by the rendered summary. An empty dict is returned when
        ``count_subtitles(transcription)`` is below 200, when the reply has no
        text, or when the reply is not valid JSON of the expected shape.
    """
    if count_subtitles(transcription) < 200:  # skip short transcription
        return {}

    # Both provider configs carry the same instruction, so build it once.
    instructions = system_prompt(reference)
    res = await ai_text_generation(
        # A placeholder string is passed instead of a real client object.
        # NOTE(review): presumably the client is not used on this code path - confirm.
        "fake-client",  # type: ignore
        message=Message(
            id=rand_number(),
            chat=Chat(id=rand_number()),
            text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{model} {transcription.strip()}"),
        ),
        gemini_generate_content_config={
            "system_instruction": instructions,
            "responseMimeType": "application/json",
            "responseJsonSchema": JSON_SCHEMA,
        },
        openai_responses_config={
            "instructions": instructions,
            "text": {
                "format": {
                    "type": "json_schema",
                    "name": "TranscriptionSummary",
                    "strict": True,
                    "description": "基于提供的转录文稿,提炼出节目的核心内容,生成符合指定JSON格式的内容总结",
                    "schema": JSON_SCHEMA,
                }
            },
        },
        gemini_append_grounding=False,
        openai_enable_tool_call=False,
        openai_append_tool_results=False,
        silent=True,
    )

    # Summarization is best-effort: a missing or malformed reply yields {}
    # instead of propagating, but only the failure modes of parsing/rendering
    # are caught (json.JSONDecodeError is a ValueError).
    try:
        raw = res.get("texts", "")
        if not raw:
            return {}
        res["texts"] = _format_summary(json.loads(raw))
    except (ValueError, KeyError, TypeError, AttributeError):
        return {}
    return res


def _format_summary(summary: dict) -> str:
    """Render a parsed summary object as Markdown.

    ``abstract`` and ``sections`` must be present, and every section needs a
    ``title`` and a ``summary``; a ``KeyError`` is raised otherwise. ``emoji``
    and ``start`` are treated as optional so that one missing decoration does
    not invalidate the whole summary. The chapter heading is only emitted when
    there is at least one section.
    """
    parts = [summary["abstract"].strip()]
    sections = summary["sections"]
    if sections:
        parts.append("\n\n**章节速览**")
    for section in sections:
        emoji = section.get("emoji") or ""
        start = section.get("start") or ""
        timestamp = f" [{start}]" if start else ""
        parts.append(f"\n{emoji}**{section['title']}**{timestamp}\n{section['summary']}")
    return "".join(parts)
+
+
+def system_prompt(reference: str | None = None) -> str:
+ prompt = "你是一位专业的节目总结大师,任务是基于提供的转录文稿,提炼出节目的核心内容,生成符合指定JSON格式的内容总结。"
+ if reference:
+ prompt += f"\n{reference}"
+ return prompt.strip()
src/podcast/main.py
@@ -29,10 +29,9 @@ from glom import Coalesce, glom
from loguru import logger
from pyrogram.client import Client
from pyrogram.types import Chat, Message
-from pyrogram.types.messages_and_media.message import Str
-from ai.main import ai_text_generation
-from config import AI, PODCAST, PREFIX, PROXY
+from ai.transcription_summary import summarize_transcription
+from config import AI, PODCAST, PROXY
from database.github import gh_clean_assets
from database.r2 import get_cf_r2, set_cf_r2
from messages.sender import send2tg
@@ -78,16 +77,9 @@ async def summary_pods(client: Client):
caption = f"🎧[{feed_title}]({homepage})\n📝[{entry['title']}]({entry['link']})\n🕒{pubdate}\n⏳{duration} #️⃣字数: {count_subtitles(transcripts)}"
markdown_desc = convert_md(html=glom(entry, Coalesce("content.0.value", "summary"), default=""))
markdown_desc = remove_consecutive_newlines(markdown_desc, newline_level=2)
- prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
+ prompt = f"该转录稿对应于播客栏目《{feed_title}》的一期节目,节目详情如下:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
prompt += f"\n节目时长: {duration}\n节目简介: {markdown_desc}"
- prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
- ai_msg = Message( # Construct a message for AI
- id=rand_number(),
- chat=message.chat,
- text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{AI.PODCAST_SUMMARY_MODEL_ALIAS} {prompt}"),
- reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(transcripts)),
- )
- ai_res = await ai_text_generation(client, ai_msg, silent=True)
+ ai_res = await summarize_transcription(transcripts, reference=prompt, model=AI.PODCAST_SUMMARY_MODEL_ALIAS)
telegraph_content = ""
if ai_res.get("texts"):
telegraph_content += f"\n🤖**{ai_res['model_name']}总结**:\n{ai_res['texts']}"
src/subtitles/subtitle.py
@@ -9,9 +9,8 @@ from glom import Coalesce, glom
from loguru import logger
from pyrogram.client import Client
from pyrogram.types import Message
-from pyrogram.types.messages_and_media.message import Str
-from ai.main import ai_text_generation
+from ai.transcription_summary import summarize_transcription
from asr.voice_recognition import asr_file
from config import AI, ASR, DOWNLOAD_DIR, PREFIX, READING_SPEED, TEXT_LENGTH, cache
from messages.parser import parse_msg
@@ -23,7 +22,7 @@ from preview.bilibili import get_bilibili_vinfo
from preview.youtube import get_youtube_vinfo
from publish import publish_telegraph
from subtitles.base import fetch_subtitle, match_url
-from utils import count_subtitles, rand_number, readable_time, to_int
+from utils import count_subtitles, readable_time, to_int
from ytdlp.download import ytdlp_download
HELP = f"""📃**提取字幕**
@@ -136,20 +135,13 @@ async def get_subtitle(
if ai_summary and isinstance(subtitle_msg, Message):
# use real subtitle (without AI summary by Bilibili)
subtitles = re.sub(r"(.*?)AI总结(B站版):", "", subtitles, flags=re.DOTALL).strip() # noqa: RUF001
- prompt = f"以上是{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目的文字稿。该期节目详情如下:\n"
+ prompt = f"该转录稿对应于{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目,节目详情如下:\n"
prompt += f"节目标题: {vinfo['title']}\n发布日期: {vinfo['pubdate']}\n"
if description.strip():
- prompt += f"节目简介: {description}\n"
- prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
- ai_msg = Message( # Construct a message for AI
- id=subtitle_msg.id,
- chat=subtitle_msg.chat,
- text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {prompt}"),
- reply_to_message=Message(id=rand_number(), chat=subtitle_msg.chat, text=Str(subtitles)),
- )
- res = await ai_text_generation(client, ai_msg, silent=True)
+ prompt += f"节目简介: {description}"
+ res = await summarize_transcription(subtitles, reference=prompt, model=summary_model_id)
if res.get("texts"):
- await send2tg(client, ai_msg, texts=res["prefix"] + blockquote(res["texts"]), **kwargs)
+ await send2tg(client, subtitle_msg, texts=res["prefix"] + blockquote(res["texts"]), **kwargs)
with contextlib.suppress(Exception):
[await delete_message(msg) for msg in res.get("sent_messages", [])]
await delete_message(kwargs.get("progress"))
src/ytdlp/main.py
@@ -10,10 +10,9 @@ from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from loguru import logger
from pyrogram.client import Client
from pyrogram.types import Message
-from pyrogram.types.messages_and_media.message import Str
-from ai.main import ai_text_generation
-from config import AI, ASR, CAPTION_LENGTH, DB, MAX_FILE_BYTES, PREFIX, READING_SPEED, YTDLP_RE_ENCODING_MAX_FILE_BYTES
+from ai.transcription_summary import summarize_transcription
+from config import AI, ASR, CAPTION_LENGTH, DB, MAX_FILE_BYTES, READING_SPEED, YTDLP_RE_ENCODING_MAX_FILE_BYTES
from database.database import get_db
from messages.database import copy_messages_from_db, save_messages
from messages.preprocess import preprocess_media
@@ -24,7 +23,7 @@ from multimedia import convert_to_h264
from preview.bilibili import get_bilibili_comments, get_bilibili_vinfo, make_bvid_clickable
from preview.youtube import get_youtube_comments, get_youtube_vinfo
from publish import publish_telegraph
-from utils import count_subtitles, rand_number, readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
+from utils import count_subtitles, readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
from ytdlp.download import ytdlp_download
from ytdlp.utils import append_subtitle, cleanup_ytdlp, generate_prompt, get_subtitles, platform_emoji
@@ -140,14 +139,7 @@ async def preview_ytdlp(
# get ai summary
summary = ""
if subtitles and true(ytdlp_send_summary):
- prompt = generate_prompt(info, target="summary")
- ai_msg = Message( # Construct a message for AI
- id=rand_number(),
- chat=message.chat,
- text=Str(f"{PREFIX.AI_TEXT_GENERATION} @{summary_model_id} {prompt}"),
- reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(subtitles)),
- )
- aires = await ai_text_generation(client, ai_msg, silent=True)
+ aires = await summarize_transcription(sub, reference=generate_prompt(info), model=summary_model_id)
if aires.get("texts"):
summary = f"🤖<b>{aires['model_name']}总结:</b>\n{markdown.markdown(aires['texts'])}\n"
src/ytdlp/utils.py
@@ -194,7 +194,7 @@ async def get_subtitles(audio_path: str | Path, url: str, asr_engine: str, vinfo
# send subtitles
subtitles = ""
matched = await match_social_media_link(url)
- reference = generate_prompt(vinfo, "correction")
+ reference = generate_prompt(vinfo)
if matched["platform"] in ["bilibili", "youtube"]: # get subtitle from API first
res = await fetch_subtitle(url=url, reference=reference)
subtitles = res.get("subtitles", "") # only subtitles, no Bilibili's AI summary
@@ -247,19 +247,18 @@ async def append_subtitle(name: str, sent_messages: dict) -> dict:
return modified
def generate_prompt(info: dict) -> str:
    """Build the reference text describing the episode a transcript belongs to.

    Args:
        info: Video metadata. ``extractor`` is required; ``author``, ``title``,
            ``pubdate`` (or ``upload_date`` as a fallback) and ``description``
            are each included only when present and truthy.

    Returns:
        A header sentence followed by one newline-terminated line per
        available detail.
    """
    pieces = [f"该转录稿对应于{info['extractor'].title()}平台"]
    author = info.get("author")
    if author:
        pieces.append(f"作者【{author}】")
    pieces.append("的一期节目,节目详情如下:\n")

    title = info.get("title")
    if title:
        pieces.append(f"节目标题: {title}\n")

    # Prefer "pubdate" and fall back to "upload_date".
    pubdate = glom(info, Coalesce("pubdate", "upload_date"), default="")
    if pubdate:
        pieces.append(f"发布日期: {pubdate}\n")

    desc = info.get("description")
    if desc:
        pieces.append(f"节目简介: {desc}\n")
    return "".join(pieces)