Commit 52713e9
Changed files (1)
src
src/ai/transcription_summary.py
@@ -1,20 +1,25 @@
#!/venv/bin/python
# -*- coding: utf-8 -*-
+import base64
import json
+import re
from contextlib import suppress
+from pathlib import Path
from pyrogram.types import Chat, Message
from pyrogram.types.messages_and_media.message import Str
from ai.main import ai_text_generation
-from config import PREFIX
+from config import DB, DOWNLOAD_DIR, PREFIX
+from database.r2 import set_cf_r2
+from networking import download_file
from utils import count_subtitles, rand_number
JSON_SCHEMA = {
"title": "Transcription Summary",
"type": "object",
"properties": {
- "abstract": {"description": "需涵盖节目核心主题、关键观点和主要结论,用连贯的一段话概括,避免过于简略", "title": "全文概览", "type": "string"},
+ "abstract": {"title": "全文概览", "description": "需涵盖节目核心主题、关键观点和主要结论,用连贯的一段话概括,避免过于简略", "type": "string"},
"sections": {
"description": "将节目划分为不同片段,每个片段需拟定简洁准确的标题,匹配1个相关emoji,并总结该片段的核心内容",
"title": "片段内容",
@@ -29,8 +34,14 @@ JSON_SCHEMA = {
},
},
},
+ "mermaid": {
+ "title": "思维导图",
+ "type": "string",
+ "pattern": "^flowchart LR",
+ "description": "以Mermaid flowchart格式表示的思维导图,以'flowchart LR'开头",
+ },
},
- "required": ["abstract", "sections"],
+ "required": ["abstract", "sections", "mermaid"],
"additionalProperties": False,
}
@@ -71,7 +82,12 @@ async def summarize_transcription(transcription: str, reference: str | None = No
if not res.get("texts", ""):
return {}
summary = json.loads(res.get("texts", "{}"))
- texts = f"{summary['abstract'].strip()}\n\n**章节速览**"
+ mermaid = beautify_mermaid(summary["mermaid"])
+ mermaid_img = await save_mermaid_jpg_to_r2(mermaid)
+ texts = f"{summary['abstract'].strip()}"
+ if mermaid_img:
+ texts += f"\n🧠**[思维导图]({mermaid_img})**\n"
+ texts += "\n⚡️**章节速览**"
for section in summary["sections"]:
texts += f"\n{section['emoji']}**{section['title']}** [{section['start']}]\n{section['summary']}"
res["texts"] = texts
@@ -84,3 +100,36 @@ def system_prompt(reference: str | None = None) -> str:
if reference:
prompt += f"\n{reference}"
return prompt.strip()
+
+
def beautify_mermaid(mermaid: str) -> str:
    """Escape node labels in a Mermaid diagram and prepend a theme front-matter.

    Every ``[...]`` span on a single line is treated as a node label. Inside
    a label, characters that Mermaid would otherwise parse as syntax are
    replaced with Mermaid entity codes (``#NN;`` / ``#quot;``), and spaces are
    turned into ``<br/>`` line breaks.

    Args:
        mermaid: Raw Mermaid source, e.g. as produced by the model.

    Returns:
        The stripped diagram with escaped labels, preceded by a YAML
        front-matter block selecting the ``neo`` theme and look.
    """
    # One-pass translation table. None of the replacement strings contain a
    # character that is itself a key, so a single pass is equivalent to the
    # sequential replace() chain this table supersedes.
    label_escapes = str.maketrans(
        {
            # NOTE: the pattern below does not use re.DOTALL, so a captured
            # label never actually contains "\n"; the entry is kept so the
            # table stays correct if the pattern is ever widened.
            "\n": "<br/>",
            " ": "<br/>",
            '"': "#quot;",
            "'": "#39;",
            "[": "#91;",
            "]": "#93;",
            "(": "#40;",
            ")": "#41;",
            "@": "#64;",
        }
    )

    def escape_label(match: re.Match[str]) -> str:
        # Re-wrap the escaped label text in the brackets the pattern consumed.
        return f"[{match.group(1).translate(label_escapes)}]"

    # Non-greedy so each label ends at the first closing bracket after it opens.
    escaped = re.sub(r"\[(.*?)\]", escape_label, mermaid.strip())
    return f"---\nconfig:\n theme: neo\n look: neo\n---\n{escaped.strip()}"
+
+
async def save_mermaid_jpg_to_r2(mermaid: str) -> str:
    """Render a Mermaid diagram to JPEG via mermaid.ink and upload it to R2.

    The diagram source is URL-safe base64 encoded into a mermaid.ink image
    URL, fetched into a randomly named file under ``DOWNLOAD_DIR``, uploaded
    under the ``TTL/365d/`` key prefix, and the local file is then removed.

    Args:
        mermaid: Complete Mermaid source (including any front-matter).

    Returns:
        ``DB.CF_R2_PUBLIC_URL`` joined with the object key, or an empty
        string when no file exists at the download path afterwards.
    """
    b64_str = base64.urlsafe_b64encode(mermaid.encode("utf-8")).decode("ascii")
    save_path = Path(DOWNLOAD_DIR) / f"{rand_number()}.jpg"
    # NOTE(review): assumes download_file writes to `path` and does not raise
    # on a failed render; the is_file() check below is the only failure signal
    # used here. Confirm against the networking helper.
    await download_file(f"https://mermaid.ink/img/{b64_str}?type=jpeg&theme=forest&width=2160", path=save_path, suffix=".jpg")
    if not save_path.is_file():
        return ""

    r2_key = f"TTL/365d/{save_path.name}"
    try:
        await set_cf_r2(r2_key, data=save_path.read_bytes(), mime_type="image/jpeg", silent=True)
    finally:
        # Always drop the temporary image, even if reading or uploading raised,
        # so failed uploads do not accumulate files in DOWNLOAD_DIR.
        save_path.unlink(missing_ok=True)
    return f"{DB.CF_R2_PUBLIC_URL}/{r2_key}"