Commit dd5f343

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-09-19 04:17:54
style(podcast): refine podcast caption style
1 parent 701f021
Changed files (1)
src
podcast
src/podcast/main.py
@@ -21,7 +21,6 @@ Besides, it will also upload the enclosure file to GitHub Releases
 """
 
 import contextlib
-import io
 from pathlib import Path
 from urllib.parse import unquote_plus
 
@@ -32,16 +31,15 @@ from pyrogram.client import Client
 from pyrogram.types import Chat, Message
 from pyrogram.types.messages_and_media.message import Str
 
-from config import GPT, PODCAST, READING_SPEED, cache
+from config import GPT, PODCAST, PREFIX
 from database.github import gh_clean_assets
 from database.r2 import get_cf_r2, set_cf_r2
 from llm.gpt import gpt_response
 from llm.utils import convert_html, convert_md, remove_consecutive_newlines
 from messages.sender import send2tg
-from messages.utils import blockquote
 from networking import download_file, hx_req
-from podcast.asr import backup_audio, get_duration, get_transcripts
-from podcast.utils import HEADERS, clean_feed_url, feed_saved_target, get_pubdate, remove_img_tag
+from podcast.asr import get_duration, get_transcripts
+from podcast.utils import HEADERS, clean_feed_url, feed_saved_target, get_pubdate
 from podcast.xml import get_feed_title, parse_feed, save_xml, update_xml_desc
 from preview.bilibili import get_bilibili_vinfo
 from preview.youtube import get_youtube_vinfo
@@ -75,40 +73,21 @@ async def summary_pods(client: Client):
                 if not transcripts:
                     continue
                 duration = await get_duration(info["asr_path"], entry)
+                duration = seconds_to_hms(duration)
                 dt = get_pubdate(entry)
                 pubdate = f"{dt:%Y-%m-%d %H:%M:%S}"
-                base_caption = f"🎧播客: [{feed_title}]({homepage})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {seconds_to_hms(duration)}"
-                desc = convert_md(html=glom(entry, Coalesce("content.0.value", "summary"), default=""))
-                desc = remove_consecutive_newlines(desc, newline_level=2)
-                audio_caption = base_caption + f"\n📖简介: {desc}" if desc else base_caption
-                transcript_caption = base_caption + f"\n#️⃣字数: {count_subtitles(transcripts)}\n⏳阅读: {seconds_to_hms(60 * count_subtitles(transcripts) / READING_SPEED)}"
-                if telegraph_url := await publish_telegraph(title=entry["title"], html=convert_html(f"{audio_caption}\n{transcripts}"), author=feed_title, url=entry["link"]):
-                    transcript_caption += f"\n⚡️[即时预览]({telegraph_url})"
-                media = (
-                    [
-                        {
-                            "audio": backup_audio(info["asr_path"]),
-                            "title": entry["title"],
-                            "performer": feed_title,
-                            "thumb": info["thumb"],
-                        }
-                    ]
-                    if Path(info["path"]).suffix in [".aac", ".amr", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".wma"]
-                    else [{"video": info["path"], "thumb": info["thumb"]}]
-                )
-                await send2tg(client, message, texts=remove_img_tag(audio_caption), media=media, reply_msg_id=-1)  # Telegram DO NOT allow img tag in messages
-                with io.BytesIO(transcripts.encode("utf-8")) as f:
-                    txt_msg: Message = await client.send_document(message.chat.id, f, file_name=f"{entry['title']}.txt", caption=transcript_caption)  # type: ignore
-
+                caption = f"🎧[{feed_title}]({homepage})\n📝[{entry['title']}]({entry['link']})\n🕒{pubdate}\n⏳{duration}\n#️⃣字数: {count_subtitles(transcripts)}"
+                markdown_desc = convert_md(html=glom(entry, Coalesce("content.0.value", "summary"), default=""))
+                markdown_desc = remove_consecutive_newlines(markdown_desc, newline_level=2)
                 prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
-                prompt += f"\n节目时长: {seconds_to_hms(duration)}\n节目简介: {desc}"
-                prompt += "\n请解读该播客内容, 只需关注内容本身, 不用概述播客的基本信息, 例如播客的标题, 日期, 时长等"
+                prompt += f"\n节目时长: {duration}\n节目简介: {markdown_desc}"
+                prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
                 # Construct a message to call GPT
-                cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
+                # cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
                 ai_msg = Message(
-                    id=message.id,
+                    id=rand_number(),
                     chat=message.chat,
-                    text=Str(f"/ai {prompt}"),
+                    text=Str(f"{strings_list(PREFIX.GPT)[0]} {prompt}"),
                     reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(transcripts)),
                 )
                 gpt_res = await gpt_response(
@@ -120,8 +99,29 @@ async def summary_pods(client: Client):
                     append_grounding=False,
                     silent=True,
                 )
+                telegraph_content = ""
                 if gpt_res.get("texts"):
-                    await send2tg(client, txt_msg, texts=gpt_res["prefix"] + blockquote(gpt_res["texts"]))
+                    telegraph_content += f"\n🤖**{gpt_res['model_name']}总结**:\n{gpt_res['texts']}"
+                telegraph_content += f"\n📖**节目简介**:\n {markdown_desc}" if markdown_desc else ""
+                telegraph_content += f"\n🔤**转录字幕**:\n{transcripts}"
+
+                if telegraph_url := await publish_telegraph(title=entry["title"], html=convert_html(telegraph_content), author=feed_title, url=entry["link"]):
+                    caption += f"\n[🤖总结 & 🔤字幕]({telegraph_url})"
+
+                media = (
+                    [
+                        {
+                            "audio": info["asr_path"],
+                            "title": entry["title"],
+                            "performer": feed_title,
+                            "thumb": info["thumb"],
+                        }
+                    ]
+                    if Path(info["path"]).suffix in [".aac", ".amr", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".wma"]
+                    else [{"video": info["path"], "thumb": info["thumb"]}]
+                )
+
+                await send2tg(client, message, texts=caption, media=media, reply_msg_id=-1)
                 processed_xml = await update_xml_desc(feed_url, processed_xml, entry, summary=gpt_res.get("texts", ""), audio_path=info["asr_path"])
                 await set_cf_r2(entry["db_key"], data={"title": entry["title"], "url": entry["link"]})
                 has_update = True