Commit dd5f343
Changed files (1)
src
podcast
src/podcast/main.py
@@ -21,7 +21,6 @@ Besides, it will also upload the enclosure file to GitHub Releases
"""
import contextlib
-import io
from pathlib import Path
from urllib.parse import unquote_plus
@@ -32,16 +31,15 @@ from pyrogram.client import Client
from pyrogram.types import Chat, Message
from pyrogram.types.messages_and_media.message import Str
-from config import GPT, PODCAST, READING_SPEED, cache
+from config import GPT, PODCAST, PREFIX
from database.github import gh_clean_assets
from database.r2 import get_cf_r2, set_cf_r2
from llm.gpt import gpt_response
from llm.utils import convert_html, convert_md, remove_consecutive_newlines
from messages.sender import send2tg
-from messages.utils import blockquote
from networking import download_file, hx_req
-from podcast.asr import backup_audio, get_duration, get_transcripts
-from podcast.utils import HEADERS, clean_feed_url, feed_saved_target, get_pubdate, remove_img_tag
+from podcast.asr import get_duration, get_transcripts
+from podcast.utils import HEADERS, clean_feed_url, feed_saved_target, get_pubdate
from podcast.xml import get_feed_title, parse_feed, save_xml, update_xml_desc
from preview.bilibili import get_bilibili_vinfo
from preview.youtube import get_youtube_vinfo
@@ -75,40 +73,21 @@ async def summary_pods(client: Client):
if not transcripts:
continue
duration = await get_duration(info["asr_path"], entry)
+ duration = seconds_to_hms(duration)
dt = get_pubdate(entry)
pubdate = f"{dt:%Y-%m-%d %H:%M:%S}"
- base_caption = f"🎧播客: [{feed_title}]({homepage})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {seconds_to_hms(duration)}"
- desc = convert_md(html=glom(entry, Coalesce("content.0.value", "summary"), default=""))
- desc = remove_consecutive_newlines(desc, newline_level=2)
- audio_caption = base_caption + f"\n📖简介: {desc}" if desc else base_caption
- transcript_caption = base_caption + f"\n#️⃣字数: {count_subtitles(transcripts)}\n⏳阅读: {seconds_to_hms(60 * count_subtitles(transcripts) / READING_SPEED)}"
- if telegraph_url := await publish_telegraph(title=entry["title"], html=convert_html(f"{audio_caption}\n{transcripts}"), author=feed_title, url=entry["link"]):
- transcript_caption += f"\n⚡️[即时预览]({telegraph_url})"
- media = (
- [
- {
- "audio": backup_audio(info["asr_path"]),
- "title": entry["title"],
- "performer": feed_title,
- "thumb": info["thumb"],
- }
- ]
- if Path(info["path"]).suffix in [".aac", ".amr", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".wma"]
- else [{"video": info["path"], "thumb": info["thumb"]}]
- )
- await send2tg(client, message, texts=remove_img_tag(audio_caption), media=media, reply_msg_id=-1) # Telegram DO NOT allow img tag in messages
- with io.BytesIO(transcripts.encode("utf-8")) as f:
- txt_msg: Message = await client.send_document(message.chat.id, f, file_name=f"{entry['title']}.txt", caption=transcript_caption) # type: ignore
-
+ caption = f"🎧[{feed_title}]({homepage})\n📝[{entry['title']}]({entry['link']})\n🕒{pubdate}\n⏳{duration}\n#️⃣字数: {count_subtitles(transcripts)}"
+ markdown_desc = convert_md(html=glom(entry, Coalesce("content.0.value", "summary"), default=""))
+ markdown_desc = remove_consecutive_newlines(markdown_desc, newline_level=2)
prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
- prompt += f"\n节目时长: {seconds_to_hms(duration)}\n节目简介: {desc}"
- prompt += "\n请解读该播客内容, 只需关注内容本身, 不用概述播客的基本信息, 例如播客的标题, 日期, 时长等"
+ prompt += f"\n节目时长: {duration}\n节目简介: {markdown_desc}"
+ prompt += "\n请解读本期节目内容。要求: 直接输出节目内容解读, 以“该节目讲述了”开头"
# Construct a message to call GPT
- cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
+ # cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
ai_msg = Message(
- id=message.id,
+ id=rand_number(),
chat=message.chat,
- text=Str(f"/ai {prompt}"),
+ text=Str(f"{strings_list(PREFIX.GPT)[0]} {prompt}"),
reply_to_message=Message(id=rand_number(), chat=message.chat, text=Str(transcripts)),
)
gpt_res = await gpt_response(
@@ -120,8 +99,29 @@ async def summary_pods(client: Client):
append_grounding=False,
silent=True,
)
+ telegraph_content = ""
if gpt_res.get("texts"):
- await send2tg(client, txt_msg, texts=gpt_res["prefix"] + blockquote(gpt_res["texts"]))
+ telegraph_content += f"\n🤖**{gpt_res['model_name']}总结**:\n{gpt_res['texts']}"
+ telegraph_content += f"\n📖**节目简介**:\n {markdown_desc}" if markdown_desc else ""
+ telegraph_content += f"\n🔤**转录字幕**:\n{transcripts}"
+
+ if telegraph_url := await publish_telegraph(title=entry["title"], html=convert_html(telegraph_content), author=feed_title, url=entry["link"]):
+ caption += f"\n[🤖总结 & 🔤字幕]({telegraph_url})"
+
+ media = (
+ [
+ {
+ "audio": info["asr_path"],
+ "title": entry["title"],
+ "performer": feed_title,
+ "thumb": info["thumb"],
+ }
+ ]
+ if Path(info["path"]).suffix in [".aac", ".amr", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".wma"]
+ else [{"video": info["path"], "thumb": info["thumb"]}]
+ )
+
+ await send2tg(client, message, texts=caption, media=media, reply_msg_id=-1)
processed_xml = await update_xml_desc(feed_url, processed_xml, entry, summary=gpt_res.get("texts", ""), audio_path=info["asr_path"])
await set_cf_r2(entry["db_key"], data={"title": entry["title"], "url": entry["link"]})
has_update = True