Commit 4caa9a4
Changed files (4)
src
src/others/podcast.py
@@ -26,7 +26,7 @@ from llm.utils import convert_html, convert_md, remove_consecutive_newlines
from messages.sender import send2tg
from networking import download_file, hx_req
from publish import publish_telegraph
-from utils import bare_url, count_subtitles, https_url, nowdt, rand_number, rand_string
+from utils import bare_url, count_subtitles, https_url, nowdt, rand_number, rand_string, readable_time
HEADERS = {
"User-Agent": "feedparser/6.0.11 +https://github.com/kurtmckee/feedparser/",
@@ -79,16 +79,16 @@ async def summary_pods(client: Client):
struct_time = entry["published_parsed"]
dt = datetime(*struct_time[:6], tzinfo=UTC).astimezone(ZoneInfo(TZ))
pubdate = f"{dt:%Y-%m-%d %H:%M:%S}"
- audio_caption = f"🎧播客: [{feed_title}]({pod_url})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {entry['itunes_duration']}\n📖简介: {desc}"
+ audio_caption = f"🎧播客: [{feed_title}]({pod_url})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {readable_time(entry['itunes_duration'])}\n📖简介: {desc}"
media = [{"video": path, "thumb": thumb}] if Path(path).suffix in AUDIO_EXT else [{"audio": path, "title": entry["title"], "performer": feed_title, "thumb": thumb}]
- prompt = f"请转录播客栏目《{feed_title}》的一期节目的音频。\n该期节目标题: {entry['title']}\n节目时长: {entry['itunes_duration']}\n节目简介: {desc}"
+ prompt = f"请转录播客栏目《{feed_title}》的一期节目的音频。\n该期节目标题: {entry['title']}\n节目时长: {readable_time(entry['itunes_duration'])}\n节目简介: {desc}"
engine = get_pod_asr_engine(feed_title, feed_url)
asr_res = await asr_file(asr_path, prompt=prompt, engine=engine, client=client, message=message, silent=True)
if asr_res.get("error") or len(asr_res.get("texts", "")) == 0:
return
subtitles = asr_res.get("texts", "")
- subtitle_caption = f"🎧播客名称: [{feed_title}]({pod_url})\n📝节目标题: [{entry['title']}]({entry['link']})\n🕒发布日期: {pubdate}\n"
- subtitle_caption += f"⏳节目时长: {entry['itunes_duration']}\n#️⃣文本字数: {count_subtitles(subtitles)}\n⏳阅读时长: {count_subtitles(subtitles) / READING_SPEED:.1f}分钟"
+ subtitle_caption = f"🎧播客: [{feed_title}]({pod_url})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {readable_time(entry['itunes_duration'])}"
+ subtitle_caption += f"\n#️⃣字数: {count_subtitles(subtitles)}\n⏳阅读: {readable_time(60 * count_subtitles(subtitles) / READING_SPEED)}"
if telegraph_url := await publish_telegraph(title=entry["title"], html=convert_html(audio_caption + subtitles), author=feed_title, url=entry["link"]):
subtitle_caption += f"\n⚡️[即时预览]({telegraph_url})"
await send2tg(client, message, texts=remove_img(audio_caption), media=media, reply_msg_id=-1) # Telegram DO NOT allow img tag in messages
@@ -96,7 +96,7 @@ async def summary_pods(client: Client):
await client.send_document(message.chat.id, f, file_name=f"{entry['title']}.txt", caption=subtitle_caption)
prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
- prompt += f"\n节目时长: {entry['itunes_duration']}\n节目简介: {desc}"
+ prompt += f"\n节目时长: {readable_time(entry['itunes_duration'])}\n节目简介: {desc}"
prompt += "\n请解读该播客内容, 只需关注内容本身, 不用概述播客的基本信息, 例如播客的标题, 日期, 时长等"
# Construct a message to call GPT
ai_msg = Message(
@@ -143,6 +143,7 @@ async def get_new_entries(feed_title: str, remote: dict) -> list[dict]:
guid = bare_url(unquote_plus(entry["link"]))
entry["db_key"] = f"Podcast/{feed_title}/{guid}"
entry["title"] = entry.get("title", "")
+ entry["itunes_duration"] = glom(entry, Coalesce("itunes_duration", "duration"), default="0")
struct_time = entry["published_parsed"]
dt = datetime(*struct_time[:6], tzinfo=UTC).astimezone(ZoneInfo(TZ))
delta = now - dt
src/preview/ytdlp.py
@@ -269,7 +269,7 @@ async def preview_ytdlp(
if subtitles:
if len(subtitles) > TEXT_LENGTH or transcription_force_file:
caption = f"{emoji}[{info['author']}]({info['author_url']})\n🕒{create_time}"
- caption += f"\n📝[{info['title']}]({url})\n#️⃣字符数: {count_subtitles(subtitles)}\n⏳阅读时长: {count_subtitles(subtitles) / READING_SPEED:.1f}分钟"
+ caption += f"\n📝[{info['title']}]({url})\n#️⃣字符数: {count_subtitles(subtitles)}\n⏳阅读时长: {readable_time(60 * count_subtitles(subtitles) / READING_SPEED)}"
if to_telegraph:
html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
if telegraph_url := await publish_telegraph(title=info["title"], html=html, author=info["author"], url=url):
src/subtitles/subtitle.py
@@ -23,7 +23,7 @@ from preview.utils import fetch_youtube_video_info, get_bilibili_video_info
from preview.ytdlp import preview_ytdlp
from publish import publish_telegraph
from subtitles.base import fetch_subtitle, match_url
-from utils import count_subtitles, rand_number, to_int
+from utils import count_subtitles, rand_number, readable_time, to_int
HELP = f"""📃**提取字幕**
使用说明:
@@ -120,7 +120,7 @@ async def get_subtitle(
return
logger.success(subtitles)
caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
- caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {res['reading_minutes']:.1f}分钟"
+ caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {readable_time(60 * res['reading_minutes'])}"
if to_telegraph:
html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
src/utils.py
@@ -196,7 +196,11 @@ def stringfy(d: dict) -> dict:
def readable_time(seconds: str | float) -> str:
"""Human readable time duration."""
- seconds = float(seconds)
+ try:
+ seconds = float(seconds)
+ except ValueError:
+ # already in readable time
+ return str(seconds)
if seconds < 60:
return f"{seconds:.0f}s"
if seconds < 3600: