Commit 4caa9a4

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-26 11:06:48
style(time): improve podcast and subtitle reading time
1 parent d4460d1
Changed files (4)
src
others
preview
subtitles
src/others/podcast.py
@@ -26,7 +26,7 @@ from llm.utils import convert_html, convert_md, remove_consecutive_newlines
 from messages.sender import send2tg
 from networking import download_file, hx_req
 from publish import publish_telegraph
-from utils import bare_url, count_subtitles, https_url, nowdt, rand_number, rand_string
+from utils import bare_url, count_subtitles, https_url, nowdt, rand_number, rand_string, readable_time
 
 HEADERS = {
     "User-Agent": "feedparser/6.0.11 +https://github.com/kurtmckee/feedparser/",
@@ -79,16 +79,16 @@ async def summary_pods(client: Client):
                 struct_time = entry["published_parsed"]
                 dt = datetime(*struct_time[:6], tzinfo=UTC).astimezone(ZoneInfo(TZ))
                 pubdate = f"{dt:%Y-%m-%d %H:%M:%S}"
-                audio_caption = f"🎧播客: [{feed_title}]({pod_url})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {entry['itunes_duration']}\n📖简介: {desc}"
+                audio_caption = f"🎧播客: [{feed_title}]({pod_url})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {readable_time(entry['itunes_duration'])}\n📖简介: {desc}"
                 media = [{"video": path, "thumb": thumb}] if Path(path).suffix in AUDIO_EXT else [{"audio": path, "title": entry["title"], "performer": feed_title, "thumb": thumb}]
-                prompt = f"请转录播客栏目《{feed_title}》的一期节目的音频。\n该期节目标题: {entry['title']}\n节目时长: {entry['itunes_duration']}\n节目简介: {desc}"
+                prompt = f"请转录播客栏目《{feed_title}》的一期节目的音频。\n该期节目标题: {entry['title']}\n节目时长: {readable_time(entry['itunes_duration'])}\n节目简介: {desc}"
                 engine = get_pod_asr_engine(feed_title, feed_url)
                 asr_res = await asr_file(asr_path, prompt=prompt, engine=engine, client=client, message=message, silent=True)
                 if asr_res.get("error") or len(asr_res.get("texts", "")) == 0:
                     return
                 subtitles = asr_res.get("texts", "")
-                subtitle_caption = f"🎧播客名称: [{feed_title}]({pod_url})\n📝节目标题: [{entry['title']}]({entry['link']})\n🕒发布日期: {pubdate}\n"
-                subtitle_caption += f"⏳节目时长: {entry['itunes_duration']}\n#️⃣文本字数: {count_subtitles(subtitles)}\n⏳阅读时长: {count_subtitles(subtitles) / READING_SPEED:.1f}分钟"
+                subtitle_caption = f"🎧播客: [{feed_title}]({pod_url})\n📝标题: [{entry['title']}]({entry['link']})\n🕒日期: {pubdate}\n⏳时长: {readable_time(entry['itunes_duration'])}"
+                subtitle_caption += f"\n#️⃣字数: {count_subtitles(subtitles)}\n⏳阅读: {readable_time(60 * count_subtitles(subtitles) / READING_SPEED)}"
                 if telegraph_url := await publish_telegraph(title=entry["title"], html=convert_html(audio_caption + subtitles), author=feed_title, url=entry["link"]):
                     subtitle_caption += f"\n⚡️[即时预览]({telegraph_url})"
                 await send2tg(client, message, texts=remove_img(audio_caption), media=media, reply_msg_id=-1)  # Telegram DO NOT allow img tag in messages
@@ -96,7 +96,7 @@ async def summary_pods(client: Client):
                     await client.send_document(message.chat.id, f, file_name=f"{entry['title']}.txt", caption=subtitle_caption)
 
                 prompt = f"这是播客栏目《{feed_title}》的一期节目详情:\n节目标题: {entry['title']}\n节目播出日期: {pubdate}"
-                prompt += f"\n节目时长: {entry['itunes_duration']}\n节目简介: {desc}"
+                prompt += f"\n节目时长: {readable_time(entry['itunes_duration'])}\n节目简介: {desc}"
                 prompt += "\n请解读该播客内容, 只需关注内容本身, 不用概述播客的基本信息, 例如播客的标题, 日期, 时长等"
                 # Construct a message to call GPT
                 ai_msg = Message(
@@ -143,6 +143,7 @@ async def get_new_entries(feed_title: str, remote: dict) -> list[dict]:
             guid = bare_url(unquote_plus(entry["link"]))
             entry["db_key"] = f"Podcast/{feed_title}/{guid}"
             entry["title"] = entry.get("title", "")
+            entry["itunes_duration"] = glom(entry, Coalesce("itunes_duration", "duration"), default="0")
             struct_time = entry["published_parsed"]
             dt = datetime(*struct_time[:6], tzinfo=UTC).astimezone(ZoneInfo(TZ))
             delta = now - dt
src/preview/ytdlp.py
@@ -269,7 +269,7 @@ async def preview_ytdlp(
         if subtitles:
             if len(subtitles) > TEXT_LENGTH or transcription_force_file:
                 caption = f"{emoji}[{info['author']}]({info['author_url']})\n🕒{create_time}"
-                caption += f"\n📝[{info['title']}]({url})\n#️⃣字符数: {count_subtitles(subtitles)}\n⏳阅读时长: {count_subtitles(subtitles) / READING_SPEED:.1f}分钟"
+                caption += f"\n📝[{info['title']}]({url})\n#️⃣字符数: {count_subtitles(subtitles)}\n⏳阅读时长: {readable_time(60 * count_subtitles(subtitles) / READING_SPEED)}"
                 if to_telegraph:
                     html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
                     if telegraph_url := await publish_telegraph(title=info["title"], html=html, author=info["author"], url=url):
src/subtitles/subtitle.py
@@ -23,7 +23,7 @@ from preview.utils import fetch_youtube_video_info, get_bilibili_video_info
 from preview.ytdlp import preview_ytdlp
 from publish import publish_telegraph
 from subtitles.base import fetch_subtitle, match_url
-from utils import count_subtitles, rand_number, to_int
+from utils import count_subtitles, rand_number, readable_time, to_int
 
 HELP = f"""📃**提取字幕**
 使用说明:
@@ -120,7 +120,7 @@ async def get_subtitle(
             return
         logger.success(subtitles)
         caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['date']:%Y-%m-%d %H:%M:%S}\n"
-        caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {res['reading_minutes']:.1f}分钟"
+        caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {readable_time(60 * res['reading_minutes'])}"
         if to_telegraph:
             html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
             if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
src/utils.py
@@ -196,7 +196,11 @@ def stringfy(d: dict) -> dict:
 
 def readable_time(seconds: str | float) -> str:
     """Human readable time duration."""
-    seconds = float(seconds)
+    try:
+        seconds = float(seconds)
+    except ValueError:
+        # not numeric, so presumably already a human-readable duration; return it as-is
+        return str(seconds)
     if seconds < 60:
         return f"{seconds:.0f}s"
     if seconds < 3600: