Commit 8e86115

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-08-16 07:25:19
chore(podcast): improve new-entries checking
1 parent 00b9860
Changed files (1)
src
others
src/others/podcast.py
@@ -49,10 +49,10 @@ async def summary_pods(client: Client):
         feed_xml = load_xml(data["text"])
         save_feed_url = align_opml_url(feed_url)
         data = await hx_req(save_feed_url, rformat="text", headers=HEADERS, timeout=10, silent=True, proxy=PODCAST.PROXY)
-        save_feed_xml = load_xml(data["text"]) if data.get("text") else feed_xml
+        saved_xml = load_xml(data["text"]) if data.get("text") else feed_xml
         has_update = False
         pod_url = clean_pod_url(feed.feed.link)  # type: ignore
-        for entry in await get_new_entries(feed_title, feed):
+        for entry in await get_new_entries(feed_title, feed, saved_xml):
             message = Message(id=rand_number(), chat=Chat(id=PODCAST.TID))
             enclosure = next((x["href"] for x in entry["links"] if x.get("rel", "") == "enclosure"), "")
             if not enclosure:
@@ -112,7 +112,7 @@ async def summary_pods(client: Client):
                 gpt_res = await gpt_response(client, ai_msg, include_thoughts=False, append_grounding=False, show_progress=True)
                 cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
                 feed_item = match_item(feed_xml, entry)
-                update_item(save_feed_xml, feed_item, prefix_desc=gpt_res.get("texts", ""))
+                update_item(saved_xml, feed_item, prefix_desc=gpt_res.get("texts", ""))
                 await set_cf_r2(entry["db_key"], data={"title": entry["title"], "url": entry["link"], "file": enclosure})
                 has_update = True
             except Exception as e:
@@ -120,7 +120,7 @@ async def summary_pods(client: Client):
                 await send2tg(client, message, texts=f"Failed podcast {feed_title} -- {entry['title']}: {e}", reply_msg_id=-1)
                 continue
         if has_update:
-            await save_xml(save_feed_xml, feed_url)
+            await save_xml(saved_xml, feed_url)
 
     # save opml
     opml = load_xml("", template="opml")
@@ -137,13 +137,17 @@ async def summary_pods(client: Client):
     logger.success("Podcast has been updated.")
 
 
-async def get_new_entries(feed_title: str, remote: dict) -> list[dict]:
+async def get_new_entries(feed_title: str, remote: dict, saved: dict) -> list[dict]:
     """Get new entries from feed."""
     try:
         now = nowdt()
         new_entries = []
+        saved_enclosure_urls = glom(saved, "rss.channel.item.*.enclosure.@url", default=[])
         sorted_entries = sorted(remote["entries"], key=lambda x: x.get("published_parsed", x.get("updated", now)), reverse=True)  # new to old
         for entry in sorted_entries:
+            enclosure = next((x["href"] for x in entry["links"] if x.get("rel", "") == "enclosure"), "")
+            if enclosure in saved_enclosure_urls:
+                continue
             entry["link"] = https_url(clean_pod_url(entry.get("link", "")))
             guid = bare_url(unquote_plus(entry["link"]))
             entry["db_key"] = f"Podcast/{feed_title}/{guid}"