Commit 8e86115
Changed files (1)
src
others
src/others/podcast.py
@@ -49,10 +49,10 @@ async def summary_pods(client: Client):
feed_xml = load_xml(data["text"])
save_feed_url = align_opml_url(feed_url)
data = await hx_req(save_feed_url, rformat="text", headers=HEADERS, timeout=10, silent=True, proxy=PODCAST.PROXY)
- save_feed_xml = load_xml(data["text"]) if data.get("text") else feed_xml
+ saved_xml = load_xml(data["text"]) if data.get("text") else feed_xml
has_update = False
pod_url = clean_pod_url(feed.feed.link) # type: ignore
- for entry in await get_new_entries(feed_title, feed):
+ for entry in await get_new_entries(feed_title, feed, saved_xml):
message = Message(id=rand_number(), chat=Chat(id=PODCAST.TID))
enclosure = next((x["href"] for x in entry["links"] if x.get("rel", "") == "enclosure"), "")
if not enclosure:
@@ -112,7 +112,7 @@ async def summary_pods(client: Client):
gpt_res = await gpt_response(client, ai_msg, include_thoughts=False, append_grounding=False, show_progress=True)
cache.delete(f"parse_msg-{txt_msg.chat.id}-{txt_msg.id}")
feed_item = match_item(feed_xml, entry)
- update_item(save_feed_xml, feed_item, prefix_desc=gpt_res.get("texts", ""))
+ update_item(saved_xml, feed_item, prefix_desc=gpt_res.get("texts", ""))
await set_cf_r2(entry["db_key"], data={"title": entry["title"], "url": entry["link"], "file": enclosure})
has_update = True
except Exception as e:
@@ -120,7 +120,7 @@ async def summary_pods(client: Client):
await send2tg(client, message, texts=f"Failed podcast {feed_title} -- {entry['title']}: {e}", reply_msg_id=-1)
continue
if has_update:
- await save_xml(save_feed_xml, feed_url)
+ await save_xml(saved_xml, feed_url)
# save opml
opml = load_xml("", template="opml")
@@ -137,13 +137,17 @@ async def summary_pods(client: Client):
logger.success("Podcast has been updated.")
-async def get_new_entries(feed_title: str, remote: dict) -> list[dict]:
+async def get_new_entries(feed_title: str, remote: dict, saved: dict) -> list[dict]:
"""Get new entries from feed."""
try:
now = nowdt()
new_entries = []
+ saved_enclosure_urls = glom(saved, "rss.channel.item.*.enclosure.@url", default=[])
sorted_entries = sorted(remote["entries"], key=lambda x: x.get("published_parsed", x.get("updated", now)), reverse=True) # new to old
for entry in sorted_entries:
+ enclosure = next((x["href"] for x in entry["links"] if x.get("rel", "") == "enclosure"), "")
+ if enclosure in saved_enclosure_urls:
+ continue
entry["link"] = https_url(clean_pod_url(entry.get("link", "")))
guid = bare_url(unquote_plus(entry["link"]))
entry["db_key"] = f"Podcast/{feed_title}/{guid}"