Commit d294f98
Changed files (6)
src/preview/reddit.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import contextlib
+import re
+from datetime import UTC, datetime
+from zoneinfo import ZoneInfo
+
+from glom import glom
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.types import Message
+
+from config import DB, PROXY, TZ
+from database import get_db
+from messages.database import copy_messages_from_db, save_messages
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import summay_media
+from networking import download_file, download_media, hx_req
+from preview.utils import has_markdown_img
+from utils import nowstr
+
+
async def preview_reddit(client: Client, message: Message, url: str = "", db_key: str = "", **kwargs):
    """Render a preview of a Reddit post link into the chat.

    Tries the database cache first; on a miss (or a failed cached forward)
    re-parses the post, sends it, and caches the sent messages.

    Args:
        client (Client): The Pyrogram client.
        message (Message): The trigger message object.
        url (str, optional): Reddit link.
        db_key (str, optional): The cache key.
    """
    # Attach a progress message unless the caller already supplied one.
    need_progress = kwargs.get("show_progress") and "progress" not in kwargs
    if need_progress:
        sent = await send2tg(client, message, texts=f"🔗正在解析Reddit链接\n{url}", **kwargs)
        kwargs["progress"] = sent[0]

    # Cache fast path: forward previously sent messages when possible.
    kv = await get_db(db_key)
    if kv:
        logger.debug(f"Reddit preview {DB.ENGINE} cache hit for key={db_key}")
        forwarded = await copy_messages_from_db(client, message, key=db_key, kv=kv, **kwargs)
        if forwarded:
            return
        await modify_progress(text=f"❌从{DB.ENGINE}缓存中转发失败, 尝试重新解析...", **kwargs)

    logger.info(f"Reddit link preview for {url}")
    post_info = await get_reddit_info(url)
    error = post_info.get("error")
    if error:
        await modify_progress(text=f"❌Reddit链接解析失败{url}\n{error}", force_update=True, **kwargs)
        return

    sent_messages = await send2tg(client, message, **post_info, **kwargs)
    await modify_progress(del_status=True, **kwargs)
    await save_messages(messages=sent_messages, key=db_key)
+
+
async def get_reddit_info(url: str, **kwargs) -> dict:
    """Fetch a Reddit post's text, media, and comments via the public ``.json`` API.

    Args:
        url (str): Reddit post URL (no trailing slash).
        **kwargs: Forwarded to ``hx_req``/``download_file``/``download_media``
            (e.g. progress options).

    Returns:
        dict: ``{"texts": str, "media": list, "comments": list}`` on success,
        or ``{"error": str}`` on any failure.
    """
    # Appending ".json" to a post URL returns a 2-element listing:
    # [0] = the post itself, [1] = its comment tree.
    api_url = url + ".json"
    # check_kv/check_keys validate the expected response shape inside hx_req.
    resp = await hx_req(api_url, proxy=PROXY.REDDIT, check_kv={"0.data.dist": 1, "1.data.children.0.kind": "t1"}, check_keys=["0.data.children.0.data.selftext"], **kwargs)
    if isinstance(resp, dict) and resp.get("hx_error"):
        return {"error": resp["hx_error"]}
    try:
        data = glom(resp, "0.data.children.0.data")
        title = data.get("title", "Title")
        author = data.get("author", "author")
        author_url = f"https://www.reddit.com/user/{author}"
        # Fall back to the current time string if the timestamp is missing/invalid.
        dt = nowstr()
        with contextlib.suppress(Exception):
            dt = datetime.fromtimestamp(data["created_utc"], tz=UTC).astimezone(ZoneInfo(TZ))
            dt = dt.strftime("%Y-%m-%d %H:%M:%S")
        desc = remove_preview_links(data.get("selftext", "")).strip()
        texts = f"🎈[{author}]({author_url})\n🕒{dt}\n**📝[{title}]({url})**\n{desc}"
        media = []
        # NOTE(review): download_file is not awaited here — presumably it returns
        # an awaitable/handle that download_media resolves below; confirm.
        if gallery := glom(data, "media_metadata.*", default=[]):  # gallery post: multiple images
            for img in gallery:
                ext = img.get("m", "").split("/")[-1]  # mime type "image/png" -> "png"
                img_url = f"https://i.redd.it/{img['id']}.{ext}"
                media.append({"photo": download_file(img_url, proxy=PROXY.REDDIT, **kwargs)})
        elif data.get("url", "").startswith("https://i.redd.it/"):  # single-image post
            media.append({"photo": download_file(data["url"], proxy=PROXY.REDDIT, **kwargs)})
        if video_url := glom(data, "secure_media.reddit_video.fallback_url", default=""):
            media.append({"video": download_file(video_url, proxy=PROXY.REDDIT, **kwargs)})
        comments = []
        for reply in glom(resp, "1.data.children.*.data"):
            author = reply.get("author", "author")
            author_url = f"https://www.reddit.com/user/{author}"
            comment = reply.get("body", "")
            # Skip deleted authors, removed bodies, and comments embedding
            # markdown images (those cannot be rendered in a text preview).
            if author == "[deleted]":
                continue
            if comment == "[removed]" or has_markdown_img(comment):
                continue
            comments.append(f"\n💬**[{author}]({author_url})**: {comment}")
        if comments:
            # Prepend the expandable-blockquote delimiter so the comment
            # section renders collapsed in Telegram.
            comments.insert(0, f"\n{BLOCKQUOTE_EXPANDABLE_DELIM}💬**点此展开评论区**:")
        await modify_progress(text=f"⏬正在下载:\n{summay_media(media)}", force_update=True, **kwargs)
        media = await download_media(media, **kwargs)
    except Exception as e:
        # Any parsing/download failure is reported back as {"error": ...}
        # so the caller can show it instead of crashing the handler.
        logger.error(e)
        return {"error": str(e)}
    return {"texts": texts, "media": media, "comments": comments}
+
+
def remove_preview_links(text: str) -> str:
    """Strip ``preview.redd.it`` links from Reddit post contents.

    Reddit injects auto-generated preview image URLs into selftext; they are
    noise in a Telegram preview, so each link (plus its trailing whitespace
    separator, if any) is removed.

    Args:
        text (str): Raw selftext of the post.

    Returns:
        str: Text with all preview links removed.
    """
    # \s? (optional) instead of \s: the old pattern required trailing
    # whitespace, so a link at the very end of the text was never removed.
    pattern = r"https?://preview\.redd\.it/\S+\s?"
    return re.sub(pattern, "", text)
src/preview/utils.py
@@ -79,3 +79,12 @@ def make_bvid_clickable(texts: str) -> str:
# match bilibili links or bvid only
pattern = r"(https?://)?(:?m\.|www\.)?bilibili\.com/video/(BV1[a-zA-Z0-9]{9})\b|\bBV1[a-zA-Z0-9]{9}\b"
return re.sub(pattern, markdown_url, texts)
+
+
def has_markdown_img(text: str) -> bool:
    """Return True when *text* contains a markdown image tag.

    A markdown image looks like ``![alt](https://example.com/pic.gif)``.

    Args:
        text (str): Text to scan.

    Returns:
        bool: True if an image tag is present anywhere in the text.
    """
    return re.search(r"!\[.*?\]\(.*?\)", text) is not None
src/config.py
@@ -49,6 +49,7 @@ class ENABLE: # see fine-grained permission in `src/permission.py`
TWITTER = os.getenv("ENABLE_TWITTER", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WEIBO = os.getenv("ENABLE_WEIBO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WECHAT = os.getenv("ENABLE_WECHAT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+ REDDIT = os.getenv("ENABLE_REDDIT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WGET = os.getenv("ENABLE_WGET", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
XHS = os.getenv("ENABLE_XHS", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
YTDLP = os.getenv("ENABLE_YTDLP", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -135,6 +136,7 @@ class PROXY: # format: socks5://127.0.0.1:7890
GOOGLE_SEARCH = os.getenv("GOOGLE_SEARCH_PROXY", None)
DOWNLOAD = os.getenv("DOWNLOAD_PROXY", None)
WEIBO = os.getenv("WEIBO_PROXY", None)
+ REDDIT = os.getenv("REDDIT_PROXY", None)
YTDLP = os.getenv("YTDLP_PROXY", None) # general proxy for ytdlp
YTDLP_FALLBACK = os.getenv("YTDLP_PROXY_FALLBACK", None) # fallback proxy for ytdlp
# for ytdlp proxy of specific sites (Like Bilibili), use this format: YTDLP_PROXY_BILIBILI
src/handler.py
@@ -26,6 +26,7 @@ from permission import check_service
from preview.bilibili import preview_bilibili
from preview.douyin import preview_douyin
from preview.instagram import preview_instagram
+from preview.reddit import preview_reddit
from preview.twitter import preview_twitter
from preview.wechat import preview_wechat
from preview.weibo import preview_weibo
@@ -119,6 +120,7 @@ async def handle_social_media(
instagram: bool = True,
twitter: bool = True,
weibo: bool = True,
+ reddit: bool = True,
xhs: bool = True,
ytdlp: bool = True,
show_progress: bool = True,
@@ -226,6 +228,8 @@ async def handle_social_media(
await preview_xhs(client, message, **kwargs)
if xhs and matched["platform"] == "wechat":
await preview_wechat(client, message, **kwargs)
+ if reddit and matched["platform"] == "reddit":
+ await preview_reddit(client, message, **kwargs)
if matched["platform"].startswith("bilibili-"): # this is not bilibili video, for videos, use yt-dlp
await preview_bilibili(client, message, **kwargs)
try:
@@ -302,8 +306,10 @@ def get_social_media_help(chat_id: int | str, ctype: str, prefixes: list[str] |
msg += "\n🎶TikTok"
if permission["instagram"]:
msg += "\n🏞Instagram"
+ if permission["reddit"]:
+ msg += "\n🎈Reddit"
if permission["wechat"]:
- msg += "\n🟢微信公众号文章"
+ msg += "\n🟢微信文章"
if permission["ytdlp"]:
msg += "\n🔴油管"
msg += "\n🅱️哔哩哔哩"
src/networking.py
@@ -361,6 +361,15 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
if matched := re.search(r"(https?://)?mp.weixin.qq.com/s[\/|\?]{1}([_A-Za-z\=\&0-9\#\-]+)", text):
return {"url": matched.group(0), "db_key": bare_url(matched.group(0)), "platform": "wechat"}
+ # https://www.reddit.com/r/DoubanGoosegroup/comments/1jkpgvp/%E8%B5%B5%E8%96%87%E4%BB%80%E4%B9%88%E6%97%B6%E5%80%99%E5%9B%9E%E6%9D%A5/
+ # https://www.reddit.com/r/DoubanGoosegroup/comments/1jkpgvp/赵薇什么时候回来
+ # https://www.reddit.com/r/DoubanGoosegroup/comments/1jkpgvp/comment/mk43l4t/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button
+ if matched := re.search(r"(https?://)?(:?m\.|www\.)?reddit\.com/r/([_A-Za-z0-9]+)/comments/(.*?)/([^,,.。\?\s]+)", text):
+ return {"url": matched.group(0).rstrip("/"), "db_key": bare_url(matched.group(0).rstrip("/")), "platform": "reddit"}
+ # https://reddit.com/comments/1kaazzn
+ if matched := re.search(r"(https?://)?(:?m\.|www\.)?reddit\.com/comments/([_A-Za-z0-9]+)", text):
+ return {"url": matched.group(0).rstrip("/"), "db_key": bare_url(matched.group(0).rstrip("/")), "platform": "reddit"}
+
# if all above pre-defined patterns failed, try to match ytdlp link
if urls := match_urls(text):
for url in urls:
@@ -444,7 +453,8 @@ if __name__ == "__main__":
check_data(json.dumps({"foo": "bar", "baz": {"qux": "quux"}, "lst": ["1", "2", "3"]}), check_keys=["baz.qux"], check_kv={"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]})
# asyncio.run(match_social_media_link("https://b23.tv/3MSgT4q/", flatten_first=True))
- print(asyncio.run(match_social_media_link("https://mp.weixin.qq.com/s/bd_giuPEyPBu9LTOtC2VHw", flatten_first=True)))
+ # print(asyncio.run(match_social_media_link("https://mp.weixin.qq.com/s/bd_giuPEyPBu9LTOtC2VHw", flatten_first=True)))
+ print(asyncio.run(match_social_media_link("https://reddit.com/comments/1kaazzn", flatten_first=True)))
# asyncio.run(match_social_media_link("https://www.facebook.com/share/r/19QGGp39T3/", flatten_first=True))
# asyncio.run(match_social_media_link("https://www.douyin.com/video/7398813386827468041"))
# asyncio.run(match_social_media_link("https://www.iesdouyin.com/share/note/7454527270925946138/"))
src/permission.py
@@ -113,6 +113,7 @@ def check_service(cid: int | str, ctype: str) -> dict:
"weibo": True,
"xhs": True,
"wechat": True,
+ "reddit": True,
"ytdlp": True,
}
@@ -135,6 +136,8 @@ def check_service(cid: int | str, ctype: str) -> dict:
permission["instagram"] = False
if not ENABLE.WECHAT:
permission["wechat"] = False
+ if not ENABLE.REDDIT:
+ permission["reddit"] = False
if not ENABLE.YTDLP:
permission["ytdlp"] = False
if not ENABLE.GPT: