Commit 7cbbd85

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-08-19 14:27:52
feat(v2ex): add `v2ex` link preview support
1 parent 67c390f
src/preview/v2ex.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+from datetime import UTC, datetime
+from pathlib import Path
+from zoneinfo import ZoneInfo
+
+from glom import glom
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from config import PROXY, TELEGRAM_UA, TOKEN, TZ
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import summay_media
+from networking import download_file, download_media, hx_req
+from utils import number_to_emoji
+
+
+async def preview_v2ex(client: Client, message: Message, url: str = "", topic_id: str = "", **kwargs):
+    """Fetch a V2EX topic from the v2 API and send its text and images via `send2tg`.
+
+    Args:
+        client (Client): The Pyrogram client.
+        message (Message): The trigger message object.
+        url (str, optional): The V2EX topic link; used in progress/error text and as the title link.
+        topic_id (str, optional): The topic id interpolated into the API URL.
+        **kwargs: Forwarded to the send/progress/download helpers; `show_progress` and `progress` are read here.
+    """
+    if kwargs.get("show_progress") and "progress" not in kwargs:  # no progress message yet: create one
+        sent = await send2tg(client, message, texts=f"🔗正在解析V2ex链接\n{url}", **kwargs)
+        kwargs["progress"] = sent[0]
+    logger.info(f"v2ex link preview for {url}")
+    api_url = f"https://www.v2ex.com/api/v2/topics/{topic_id}"
+    expected = {"success": True, "result.id": topic_id}
+    auth = {"Authorization": f"Bearer {TOKEN.V2EX}"}
+    payload = await hx_req(api_url, proxy=PROXY.V2EX, headers=auth, check_kv=expected)
+    if error := payload.get("error"):
+        await modify_progress(text=f"❌v2ex链接解析失败{url}\n{error}", force_update=True, **kwargs)
+        return
+    # header lines: author link, `result.created` converted to the TZ config zone, linked title
+    author = glom(payload, "result.member.username", default="V2EX_User")
+    title = glom(payload, "result.title", default="Title")
+    created = datetime.fromtimestamp(glom(payload, "result.created", default=0), tz=UTC).astimezone(ZoneInfo(TZ))
+    content, img_urls = extract_and_remove_images_regex(glom(payload, "result.content", default=""))
+    parts = [f"💻[{author}](https://www.v2ex.com/member/{author})\n", f"🕒{created:%Y-%m-%d %H:%M:%S}\n"]
+    parts += [f"📝[{title}]({url})\n", content + "\n"]
+    # `result.supplements` entries from the API response, numbered from 1
+    for number, supp in enumerate(glom(payload, "result.supplements", default=[]) or [], start=1):
+        parts.append(f"\n补充留言{number_to_emoji(number)}:\n{supp.get('content', '')}\n")
+    texts = "".join(parts)
+    pending = await download_imgs(img_urls)
+    if pending:
+        await modify_progress(text=f"⏬正在下载:\n{summay_media(pending)}", force_update=True, **kwargs)
+    downloaded = await download_media(pending, **kwargs)
+    await send2tg(client, message, texts=texts, media=downloaded, **kwargs)
+    await modify_progress(del_status=True, **kwargs)
+
+
+def extract_and_remove_images_regex(markdown_text: str) -> tuple[str, list[str]]:
+    """Split markdown into its text without image tags and the image URLs those tags referenced.
+
+    ``None``/empty input yields ``("", [])``; a tag with an empty URL (``![]()``) is still
+    removed from the text but contributes no URL, so callers never receive ``""`` to download.
+
+    Returns:
+        tuple[str, list[str]]: The markdown text without images and the extracted image URLs.
+    """
+    if not markdown_text:
+        return "", []
+    image_pattern = r'!\[([^\]]*)\]\((.*?)\s*(".*?")?\)'  # groups: alt text, URL, optional quoted title
+    urls = [u for m in re.finditer(image_pattern, markdown_text) if (u := m.group(2).strip())]
+    return re.sub(image_pattern, "", markdown_text), urls
+
+
+async def download_imgs(img_urls: list[str]) -> list[dict]:
+    """Wrap each URL's ``download_file(...)`` result in a ``{"photo": ...}`` dict; nothing is awaited here."""
+    entries = []
+    for link in img_urls:
+        # anything that is not an imgur direct link goes through the V2EX proxy without extra headers
+        if not link.startswith("https://i.imgur.com/"):
+            entries.append({"photo": download_file(link, proxy=PROXY.V2EX)})
+            continue
+        # imgur direct links: WARP proxy, Referer built from the file stem, Telegram User-Agent
+        imgur_headers = {"Referer": f"https://imgur.com/{Path(link).stem}", "User-Agent": TELEGRAM_UA}
+        entries.append({"photo": download_file(link, proxy=PROXY.WARP, headers=imgur_headers)})
+    return entries
src/config.py
@@ -53,6 +53,7 @@ class ENABLE:  # see fine-grained permission in `src/permission.py`
     WEIBO = os.getenv("ENABLE_WEIBO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     WECHAT = os.getenv("ENABLE_WECHAT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     REDDIT = os.getenv("ENABLE_REDDIT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+    V2EX = os.getenv("ENABLE_V2EX", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     WGET = os.getenv("ENABLE_WGET", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     GITHUB = os.getenv("ENABLE_GITHUB", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     MUSIC163 = os.getenv("ENABLE_MUSIC163", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -161,6 +162,7 @@ class TOKEN:
     GITHUB = os.getenv("GITHUB_TOKEN", "")
     SPOTIFY_CLIENT_ID = os.getenv("SPOTIFY_CLIENT_ID", "")
     SPOTIFY_CLIENT_SECRET = os.getenv("SPOTIFY_CLIENT_SECRET", "")
+    V2EX = os.getenv("V2EX_TOKEN", "")
 
 
 class PROXY:  # format: socks5://127.0.0.1:7890
@@ -175,6 +177,7 @@ class PROXY:  # format: socks5://127.0.0.1:7890
     TIKTOK = os.getenv("TIKTOK_PROXY", None)
     INSTAGRAM = os.getenv("INSTAGRAM_PROXY", None)
     TWITTER = os.getenv("TWITTER_PROXY", None)
+    WARP = os.getenv("WARP_PROXY", None)
     SPOTIFY = os.getenv("SPOTIFY_PROXY", None)
     SUBTITLE = os.getenv("SUBTITLE_PROXY", None)
     YOUTUBE_SEARCH = os.getenv("YOUTUBE_SEARCH_PROXY", None)
@@ -183,6 +186,7 @@ class PROXY:  # format: socks5://127.0.0.1:7890
     DOWNLOAD = os.getenv("DOWNLOAD_PROXY", None)
     WEIBO = os.getenv("WEIBO_PROXY", None)
     REDDIT = os.getenv("REDDIT_PROXY", None)
+    V2EX = os.getenv("V2EX_PROXY", None)
     GITHUB = os.getenv("GITHUB_PROXY", None)
     YTDLP = os.getenv("YTDLP_PROXY", None)  # general proxy for ytdlp
     YTDLP_FALLBACK = os.getenv("YTDLP_PROXY_FALLBACK", None)  # fallback proxy for ytdlp
src/handler.py
@@ -34,6 +34,7 @@ from preview.netease import preview_music163
 from preview.reddit import preview_reddit
 from preview.spotify import preview_spotify
 from preview.twitter import preview_twitter
+from preview.v2ex import preview_v2ex
 from preview.wechat import preview_wechat
 from preview.weibo import preview_weibo
 from preview.xiaohongshu import preview_xhs
@@ -158,6 +159,7 @@ async def handle_social_media(
     reddit: bool = True,
     github: bool = True,
     xhs: bool = True,
+    v2ex: bool = True,
     music163: bool = True,
     spotify: bool = True,
     ytdlp: bool = True,
@@ -269,6 +271,8 @@ async def handle_social_media(
             return await preview_music163(client, message, **kwargs)
         if spotify and matched["platform"] == "spotify":
             return await preview_spotify(client, message, **kwargs)
+        if v2ex and matched["platform"] == "v2ex":
+            return await preview_v2ex(client, message, **kwargs)
         if matched["platform"].startswith("bilibili-"):  # this is not bilibili video, for videos, use yt-dlp
             return await preview_bilibili(client, message, **kwargs)
         sent_messages = []
@@ -352,6 +356,8 @@ def get_social_media_help(chat_id: int | str, ctype: str, prefix: str):
         msg += "\n🎧Spotify"
     if permission["reddit"]:
         msg += "\n🎈Reddit"
+    if permission["v2ex"]:
+        msg += "\n💻V2EX"
     if permission["wechat"]:
         msg += "\n🟢微信文章"
     if permission["github"]:
src/networking.py
@@ -358,6 +358,12 @@ async def match_social_media_link(text: str, *, flatten_first: bool = True) -> d
         url = f"https://github.com/{gh_user}/{gh_repo}"
         return {"url": url, "db_key": bare_url(url), "gh_user": gh_user, "gh_repo": gh_repo, "platform": "github"}
 
+    # https://www.v2ex.com/t/1153086
+    if matched := re.search(r"(https?://)?(www\.)?v2ex\.com/t/(\d+)", text):
+        topic_id = matched.group(3)
+        url = f"https://www.v2ex.com/t/{topic_id}"
+        return {"url": url, "db_key": bare_url(url), "topic_id": topic_id, "platform": "v2ex"}
+
     # https://open.spotify.com/track/0cOMncRq4cmDLO4tPQnkBF
     if matched := re.search(r"(https?://)?open\.spotify\.com/(:?track|album|artist|playlist)/([a-zA-Z0-9]+)", text):
         resource = matched.group(2)
src/permission.py
@@ -116,6 +116,7 @@ def check_service(cid: int | str, ctype: str) -> dict:
         "twitter": True,
         "weibo": True,
         "xhs": True,
+        "v2ex": True,
         "music163": True,
         "spotify": True,
         "github": True,
@@ -153,6 +154,8 @@ def check_service(cid: int | str, ctype: str) -> dict:
         permission["instagram"] = False
     if not ENABLE.WECHAT:
         permission["wechat"] = False
+    if not ENABLE.V2EX:
+        permission["v2ex"] = False
     if not ENABLE.REDDIT:
         permission["reddit"] = False
     if not ENABLE.YTDLP: