Commit 7cbbd85
Changed files (5)
src/preview/v2ex.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+from datetime import UTC, datetime
+from pathlib import Path
+from zoneinfo import ZoneInfo
+
+from glom import glom
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.types import Message
+
+from config import PROXY, TELEGRAM_UA, TOKEN, TZ
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import summay_media
+from networking import download_file, download_media, hx_req
+from utils import number_to_emoji
+
+
+async def preview_v2ex(client: Client, message: Message, url: str = "", topic_id: str = "", **kwargs):
+    """Preview a V2EX topic link found in the message.
+
+    Fetches the topic from the V2EX API v2, builds a text summary (author,
+    creation time, title, content, supplements), turns the markdown images
+    into media and sends the result with ``send2tg``.
+
+    Args:
+        client (Client): The Pyrogram client.
+        message (Message): The trigger message object.
+        url (str, optional): V2EX topic link. Rebuilt from ``topic_id`` when empty.
+        topic_id (str, optional): Topic id used in the API path. Parsed from ``url`` when empty.
+        **kwargs: Forwarded to ``send2tg``, ``modify_progress`` and ``download_media``.
+            ``show_progress`` asks for a progress message, which is stored under ``progress``.
+
+    Returns:
+        None. Failures are reported through ``modify_progress``.
+    """
+    # Each identifier can be derived from the other, so accept either one alone.
+    if not topic_id and (found := re.search(r"/t/(\d+)", url)):
+        topic_id = found.group(1)
+    if topic_id and not url:
+        url = f"https://www.v2ex.com/t/{topic_id}"
+
+    if kwargs.get("show_progress") and "progress" not in kwargs:
+        res = await send2tg(client, message, texts=f"🔗正在解析V2ex链接\n{url}", **kwargs)
+        kwargs["progress"] = res[0]
+    logger.info(f"v2ex link preview for {url}")
+    if not topic_id:
+        # An empty id would turn the request below into ".../api/v2/topics/" with no topic.
+        await modify_progress(text=f"❌v2ex链接解析失败{url}\nmissing topic id", force_update=True, **kwargs)
+        return
+
+    headers = {"Authorization": f"Bearer {TOKEN.V2EX}"}
+    topic_api = f"https://www.v2ex.com/api/v2/topics/{topic_id}"
+    # NOTE(review): topic_id is a str; confirm hx_req's check_kv matches it against the id the API returns.
+    resp = await hx_req(topic_api, proxy=PROXY.V2EX, headers=headers, check_kv={"success": True, "result.id": topic_id})
+    if error := resp.get("error"):
+        await modify_progress(text=f"❌v2ex链接解析失败{url}\n{error}", force_update=True, **kwargs)
+        return
+
+    author = glom(resp, "result.member.username", default="V2EX_User")
+    author_url = f"https://www.v2ex.com/member/{author}"
+    title = glom(resp, "result.title", default="Title")
+    ts = glom(resp, "result.created", default=0)
+    created = datetime.fromtimestamp(ts, tz=UTC).astimezone(ZoneInfo(TZ)).strftime("%Y-%m-%d %H:%M:%S")
+    # glom's default only applies when the path is missing, so guard against an explicit null too.
+    content, img_urls = extract_and_remove_images_regex(glom(resp, "result.content", default="") or "")
+    parts = [
+        f"💻[{author}]({author_url})\n",
+        f"🕒{created}\n",
+        f"📝[{title}]({url})\n",
+        content + "\n",
+    ]
+    # Supplements are assumed to use the same markdown as the main content: extract their
+    # images as well so they are sent as media instead of staying as raw "![](...)" text.
+    for idx, supp in enumerate(glom(resp, "result.supplements", default=[]) or [], start=1):
+        supp_text, supp_imgs = extract_and_remove_images_regex(supp.get("content") or "")
+        img_urls.extend(supp_imgs)
+        parts.append(f"\n补充留言{number_to_emoji(idx)}:\n{supp_text}\n")
+    texts = "".join(parts)
+
+    media = await download_imgs(img_urls)
+    if media:
+        await modify_progress(text=f"⏬正在下载:\n{summay_media(media)}", force_update=True, **kwargs)
+        media = await download_media(media, **kwargs)
+    await send2tg(client, message, texts=texts, media=media, **kwargs)
+    await modify_progress(del_status=True, **kwargs)
+
+
+# Markdown image: ![alt](url) with an optional "title" or 'title' after the URL.
+_MD_IMAGE_RE = re.compile(r'!\[(?P<alt>[^\]]*)\]\((?P<url>.*?)\s*(?P<title>".*?"|\'.*?\')?\)')
+
+
+def extract_and_remove_images_regex(markdown_text: str) -> tuple[str, list[str]]:
+    """Extract images from markdown text and remove them from the text.
+
+    Args:
+        markdown_text (str): Markdown source. ``None`` or an empty string yields ``("", [])``.
+
+    Returns:
+        tuple[str, list[str]]: The markdown text without image tags and the image
+        URLs in order of appearance. Image tags with an empty URL are removed
+        from the text but contribute no URL.
+    """
+    if not markdown_text:
+        return "", []
+
+    urls: list[str] = []
+
+    def _collect(match: re.Match) -> str:
+        # Record the URL while the substitution drops the whole image tag (single pass).
+        url = match.group("url").strip()
+        if url:
+            urls.append(url)
+        return ""
+
+    text_without_images = _MD_IMAGE_RE.sub(_collect, markdown_text)
+    return text_without_images, urls
+
+
+async def download_imgs(img_urls: list[str]) -> list[dict]:
+    """Build one ``{"photo": ...}`` media entry per image URL.
+
+    ``download_file`` is called but not awaited here, so each ``photo`` value is
+    whatever that call returns (presumably an awaitable resolved later by the
+    caller — confirm against ``download_file``).
+
+    Args:
+        img_urls (list[str]): Image URLs, in the order they should be sent.
+
+    Returns:
+        list[dict]: One ``{"photo": <download_file result>}`` dict per URL, same order.
+    """
+    imgur_prefix = "https://i.imgur.com/"
+
+    def _fetch(link: str):
+        if not link.startswith(imgur_prefix):
+            return download_file(link, proxy=PROXY.V2EX)
+        # imgur.com: send the image's page as Referer together with the Telegram UA, via the WARP proxy.
+        page = f"https://imgur.com/{Path(link).stem}"
+        return download_file(link, proxy=PROXY.WARP, headers={"Referer": page, "User-Agent": TELEGRAM_UA})
+
+    return [{"photo": _fetch(link)} for link in img_urls]
src/config.py
@@ -53,6 +53,7 @@ class ENABLE: # see fine-grained permission in `src/permission.py`
WEIBO = os.getenv("ENABLE_WEIBO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WECHAT = os.getenv("ENABLE_WECHAT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
REDDIT = os.getenv("ENABLE_REDDIT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+ V2EX = os.getenv("ENABLE_V2EX", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WGET = os.getenv("ENABLE_WGET", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
GITHUB = os.getenv("ENABLE_GITHUB", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
MUSIC163 = os.getenv("ENABLE_MUSIC163", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -161,6 +162,7 @@ class TOKEN:
GITHUB = os.getenv("GITHUB_TOKEN", "")
SPOTIFY_CLIENT_ID = os.getenv("SPOTIFY_CLIENT_ID", "")
SPOTIFY_CLIENT_SECRET = os.getenv("SPOTIFY_CLIENT_SECRET", "")
+ V2EX = os.getenv("V2EX_TOKEN", "")
class PROXY: # format: socks5://127.0.0.1:7890
@@ -175,6 +177,7 @@ class PROXY: # format: socks5://127.0.0.1:7890
TIKTOK = os.getenv("TIKTOK_PROXY", None)
INSTAGRAM = os.getenv("INSTAGRAM_PROXY", None)
TWITTER = os.getenv("TWITTER_PROXY", None)
+ WARP = os.getenv("WARP_PROXY", None)
SPOTIFY = os.getenv("SPOTIFY_PROXY", None)
SUBTITLE = os.getenv("SUBTITLE_PROXY", None)
YOUTUBE_SEARCH = os.getenv("YOUTUBE_SEARCH_PROXY", None)
@@ -183,6 +186,7 @@ class PROXY: # format: socks5://127.0.0.1:7890
DOWNLOAD = os.getenv("DOWNLOAD_PROXY", None)
WEIBO = os.getenv("WEIBO_PROXY", None)
REDDIT = os.getenv("REDDIT_PROXY", None)
+ V2EX = os.getenv("V2EX_PROXY", None)
GITHUB = os.getenv("GITHUB_PROXY", None)
YTDLP = os.getenv("YTDLP_PROXY", None) # general proxy for ytdlp
YTDLP_FALLBACK = os.getenv("YTDLP_PROXY_FALLBACK", None) # fallback proxy for ytdlp
src/handler.py
@@ -34,6 +34,7 @@ from preview.netease import preview_music163
from preview.reddit import preview_reddit
from preview.spotify import preview_spotify
from preview.twitter import preview_twitter
+from preview.v2ex import preview_v2ex
from preview.wechat import preview_wechat
from preview.weibo import preview_weibo
from preview.xiaohongshu import preview_xhs
@@ -158,6 +159,7 @@ async def handle_social_media(
reddit: bool = True,
github: bool = True,
xhs: bool = True,
+ v2ex: bool = True,
music163: bool = True,
spotify: bool = True,
ytdlp: bool = True,
@@ -269,6 +271,8 @@ async def handle_social_media(
return await preview_music163(client, message, **kwargs)
if spotify and matched["platform"] == "spotify":
return await preview_spotify(client, message, **kwargs)
+ if v2ex and matched["platform"] == "v2ex":
+ return await preview_v2ex(client, message, **kwargs)
if matched["platform"].startswith("bilibili-"): # this is not bilibili video, for videos, use yt-dlp
return await preview_bilibili(client, message, **kwargs)
sent_messages = []
@@ -352,6 +356,8 @@ def get_social_media_help(chat_id: int | str, ctype: str, prefix: str):
msg += "\n🎧Spotify"
if permission["reddit"]:
msg += "\n🎈Reddit"
+ if permission["v2ex"]:
+ msg += "\n💻V2EX"
if permission["wechat"]:
msg += "\n🟢微信文章"
if permission["github"]:
src/networking.py
@@ -358,6 +358,12 @@ async def match_social_media_link(text: str, *, flatten_first: bool = True) -> d
url = f"https://github.com/{gh_user}/{gh_repo}"
return {"url": url, "db_key": bare_url(url), "gh_user": gh_user, "gh_repo": gh_repo, "platform": "github"}
+ # https://www.v2ex.com/t/1153086
+ if matched := re.search(r"(https?://)?(www\.)?v2ex\.com/t/(\d+)", text):
+ topic_id = matched.group(3)
+ url = f"https://www.v2ex.com/t/{topic_id}"
+ return {"url": url, "db_key": bare_url(url), "topic_id": topic_id, "platform": "v2ex"}
+
# https://open.spotify.com/track/0cOMncRq4cmDLO4tPQnkBF
if matched := re.search(r"(https?://)?open\.spotify\.com/(:?track|album|artist|playlist)/([a-zA-Z0-9]+)", text):
resource = matched.group(2)
src/permission.py
@@ -116,6 +116,7 @@ def check_service(cid: int | str, ctype: str) -> dict:
"twitter": True,
"weibo": True,
"xhs": True,
+ "v2ex": True,
"music163": True,
"spotify": True,
"github": True,
@@ -153,6 +154,8 @@ def check_service(cid: int | str, ctype: str) -> dict:
permission["instagram"] = False
if not ENABLE.WECHAT:
permission["wechat"] = False
+ if not ENABLE.V2EX:
+ permission["v2ex"] = False
if not ENABLE.REDDIT:
permission["reddit"] = False
if not ENABLE.YTDLP: