Commit c320e53
Changed files (9)
src/messages/database.py
@@ -73,6 +73,10 @@ async def save_messages(messages: list[Message | None], key: str, metadata: dict
logger.trace(f"Saving text message {msg.id}")
data.append({"type": "text", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
continue
+ if info["mtype"] == "document":
+ logger.trace(f"Saving document message {msg.id}")
+ data.append({"type": "document", "cid": msg.chat.id, "mid": msg.id} | msg_extra)
+ continue
logger.warning(f"Skip save message {msg.id} to {DB.ENGINE} due to unknown type: {msg}")
if data:
return await set_db(key, metadata=metadata, data={"data": data})
src/preview/wechat.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import contextlib
+from pathlib import Path
+from urllib.parse import quote_plus
+
+from loguru import logger
+from pyrogram.client import Client
+from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM, BLOCKQUOTE_EXPANDABLE_END_DELIM
+from pyrogram.types import Message
+from telegraph.aio import Telegraph
+
+from config import API, CAPTION_LENGTH, DB, DOWNLOAD_DIR, PROXY, TEXT_LENGTH, TOKEN
+from database import get_db
+from messages.database import copy_messages_from_db, save_messages
+from messages.progress import modify_progress
+from messages.sender import send2tg
+from messages.utils import count_without_entities, summay_media
+from networking import download_file, download_media, hx_req
+from utils import nowstr, rand_string
+
+
+async def preview_wechat(client: Client, message: Message, url: str = "", db_key: str = "", **kwargs):
+ """Preview wechat link in the message.
+
+ Args:
+ client (Client): The Pyrogram client.
+ message (Message): The trigger message object.
+ url (str, optional): wechat link
+ db_key (str, optional): The cache key.
+ """
+ if kwargs.get("show_progress") and "progress" not in kwargs:
+ res = await send2tg(client, message, texts=f"🔗正在解析微信链接\n{url}", **kwargs)
+ kwargs["progress"] = res[0]
+ if kv := await get_db(db_key):
+ logger.debug(f"WeChat preview {DB.ENGINE} cache hit for key={db_key}")
+ if await copy_messages_from_db(client, message, key=db_key, kv=kv, **kwargs):
+ return
+ await modify_progress(text=f"❌从{DB.ENGINE}缓存中转发失败, 尝试重新解析...", **kwargs)
+ logger.info(f"WeChat link preview for {url}")
+
+ post_info = await get_wechat_info(url)
+ if error := post_info.get("error"):
+ await modify_progress(text=f"❌微信链接解析失败{url}\n{error}", force_update=True, **kwargs)
+ return
+ sent_messages = []
+ length = await count_without_entities(post_info["header"] + post_info["markdown"])
+ if not post_info.get("media"): # 无图片
+ if length < TEXT_LENGTH - 8: # 无图片短文
+ texts = f"{post_info['header']}\n{BLOCKQUOTE_EXPANDABLE_DELIM}{post_info['markdown']}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}"
+ sent_messages.extend(await send2tg(client, message, texts=texts, **kwargs))
+ else: # 无图片长文
+ texts = f"{post_info['header']}"
+ telegraph_url = await publish_telegraph(title=post_info["title"], html=post_info["html"], author=post_info["author"], url=url)
+ if telegraph_url:
+ texts += f"\n[⚡️点击此处即时预览]({telegraph_url})"
+ sent_messages.extend(await send2tg(client, message, texts=texts, media=[{"document": post_info["html_path"]}], **kwargs))
+ elif length < CAPTION_LENGTH - 8: # 有图片短文
+ texts = f"{post_info['header']}\n{BLOCKQUOTE_EXPANDABLE_DELIM}{post_info['markdown']}\n{BLOCKQUOTE_EXPANDABLE_END_DELIM}"
+ sent_messages.extend(await send2tg(client, message, texts=texts, media=post_info["media"], **kwargs))
+ else: # 有图片长文
+ texts = f"{post_info['header']}"
+ telegraph_url = await publish_telegraph(title=post_info["title"], html=post_info["html"], author=post_info["author"], url=url)
+ if telegraph_url:
+ texts += f"\n**⚡️[点击此处即时预览]({telegraph_url})**"
+ sent_messages.extend(await send2tg(client, message, texts=texts, media=[{"document": post_info["path"]}], **kwargs))
+ kwargs["reply_msg_id"] = -1 # do not send as reply
+ sent_messages.extend(await send2tg(client, message, texts=texts, media=post_info["media"], **kwargs))
+ await modify_progress(del_status=True, **kwargs)
+ await save_messages(messages=sent_messages, key=db_key)
+
+
+async def get_wechat_info(url: str, **kwargs) -> dict:
+ """Get WeChat post info."""
+ api_url = API.TIKHUB_WECHAT + quote_plus(url)
+ logger.info(f"Preview WeChat TikHub for {api_url}")
+ headers = {"authorization": f"Bearer {TOKEN.TIKHUB}", "accept": "application/json"}
+ resp = await hx_req(api_url, headers=headers, check_keys=["data.content.raw_content", "data.title"], check_kv={"code": 200})
+ if resp.get("hx_error"):
+ return {"error": resp["hx_error"]}
+
+ try:
+ data = resp["data"]
+ title = data["title"]
+ author = data.get("author", "author")
+ dt = nowstr()
+ with contextlib.suppress(Exception):
+ dt = data["datetime"] # 2025-04-28T06:12:35.833830
+ dt = dt[:19].replace("T", " ") # 2025-04-28 06:12:35
+ header = f"🟢[{author}]({url})\n🕒{dt}\n**📝{title}**"
+ media = []
+ htmls = ""
+ texts = ""
+ markdowns = ""
+ for tag in data["content"]["raw_content"]:
+ html = ""
+ if text := tag.get("text", ""):
+ html = f"<h3>{text}</h3>" if tag.get("type", "") == "section" else f"<p>{text}</p>"
+ markdown = f"\n\n**{text}**" if tag.get("type", "") == "section" else f"\n{text}"
+ text = f"\n\n{text}" if tag.get("type", "") == "section" else f"\n{text}"
+ htmls += f"<br>{html}"
+ markdowns += f"\n{markdown}"
+ texts += f"\n{text}"
+ if images := tag.get("images", []):
+ for img in images:
+ src = img.get("src", "")
+ ext = img.get("type", "png")
+ media.append({"photo": download_file(src, path=f"{DOWNLOAD_DIR}/{rand_string()}.{ext}", proxy=PROXY.WECHAT, **kwargs)})
+ htmls += f"<br><img src='{PROXY.IMG}{src}' alt='微信图片'/>"
+ await modify_progress(text=f"✅解析成功...\n⏬正在下载:\n{summay_media(media)}", force_update=True, **kwargs)
+ media = await download_media(media, **kwargs)
+ txt_path = Path(DOWNLOAD_DIR) / f"{title}.txt"
+ with txt_path.open("w") as f:
+ f.write(f"📝{title}\n👤{author}\n🕒{dt}\n🔗{url}\n\n" + texts.strip())
+ except Exception as e:
+ logger.error(e)
+ return {"error": str(e)}
+ return {"html": htmls, "path": txt_path.as_posix(), "markdown": markdowns, "media": media, "title": title, "author": author, "header": header}
+
+
+async def publish_telegraph(title: str, html: str, author: str = "", url: str = "") -> str:
+ """Publish to Telegraph."""
+ if not TOKEN.TELEGRAPH or not html:
+ return ""
+ telegraph = Telegraph(access_token=TOKEN.TELEGRAPH)
+ try:
+ page = await telegraph.create_page(title=title, author_name=author, author_url=url, html_content=html)
+ return page["url"]
+ except Exception as e:
+ logger.error(f"Telegraph publish error: {e}")
+ return ""
src/config.py
@@ -48,6 +48,7 @@ class ENABLE: # see fine-grained permission in `src/permission.py`
TIKTOK = os.getenv("ENABLE_TIKTOK", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
TWITTER = os.getenv("ENABLE_TWITTER", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WEIBO = os.getenv("ENABLE_WEIBO", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
+ WECHAT = os.getenv("ENABLE_WECHAT", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
WGET = os.getenv("ENABLE_WGET", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
XHS = os.getenv("ENABLE_XHS", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
YTDLP = os.getenv("ENABLE_YTDLP", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
@@ -88,6 +89,7 @@ class API:
TIKHUB_INSTAGRAM = os.getenv("TIKHUB_INSTAGRAM_API", "https://api.tikhub.io/api/v1/instagram/web_app/fetch_post_info_by_url?url=")
TIKHUB_TWITTER = os.getenv("TIKHUB_TWITTER_API", "https://api.tikhub.io/api/v1/twitter/web/fetch_post_comments?tweet_id=")
TIKHUB_WEIBO_VIDEO = os.getenv("TIKHUB_WEIBO_VIDEO_API", "https://api.tikhub.io/api/v1/weibo/web/fetch_short_video_data?share_text=")
+ TIKHUB_WECHAT = os.getenv("TIKHUB_WECHAT", "https://api.tikhub.io/api/v1/wechat_mp/web/fetch_mp_article_detail_json?url=")
BINANCE_SPOT = os.getenv("BINANCE_SPOT_API", "https://data-api.binance.vision")
BINANCE_UM = os.getenv("BINANCE_UM_API", "https://fapi.binance.com")
OKX = os.getenv("OKX_API", "https://www.okx.com")
@@ -113,13 +115,16 @@ class TOKEN:
GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY", "")
GOOGLE_SEARCH_CX = os.getenv("GOOGLE_SEARCH_CX", "")
CHART_IMG = os.getenv("CHART_IMG_KEY", "")
+ TELEGRAPH = os.getenv("TELEGRAPH_TOKEN", "")
class PROXY: # format: socks5://127.0.0.1:7890
TELEGRAM = os.getenv("TELEGRAM_PROXY", None) # Telegram
WORKERS = os.getenv("WORKERS_PROXY", "") # https://github.com/netnr/workers
+ IMG = os.getenv("IMG_PROXY", "") # https://caravaggio.ramielcreations.com/docs/install
XHS = os.getenv("XHS_PROXY", None) # Banned VPS IP, need residential proxy
GPT = os.getenv("GPT_PROXY", None)
+ WECHAT = os.getenv("WECHAT_PROXY", None)
DOUYIN = os.getenv("DOUYIN_PROXY", None)
TIKTOK = os.getenv("TIKTOK_PROXY", None)
INSTAGRAM = os.getenv("INSTAGRAM_PROXY", None)
src/handler.py
@@ -27,6 +27,7 @@ from preview.bilibili import preview_bilibili
from preview.douyin import preview_douyin
from preview.instagram import preview_instagram
from preview.twitter import preview_twitter
+from preview.wechat import preview_wechat
from preview.weibo import preview_weibo
from preview.xiaohongshu import preview_xhs
from preview.ytdlp import ProxyError, preview_ytdlp
@@ -223,6 +224,8 @@ async def handle_social_media(
await preview_weibo(client, message, **kwargs)
if xhs and matched["platform"] == "xiaohongshu":
await preview_xhs(client, message, **kwargs)
+ if xhs and matched["platform"] == "wechat":
+ await preview_wechat(client, message, **kwargs)
if matched["platform"].startswith("bilibili-"): # this is not bilibili video, for videos, use yt-dlp
await preview_bilibili(client, message, **kwargs)
try:
@@ -299,6 +302,8 @@ def get_social_media_help(chat_id: int | str, ctype: str, prefixes: list[str] |
msg += "\n🎶TikTok"
if permission["instagram"]:
msg += "\n🏞Instagram"
+ if permission["wechat"]:
+ msg += "\n🟢微信公众号文章"
if permission["ytdlp"]:
msg += "\n🔴油管"
msg += "\n🅱️哔哩哔哩"
src/networking.py
@@ -356,6 +356,11 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
vid = matched.group(3)
return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "youtube"}
+ # https://mp.weixin.qq.com/s/bd_giuPEyPBu9LTOtC2VHw
+ # https://mp.weixin.qq.com/s?__biz=MzI5Njc4NTYyOQ==&mid=2247494800&idx=1&sn=43a5732bd3a205d4dbdcd523afc0ca4a&sharer_shareinfo=1923203fd24bfa47c5b36b690026f5c8&sharer_shareinfo_first=8814eca80b4a37d10aa9b725e61f9486
+ if matched := re.search(r"(https?://)?mp.weixin.qq.com/s[\/|\?]{1}([_A-Za-z\=\&0-9\#\-]+)", text):
+ return {"url": matched.group(0), "db_key": bare_url(matched.group(0)), "platform": "wechat"}
+
# if all above pre-defined patterns failed, try to match ytdlp link
if urls := match_urls(text):
for url in urls:
@@ -438,7 +443,8 @@ if __name__ == "__main__":
import asyncio
check_data(json.dumps({"foo": "bar", "baz": {"qux": "quux"}, "lst": ["1", "2", "3"]}), check_keys=["baz.qux"], check_kv={"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]})
- asyncio.run(match_social_media_link("https://b23.tv/3MSgT4q/", flatten_first=True))
+ # asyncio.run(match_social_media_link("https://b23.tv/3MSgT4q/", flatten_first=True))
+ print(asyncio.run(match_social_media_link("https://mp.weixin.qq.com/s/bd_giuPEyPBu9LTOtC2VHw", flatten_first=True)))
# asyncio.run(match_social_media_link("https://www.facebook.com/share/r/19QGGp39T3/", flatten_first=True))
# asyncio.run(match_social_media_link("https://www.douyin.com/video/7398813386827468041"))
# asyncio.run(match_social_media_link("https://www.iesdouyin.com/share/note/7454527270925946138/"))
src/permission.py
@@ -112,6 +112,7 @@ def check_service(cid: int | str, ctype: str) -> dict:
"twitter": True,
"weibo": True,
"xhs": True,
+ "wechat": True,
"ytdlp": True,
}
@@ -132,6 +133,8 @@ def check_service(cid: int | str, ctype: str) -> dict:
permission["tiktok"] = False
if not ENABLE.INSTAGRAM:
permission["instagram"] = False
+ if not ENABLE.WECHAT:
+ permission["wechat"] = False
if not ENABLE.YTDLP:
permission["ytdlp"] = False
if not ENABLE.GPT:
src/utils.py
@@ -26,6 +26,11 @@ def nowdt(tz: str = "UTC") -> datetime:
return datetime.now(ZoneInfo(tz))
+def nowstr(tz: str = TZ) -> str:
+ now = nowdt(tz)
+ return f"{now:%Y-%m-%d %H:%M:%S}"
+
+
def number(n: float | str | Decimal, precision: int = -1, *, sign: bool = False) -> str:
"""Normalize a number to its simplest decimal.
pyproject.toml
@@ -18,13 +18,14 @@ dependencies = [
"pysocks>=1.7.1",
"pytgcrypto>=1.2.9.2",
"python-ffmpeg>=2.0.12",
+ "python-magic>=0.4.27",
"pyyaml>=6.0.2",
"quickchart-io>=2.0.0",
+ "telegraph[aio]>=2.2.0",
"tiktoken>=0.8.0",
"uvloop>=0.21.0",
"youtube-transcript-api>=0.6.3",
"yt-dlp>=2025.1.12rc",
- "python-magic>=0.4.27",
]
name = "bennybot"
requires-python = ">=3.11"
uv.lock
@@ -234,6 +234,7 @@ dependencies = [
{ name = "python-magic" },
{ name = "pyyaml" },
{ name = "quickchart-io" },
+ { name = "telegraph", extra = ["aio"] },
{ name = "tiktoken" },
{ name = "uvloop" },
{ name = "youtube-transcript-api" },
@@ -268,6 +269,7 @@ requires-dist = [
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "pyyaml", specifier = ">=6.0.2" },
{ name = "quickchart-io", specifier = ">=2.0.0" },
+ { name = "telegraph", extras = ["aio"], specifier = ">=2.2.0" },
{ name = "tiktoken", specifier = ">=0.8.0" },
{ name = "uvloop", specifier = ">=0.21.0" },
{ name = "youtube-transcript-api", specifier = ">=0.6.3" },
@@ -1627,6 +1629,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 },
]
+[[package]]
+name = "telegraph"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/49/81/1c9f99004e23555fb21b80a2ef6ddbecb3a7a4eefbc4aac75ffb5a9ccf71/telegraph-2.2.0.tar.gz", hash = "sha256:012908f18208c451c7189f4bda7c39a1369241ac436c7543bb6c3fccbe9cfd5d", size = 8011 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d3/75/1bc2f8c4e4a736e2e582a1518eb4621db3c9dcb100f379fab5ab49c8d1ac/telegraph-2.2.0-py3-none-any.whl", hash = "sha256:d20b2a5d7cfdd66890c8c3fd60aa8585cabb7c6b03579d3eb1cd8af056ed9971", size = 10749 },
+]
+
+[package.optional-dependencies]
+aio = [
+ { name = "httpx" },
+]
+
[[package]]
name = "tiktoken"
version = "0.9.0"