Commit 557e163

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-01-22 13:43:22
feat: add multiple social media bridges
1 parent d9e5263
src/bridge/ocr.py
@@ -39,7 +39,7 @@ async def send_to_ocr_bridge(client: Client, message: Message, **kwargs):
     else:
         return
 
-    cid = kwargs.get("target_chat", message.chat.id)  # MSG-A's cid
+    cid = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id  # MSG-A's cid
     mid = kwargs.get("reply_msg_id", message.id)  # MSG-A's mid
     msg += f" \n#ID=({cid},{mid})".replace("None", "0")
     logger.warning(f"OCR via 妙妙小工具 (@{OCR_BOT}): {msg!r}")
src/bridge/social.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import contextlib
 import re
 
 from loguru import logger
@@ -11,31 +12,99 @@ from config import cache
 from message_utils import parse_msg
 from utils import i_am_bot
 
-BOT_NAME = "ParsehubBot"
+SOCIAL_BOTS = {
+    "ParsehubBot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "GLBetabot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "douyin_download_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "bilibiliparse_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "web2album_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "icbcbot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "KyDownloaderBot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+}
 
 
 @cache.memoize(ttl=10)
-async def send_to_socia_media_bridge(client: Client, message: Message, text: str, **kwargs):
+async def send_to_social_media_bridge(client: Client, message: Message, url: str, platform: str, **kwargs):
     """See docs in `bridge/README.md` for details."""
     if await i_am_bot(client):  # bot can't send message to other bots
         return
-    cid = kwargs.get("target_chat", message.chat.id)  # MSG-A's cid
+    cid = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id  # MSG-A's cid
     mid = kwargs.get("reply_msg_id", message.id)  # MSG-A's mid
-    msg = f"#ID=({cid},{mid})\n{text}".replace("None", "0")
-    logger.warning(f"Trying bridge (@{BOT_NAME}): {msg!r}")
-    await client.send_message(chat_id=f"@{BOT_NAME}", text=msg)
+    msg = f"#URL=( {url} ) \n#ID=({cid},{mid})".replace("None", "0")
+
+    # add progress message
+    if prog := kwargs.get("progress"):
+        msg += f"\n#PROGRESS=({prog.chat.id},{prog.id})"
+    cache.set(f"bridge-{url}", "pending", ttl=1200)  # save to cache
+    for bot, platforms in SOCIAL_BOTS.items():
+        if platform in platforms:
+            logger.warning(f"Trying {platform} bridge (@{bot}): {msg!r}")
+            await client.send_message(chat_id=f"@{bot}", text=msg)
 
 
 @cache.memoize(ttl=10)
-async def forward_socia_media_results(client: Client, message: Message):
+async def forward_social_media_results(client: Client, message: Message):
     """See docs in `bridge/README.md` for details."""
-    if message.from_user.username != BOT_NAME or not message.media:
+    if message.from_user.username not in SOCIAL_BOTS or not message.media:
         return
+    #  got a media message
     parse_msg(message)
-    if message.reply_to_message and (matched := re.search(r"#ID=\((-?\d+),(-?\d+)\)", str(message.reply_to_message.text))):
-        target_cid = matched.group(1)  # MSG-A's cid
-        target_mid = int(matched.group(2)) if int(matched.group(2)) != 0 else None  # MSG-A's mid
-        cid = message.chat.id  # result's cid
-        mid = message.id  # result's mid
-        logger.info(f"Forwarding chat=@{BOT_NAME}, id={mid} -> chat={target_cid}, id={target_mid}")
-        await client.copy_message(chat_id=target_cid, from_chat_id=cid, message_id=mid, reply_parameters=ReplyParameters(message_id=target_mid))  # type: ignore
+
+    # Helper to extract forwarding parameters
+    def extract_forwarding_params(msg_text: str) -> dict:
+        """Extract target chat ID, message ID, and URL from message text."""
+        params = {}
+        id_match = re.search(r"#ID=\((-?\d+),(\d+)\)", msg_text)
+        url_match = re.search(r"#URL=\( (.*?) \)", msg_text)
+        if id_match and url_match:
+            params = {
+                "target_cid": id_match.group(1),
+                "target_mid": int(id_match.group(2)) if int(id_match.group(2)) != 0 else None,
+                "url": url_match.group(1),
+            }
+        if prog_match := re.search(r"#PROGRESS=\((-?\d+),(\d+)\)", msg_text):
+            params["prog_cid"] = int(prog_match.group(1))
+            params["prog_mid"] = int(prog_match.group(2)) if int(prog_match.group(2)) != 0 else None
+        return params
+
+    async def forward_message(client: Client, message: Message, params: dict):
+        """Forward the message to the target chat and delete the pending cache."""
+        logger.info(f"Forwarding chat=@{message.from_user.username}, id={message.id} -> chat={params['target_cid']}, id={params['target_mid']}")
+        await client.copy_message(
+            chat_id=params["target_cid"],
+            from_chat_id=message.chat.id,
+            message_id=message.id,
+            reply_parameters=ReplyParameters(message_id=params["target_mid"]),  # type: ignore
+        )
+        cache.delete(f"bridge-{params['url']}")
+        with contextlib.suppress(Exception):
+            if params.get("prog_cid") and params.get("prog_mid"):
+                await client.delete_messages(chat_id=params["prog_cid"], message_ids=params["prog_mid"])
+
+    # Process reply-to messages
+    if message.reply_to_message:
+        params = extract_forwarding_params(str(message.reply_to_message.text))
+        if params and cache.get(f"bridge-{params['url']}"):
+            await forward_message(client, message, params)
+            return
+
+    # Process messages not in reply context
+    my_msg = await get_last_message_from_me(client, message.from_user.username, message.from_user.id)
+    params = extract_forwarding_params(my_msg)
+    if params and cache.get(f"bridge-{params['url']}"):
+        await forward_message(client, message, params)
+
+
+@cache.memoize(ttl=3)
+async def get_last_message_from_me(client: Client, chat_id: int | str, opponent_id: int) -> str:
+    """Get the last message from me in the chat.
+
+    Args:
+        client (Client): The Pyrogram client.
+        chat_id (int | str): The chat id.
+        opponent_id (int): The opponent id.
+    """
+    async for message in client.get_chat_history(chat_id, limit=20):  # type: ignore
+        if message.from_user.id != opponent_id:
+            return message.text or message.caption or ""
+    return ""
src/preview/douyin.py
@@ -8,7 +8,7 @@ from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
 
-from bridge.social import send_to_socia_media_bridge
+from bridge.social import send_to_social_media_bridge
 from config import API, DB, TOKEN, TZ, cache
 from database import get_db
 from message_utils import copy_messages_from_db, modify_progress, save_messages, send2tg, summay_media
@@ -73,8 +73,7 @@ async def preview_douyin(
             logger.warning(f"{platform} API [tikhub] failed: {resp}")
             if fallback:
                 await modify_progress(text="❌抖音解析失败, 尝试第三方Bot...", **kwargs)
-                await send_to_socia_media_bridge(client, message, text=url, **kwargs)
-            await modify_progress(del_status=True, **kwargs)
+                await send_to_social_media_bridge(client, message, url, platform, **kwargs)
             return
 
     aweme_id = data.get("aweme_id", Path(url).stem)
src/preview/instagram.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import asyncio
 from datetime import datetime
 from zoneinfo import ZoneInfo
 
@@ -10,7 +9,7 @@ from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
 
-from bridge.social import send_to_socia_media_bridge
+from bridge.social import send_to_social_media_bridge
 from config import API, DB, DOWNLOAD_DIR, TOKEN, TZ, UA, cache
 from database import get_db
 from message_utils import copy_messages_from_db, modify_progress, save_messages, send2tg, summay_media
@@ -32,6 +31,7 @@ async def preview_instagram(client: Client, message: Message, url: str = "", db_
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析Instagram链接\n{url}", **kwargs)
         kwargs["progress"] = res[0]
+
     if kv := await get_db(db_key):
         logger.debug(f"Instagram preview {DB.ENGINE} cache hit for key={db_key}")
         if await copy_messages_from_db(client, message, key=db_key, kv=kv, **kwargs):
@@ -45,8 +45,6 @@ async def preview_instagram(client: Client, message: Message, url: str = "", db_
     if resp.status_code != 200:
         await modify_progress(text="❌Instagram解析失败, 使用DDInstagram预览", **kwargs)
         await preview_ddinstagram(client, message, fallback=fallback, **kwargs)
-        await asyncio.sleep(2)
-        await modify_progress(del_status=True, **kwargs)
         return
 
     data = resp.json()["data"]
@@ -117,7 +115,7 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
     resp = await hx_req(api_url, headers=headers)
     if not resp.text:
         if fallback:
-            await send_to_socia_media_bridge(client, message, text=url, **kwargs)
+            await send_to_social_media_bridge(client, message, url, **kwargs)
         return
 
     soup = BeautifulSoup(resp.text, "html.parser")
@@ -136,7 +134,7 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
             raw_url = f"{API.DDINSTAGRAM}{img_url}"
             media["photo"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.jpg", workers_proxy=True, **kwargs)
             if not bool(validate_img(media["photo"])):
-                await send_to_socia_media_bridge(client, message, text=url, **kwargs)
+                await send_to_social_media_bridge(client, message, text=url, **kwargs)
                 return
 
     if tag := soup.find("meta", attrs={"property": "og:video"}):
@@ -145,7 +143,7 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
             raw_url = f"{API.DDINSTAGRAM}{video_url}"
             media["video"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.mp4", workers_proxy=True, **kwargs)
             if not is_valid_video(media["video"]):
-                await send_to_socia_media_bridge(client, message, text=url, **kwargs)
+                await send_to_social_media_bridge(client, message, text=url, **kwargs)
                 return
 
     await send2tg(client, message, texts=texts, media=[media], **kwargs)
src/preview/twitter.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import asyncio
 import copy
 import re
 from datetime import UTC, datetime
@@ -10,6 +9,7 @@ from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
 
+from bridge.social import send_to_social_media_bridge
 from config import API, DB, TOKEN, TZ, UA, cache
 from database import get_db
 from message_utils import copy_messages_from_db, modify_progress, save_messages, send2tg, summay_media
@@ -18,7 +18,17 @@ from utils import remove_none_values, split_parts, true
 
 
 @cache.memoize(ttl=10)
-async def preview_twitter(client: Client, message: Message, url: str = "", db_key: str = "", platform: str = "", twitter_extractor: str | None = None, **kwargs):
+async def preview_twitter(
+    client: Client,
+    message: Message,
+    url: str = "",
+    db_key: str = "",
+    platform: str = "",
+    twitter_extractor: str | None = None,
+    *,
+    fallback: bool = True,
+    **kwargs,
+):
     """Preview twitter link in the message.
 
     Args:
@@ -28,14 +38,17 @@ async def preview_twitter(client: Client, message: Message, url: str = "", db_ke
         handle (str): The twitter handle.
         post_id (str): The twitter post id.
         twitter_extractor (str): The extractor to use: fxtwitter or tikhub. Defaults to "tikhub".
+        fallback (bool, optional): Fallback to other bots. Defaults to True.
 
     If skip_fxtwitter is set to True, and the domain is fxtwitter or fixupx, this function is skipped.
     """
     if true(kwargs.get("skip_fxtwitter")) and platform in ["fxtwitter", "fixupx"]:
         return
+    platform = "x"  # set to x for all twitter links
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析推特链接\n{url}", **kwargs)
         kwargs["progress"] = res[0]
+
     if kv := await get_db(db_key):
         logger.debug(f"Twitter preview {DB.ENGINE} cache hit for key={db_key}")
         if await copy_messages_from_db(client, message, key=db_key, kv=kv, **kwargs):
@@ -46,9 +59,9 @@ async def preview_twitter(client: Client, message: Message, url: str = "", db_ke
         try:
             this_info = await get_tweet_info_via_tikhub(url=url, **kwargs)
             if not this_info:
-                await modify_progress(text="❌推特解析失败", **kwargs)
-                await asyncio.sleep(1)
-                await modify_progress(del_status=True, **kwargs)
+                error = "❌[Tikhub]推特解析失败"
+                await modify_progress(text=error, **kwargs)
+                raise APIError(error)  # noqa: TRY301
             quote_info = await get_tweet_info_via_tikhub(quote_info=this_info["quote_info"], **kwargs) if this_info["has_quote"] else {}
             master_info = await get_tweet_info_via_tikhub(post_id=this_info["master_thread_id"], **kwargs) if this_info["has_master"] else {}
             succ = True
@@ -59,14 +72,19 @@ async def preview_twitter(client: Client, message: Message, url: str = "", db_ke
         try:
             this_info = await get_tweet_info_via_fxtwitter(url=url)
             if not this_info:
-                await modify_progress(text="❌推特解析失败", **kwargs)
-                await asyncio.sleep(1)
-                await modify_progress(del_status=True, **kwargs)
+                error = "❌[FxTwitter]推特解析失败"
+                await modify_progress(text=error, **kwargs)
+                raise APIError(error)  # noqa: TRY301
             master_info = await get_tweet_info_via_fxtwitter(handle=this_info["replying_to_user"], post_id=this_info["replying_post_id"]) if this_info["has_master"] else {}
             quote_info = await get_tweet_info_via_fxtwitter(quote_info=this_info["quote_info"]) if this_info["has_quote"] else {}
+            succ = True
         except Exception as e:
             logger.warning(f"Twitter API [fxtwitter] failed: {e}")
-            return
+    if not succ:
+        if fallback:
+            await modify_progress(text="❌推特解析失败, 尝试第三方Bot...", **kwargs)
+            await send_to_social_media_bridge(client, message, url, platform, **kwargs)
+        return
 
     media = []
     media_ids = set()  # deduplicate media
@@ -371,3 +389,7 @@ async def remove_tco_suffix(text: str, post_id: str = "") -> str:
         return text.removesuffix(t_co_url).strip()
 
     return text
+
+
+class APIError(Exception):
+    pass
src/preview/weibo.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import asyncio
 import contextlib
 import json
 import re
@@ -13,7 +12,7 @@ from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
 
-from bridge.social import send_to_socia_media_bridge
+from bridge.social import send_to_social_media_bridge
 from config import API, DB, DOWNLOAD_DIR, TOKEN, TZ, cache
 from cookies import get_weibo_cookies
 from database import get_db
@@ -51,12 +50,10 @@ async def preview_weibo(client: Client, message: Message, url: str, post_id: str
     this_info = await parse_weibo_info(post_id, **kwargs)
     if error_msg := this_info.get("error_msg"):
         if this_info.get("fallback", fallback):
-            await modify_progress(text=f"❌微博解析失败: {error_msg}\n\n尝试第三方Bot...", **kwargs)
-            await send_to_socia_media_bridge(client, message, text=url, **kwargs)
+            await modify_progress(text=f"❌微博解析失败: {error_msg}\n尝试第三方Bot...", **kwargs)
+            await send_to_social_media_bridge(client, message, url, **kwargs)
         else:
             await modify_progress(text=f"❌微博解析失败: {error_msg}", **kwargs)
-        await asyncio.sleep(3)
-        await modify_progress(del_status=True, **kwargs)
         return
     quote_info = await parse_weibo_info(post_id, this_info["reply_data"], **kwargs) if this_info.get("reply_data") else {}
 
src/preview/xiaohongshu.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import asyncio
 import json
 from datetime import datetime
 from zoneinfo import ZoneInfo
@@ -10,7 +9,7 @@ from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import Message
 
-from bridge.social import send_to_socia_media_bridge
+from bridge.social import send_to_social_media_bridge
 from config import DB, PROXY, TZ, UA, cache
 from database import get_db
 from message_utils import copy_messages_from_db, modify_progress, save_messages, send2tg, summay_media
@@ -19,7 +18,7 @@ from others.emoji import emojify
 
 
 @cache.memoize(ttl=10)
-async def preview_xhs(client: Client, message: Message, url: str = "", db_key: str = "", *, fallback: bool = True, **kwargs):
+async def preview_xhs(client: Client, message: Message, url: str = "", db_key: str = "", xsec: str = "", *, fallback: bool = True, **kwargs):
     """Preview xiaohongshu link in the message.
 
     Args:
@@ -27,6 +26,7 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
         message (Message): The trigger message object.
         url (str, optional): xiaohongshu link
         db_key (str, optional): The cache key.
+        xsec (str, optional): The xsec token.
         fallback (bool, optional): Fallback to other bots. Defaults to True.
     """
     if kwargs.get("show_progress") and "progress" not in kwargs:
@@ -51,8 +51,8 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
     if not note:
         if fallback:
             await modify_progress(text="❌小红书解析失败, 尝试第三方Bot...", **kwargs)
-            await send_to_socia_media_bridge(client, message, text=url, **kwargs)
-            await asyncio.sleep(3)
+            full_url = f"https://{db_key}?xsec_token={xsec}"
+            await send_to_social_media_bridge(client, message, full_url, **kwargs)
         else:
             await modify_progress(text="❌小红书解析失败, 请稍候再尝试", **kwargs)
 
src/main.py
@@ -16,7 +16,7 @@ from pyrogram.sync import idle
 from pyrogram.types import LinkPreviewOptions, Message
 
 from bridge.ocr import forward_ocr_results
-from bridge.social import forward_socia_media_results
+from bridge.social import forward_social_media_results
 from config import DEVICE_NAME, ENABLE, PROXY, TID, TOKEN, cache
 from handler import handle_social_media, handle_utilities
 from message_utils import parse_msg
@@ -68,7 +68,7 @@ async def main():
         if not ENABLE.BOTS:
             return
         parse_msg(message, verbose=True)
-        await forward_socia_media_results(client, message)
+        await forward_social_media_results(client, message)
         await forward_ocr_results(client, message)
         await handle_utilities(client, message, detail_progress=True)
         await handle_social_media(client, message, cmd_prefix=["/dl", "!dl", "!dl"], detail_progress=True)
src/networking.py
@@ -351,15 +351,15 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
         base_url = flatten.split("?")[0]
         post_id = Path(base_url).stem
         queries = parse_qs(urlparse(flatten).query)
-        xsec_token = queries.get("xsec_token", [""])[0]
-        matched_info = {"url": https_url(matched.group(0)), "db_key": f"www.xiaohongshu.com/explore/{post_id}", "platform": "xiaohongshu"}
+        xsec = queries.get("xsec_token", [""])[0]
+        matched_info = {"url": https_url(matched.group(0)), "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
     # https://www.xiaohongshu.com/explore/671a3dfe00000000240161db?xsec_token=ABY-b1JKuAlIm2dX1OSdIFHD7cQFHEdThv5aMyccvmbJo=
     if matched := re.search(r"(https?://)?(www\.)?xiaohongshu\.com/([^.。,,\s]+)", text):
         base_url = matched.group(0).split("?")[0]
         post_id = Path(base_url).stem
         queries = parse_qs(urlparse(matched.group(0)).query)
-        xsec_token = queries.get("xsec_token", [""])[0]
-        matched_info = {"url": f"https://www.xiaohongshu.com/explore/{post_id}?xsec_token={xsec_token}", "db_key": f"www.xiaohongshu.com/explore/{post_id}", "platform": "xiaohongshu"}
+        xsec = queries.get("xsec_token", [""])[0]
+        matched_info = {"url": f"https://www.xiaohongshu.com/explore/{post_id}?xsec_token={xsec}", "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
 
     # https://www.bilibili.com/video/BV1TC411J7PK
     if matched := re.search(r"(https?://)?(:?m\.|www\.)?bilibili\.com/video/([^,,.。\s]+)", str(text)):