Commit 0572e32

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-06-21 15:17:56
refactor(social): add `bridge` as providers
1 parent 157ae8c
Changed files (4)
src/bridge/social.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-
-import asyncio
 import contextlib
-import random
 
 from loguru import logger
 from pyrogram.client import Client
@@ -14,16 +11,19 @@ from messages.parser import parse_msg
 from utils import i_am_bot
 
 SOCIAL_BOTS = {
-    "ParsehubBot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "GLBetabot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "douyin_download_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "bilibiliparse_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "web2album_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "icbcbot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "KyDownloaderBot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
-    "@DouYintg_bot": ["bilibili", "douyin", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "ParsehubBot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "GLBetabot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "douyin_download_bot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "bilibiliparse_bot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "web2album_bot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "icbcbot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "KyDownloaderBot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "DouYintg_bot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
+    "MultiSaverXbot": ["bilibili", "douyin", "tiktok", "instagram", "instagram", "weibo", "x", "xiaohongshu", "youtube"],
 }
 
+CAPTIONS = {"MultiSaverXbot": ""}
+
 
 async def send_to_social_media_bridge(client: Client, message: Message, url: str, platform: str, **kwargs):
     """See docs in `bridge/README.md` for details."""
@@ -43,13 +43,14 @@ async def send_to_social_media_bridge(client: Client, message: Message, url: str
 
     # save a global flag to cache, if any bot is processing this url, set this the the bot name
     # ! Warning: currently we can only handle single url at a time
-    cache.set("social-global", "bot_name", ttl=120)
+    cache.set("social-global", "waiting-for-bots", ttl=120)
 
     for bot, platforms in SOCIAL_BOTS.items():
         if platform in platforms:
             logger.warning(f"Trying {platform} bridge (@{bot}): {url}")
             cache.set(f"social-{bot}", params, ttl=120)  # save params to cache for each bot
-            await client.send_message(chat_id=f"@{bot}", text=url)
+            with contextlib.suppress(Exception):
+                await client.send_message(chat_id=f"@{bot}", text=url)
 
 
 async def forward_social_media_results(client: Client, message: Message):
@@ -59,34 +60,42 @@ async def forward_social_media_results(client: Client, message: Message):
     First, we need to check the global flag in the cache. Format: social-global-{url}
     Then, we need to check the cache for each bot. Format: social-individual-{bot}-{url}
     """
-    if message.from_user.username not in SOCIAL_BOTS or not message.media:
+    if message.from_user.username not in SOCIAL_BOTS or not (message.photo or message.video):
         return
     #  got a media message
     info = parse_msg(message)
 
     bot_name = cache.get("social-global", default="")
-    if bot_name == "bot_name":  # this bot is the first one to get the results
+    if bot_name == "waiting-for-bots":  # this bot is the first one to get the results
         logger.success(f"Bridge @{info['handle']} is the first bot to get a {info['mtype']}")
         cache.set("social-global", info["handle"], ttl=120)
     elif bot_name != info["handle"]:  # already processed by other bot
         return
-
     params = cache.get(f"social-{info['handle']}")
     if not params:
         return
 
-    if not message.media_group_id:  # single media, send diectly
-        logger.info(f"Forwarding {info['mtype']} from @{bot_name} -> chat={params['target_cid']}, id={params['target_mid']}")
-        await client.copy_message(chat_id=params["target_cid"], from_chat_id=info["cid"], message_id=info["mid"], reply_parameters=ReplyParameters(message_id=params["target_mid"]))
-
-    # Media group. We will receive multiple messages at nearly the same time
-    # Send media_group only once
-    if not cache.get(f"social-{bot_name}-{message.media_group_id}"):
-        cache.set(f"social-{bot_name}-{message.media_group_id}", info["mid"], ttl=120)  # this may be overwritten by the next message of same media_group
-        await asyncio.sleep(1 + random.random())  # wait for other messages
-        if mid := cache.get(f"social-{bot_name}-{message.media_group_id}"):
-            logger.info(f"Forwarding media_group from @{bot_name} -> chat={params['target_cid']}, id={params['target_mid']}")
-            await client.copy_media_group(chat_id=params["target_cid"], from_chat_id=info["cid"], message_id=mid, reply_parameters=ReplyParameters(message_id=params["target_mid"]))
+    logger.info(f"Forwarding {info['mtype']} from @{bot_name} -> chat={params['target_cid']}, id={params['target_mid']}")
+    await client.copy_message(
+        chat_id=params["target_cid"],
+        from_chat_id=info["cid"],
+        message_id=info["mid"],
+        caption=CAPTIONS.get(info["handle"]),  # type: ignore
+        reply_parameters=ReplyParameters(message_id=params["target_mid"]),
+    )
+
+    # ! Because `cache` is not shared between different processes, we can't safely use it to store media_group_id
+    # if not message.media_group_id:  # single media, send diectly
+    #     logger.info(f"Forwarding {info['mtype']} from @{bot_name} -> chat={params['target_cid']}, id={params['target_mid']}")
+    #     await client.copy_message(chat_id=params["target_cid"], from_chat_id=info["cid"], message_id=info["mid"], reply_parameters=ReplyParameters(message_id=params["target_mid"]))
+    # # Media group. We will receive multiple messages at nearly the same time
+    # # Send media_group only once
+    # if not cache.get(f"social-{bot_name}-{message.media_group_id}"):
+    #     cache.set(f"social-{bot_name}-{message.media_group_id}", info["mid"], ttl=120)  # this may be overwritten by the next message of same media_group
+    #     await asyncio.sleep(1 + random.random())  # wait for other messages
+    #     if mid := cache.get(f"social-{bot_name}-{message.media_group_id}"):
+    #         logger.info(f"Forwarding media_group from @{bot_name} -> chat={params['target_cid']}, id={params['target_mid']}")
+    #         await client.copy_media_group(chat_id=params["target_cid"], from_chat_id=info["cid"], message_id=mid, reply_parameters=ReplyParameters(message_id=params["target_mid"]))
     with contextlib.suppress(Exception):
         if params.get("prog_cid") and params.get("prog_mid"):
             await client.delete_messages(chat_id=params["prog_cid"], message_ids=params["prog_mid"])
src/preview/douyin.py
@@ -30,8 +30,6 @@ async def preview_douyin(
     platform: str = "douyin",
     douyin_provider: str = PROVIDER.DOUYIN,
     douyin_comments_provider: str = PROVIDER.DOUYIN_COMMENTS,
-    *,
-    fallback: bool = True,
     **kwargs,
 ):
     """Preview douyin or tiktok link in the message.
@@ -42,9 +40,8 @@ async def preview_douyin(
         url (str, optional): The douyin or tiktok link.
         db_key (str, optional): The cache key.
         platform(str, optional): The platform name. Defaults to "douyin".
-        douyin_provider (str, optional): The douyin extractor: "free", "tikhub" or "free-tikhub".
+        douyin_provider (str, optional): The douyin extractor: "free", "tikhub", "bridge", or combined strings.
         douyin_comments_provider (str, optional): The douyin comments extractor: "free", "tikhub" or "free-tikhub".
-        fallback (bool, optional): Fallback to other bots. Defaults to True.
     """
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析抖音链接\n{url}", **kwargs)
@@ -74,13 +71,13 @@ async def preview_douyin(
         try:
             resp = await hx_req(api_url, headers=headers, check_keys=["data"], check_kv={"code": 200})
             data = resp["data"]
+            succ = True
         except Exception:
             logger.warning(f"{platform} API [tikhub] failed")
-            if fallback:
-                await modify_progress(text="❌抖音解析失败, 尝试第三方Bot...", **kwargs)
-                await send_to_social_media_bridge(client, message, url, platform, **kwargs)
-            return
-
+    if not succ and "bridge" in douyin_provider:  # try bridge
+        logger.error("❌抖音解析失败, 尝试第三方Bot...")
+        await send_to_social_media_bridge(client, message, url, platform, **kwargs)
+        return
     aweme_id = glom(data, "aweme_id", default=Path(url).stem)
     if int(glom(data, "media_type", default=4)) == 2:  # image post
         media = [{"photo": download_first_success_urls(glom(x, "url_list", default=[]), proxy=proxy, **kwargs)} for x in glom(data, "images", default=[])]
src/preview/instagram.py
@@ -29,8 +29,8 @@ async def preview_instagram(
     url: str = "",
     db_key: str = "",
     *,
+    instagram_provider: str = PROVIDER.INSTAGRAM,
     instagram_comments_provider: str = PROVIDER.INSTAGRAM_COMMENTS,
-    fallback: bool = True,
     **kwargs,
 ):
     """Preview instagram link in the message.
@@ -40,8 +40,8 @@ async def preview_instagram(
         message (Message): The trigger message object.
         url (str, optional): Tnstagram link.
         db_key (str, optional): The cache key.
+        instagram_provider (str, optional): The instagram extractor: tikhub, ddinstagram, bridge
         instagram_comments_provider (str, optional): The instagram comments extractor: "tikhub" or "false".
-        fallback (bool, optional): Fallback to other bots. Defaults to True.
     """
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析Instagram链接\n{url}", **kwargs)
@@ -52,14 +52,17 @@ async def preview_instagram(
         if await copy_messages_from_db(client, message, key=db_key, kv=kv, **kwargs):
             return
         await modify_progress(text=f"❌从{DB.ENGINE}缓存中转发失败, 尝试重新解析...", **kwargs)
-
-    api_url = API.TIKHUB_INSTAGRAM + url
-    logger.info(f"Preview Instagram TikHub for {api_url}")
-    headers = {"authorization": f"Bearer {TOKEN.TIKHUB}", "accept": "application/json"}
-    resp = await hx_req(api_url, headers=headers, check_keys=["data"], check_kv={"code": 200})
-    if resp.get("hx_error"):
-        await modify_progress(text=f"❌Instagram解析失败, 使用DDInstagram预览\n{resp['hx_error']}", **kwargs)
-        await preview_ddinstagram(client, message, url=url, fallback=fallback, **kwargs)
+    succ = False
+    if "tikhub" in instagram_provider:  # try tikhub
+        api_url = API.TIKHUB_INSTAGRAM + url
+        logger.info(f"Preview Instagram TikHub for {api_url}")
+        headers = {"authorization": f"Bearer {TOKEN.TIKHUB}", "accept": "application/json"}
+        resp = await hx_req(api_url, headers=headers, check_keys=["data"], check_kv={"code": 200})
+        if not resp.get("hx_error"):
+            succ = True
+    if not succ:
+        logger.error("❌Instagram解析失败, 使用DDInstagram预览")
+        await preview_ddinstagram(client, message, url=url, instagram_provider=instagram_provider, **kwargs)
         return
 
     data = resp["data"]
@@ -104,7 +107,7 @@ async def preview_instagram(
     await save_messages(messages=sent_messages, key=db_key)
 
 
-async def preview_ddinstagram(client: Client, message: Message, url: str, post_type: str, post_id: str, *, fallback: bool = True, **kwargs):
+async def preview_ddinstagram(client: Client, message: Message, url: str, post_type: str, post_id: str, *, instagram_provider: str, **kwargs):
     """Preview instagram link in the message via DDInstagram.
 
     https://ddinstagram.com/
@@ -117,33 +120,34 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
         post_id (str): post id.
         fallback (bool, optional): Fallback to other bots. Defaults to True.
     """
+    if "ddinstagram" not in instagram_provider:
+        if "bridge" in instagram_provider:
+            await send_to_social_media_bridge(client, message, url, **kwargs)
+        return
     api_url = f"{API.DDINSTAGRAM}/{post_type}/{post_id}"
     logger.info(f"Instagram link preview for {api_url}")
     headers = {"user-agent": TELEGRAM_UA}
     resp = await hx_req(api_url, headers=headers, rformat="text")
     if not resp.get("text"):
-        if fallback:
+        if "bridge" in instagram_provider:
             await send_to_social_media_bridge(client, message, url, **kwargs)
         return
-
     soup = BeautifulSoup(resp["text"], "html.parser")
     logger.trace(soup.prettify())
 
     texts = ""
     media = {}
-    if tag := soup.find("meta", attrs={"name": "twitter:title"}):
+    if tag := soup.find("meta", attrs={"property": "twitter:title"}):
         author = tag.get("content", "Unknown")  # type: ignore
         texts += f"🏞**[{author}]({url})\n"
     if tag := soup.find("meta", attrs={"property": "og:description"}):
-        texts: str = tag.get("content", "")  # type: ignore
-    if tag := soup.find("meta", attrs={"name": "twitter:image"}):
-        img_url = tag.get("content", "")  # type: ignore
-        if img_url:
-            raw_url = f"{API.DDINSTAGRAM}{img_url}"
-            media["photo"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.jpg", proxy=PROXY.INSTAGRAM, **kwargs)
-            if not bool(await validate_img(media["photo"])):
-                await send_to_social_media_bridge(client, message, text=url, **kwargs)
-                return
+        texts += tag.get("content", "")  # type: ignore
+    if (tag := soup.find("meta", attrs={"property": "twitter:image"})) and (img_url := tag.get("content")):  # type: ignore
+        raw_url = f"{API.DDINSTAGRAM}{img_url}"
+        media["photo"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.jpg", proxy=PROXY.INSTAGRAM, **kwargs)
+        if not bool(await validate_img(media["photo"])):
+            await send_to_social_media_bridge(client, message, text=url, **kwargs)
+            return
 
     if tag := soup.find("meta", attrs={"property": "og:video"}):
         video_url = tag.get("content", "")  # type: ignore
@@ -155,3 +159,4 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
                 return
 
     await send2tg(client, message, texts=texts, media=[media], **kwargs)
+    await modify_progress(del_status=True, **kwargs)
src/config.py
@@ -120,10 +120,11 @@ class DANMU:
 
 
 class PROVIDER:  # default API provider
-    DOUYIN = os.getenv("DOUYIN_PROVIDER", "free-tikhub").lower()  # free or tikhub
+    DOUYIN = os.getenv("DOUYIN_PROVIDER", "free-tikhub-bridge").lower()  # free or tikhub
     DOUYIN_COMMENTS = os.getenv("DOUYIN_COMMENTS_PROVIDER", "free-tikhub").lower()  # free or tikhub or a false value (0, false, none, null, etc.)
     TWITTER = os.getenv("TWITTER_PROVIDER", "tikhub-fxtwitter").lower()  # tikhub or fxtwitter
     TWITTER_COMMENTS = os.getenv("TWITTER_COMMENTS_PROVIDER", "tikhub").lower()  # tikhub or a false value (0, false, none, null, etc.)
+    INSTAGRAM = os.getenv("INSTAGRAM_PROVIDER", "tikhub-ddinstagram-bridge").lower()  # tikhub, ddinstagram, bridge
     INSTAGRAM_COMMENTS = os.getenv("INSTAGRAM_COMMENTS_PROVIDER", "tikhub").lower()  # tikhub or a false value (0, false, none, null, etc.)
     WEIBO_COMMENTS = os.getenv("WEIBO_COMMENTS_PROVIDER", "free").lower()  # free or a false value (0, false, none, null, etc.)
     YOUTUBE_COMMENTS = os.getenv("YOUTUBE_COMMENTS_PROVIDER", "free").lower()  # free or a false value (0, false, none, null, etc.)