Commit 1f79e03

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-03-07 11:58:13
feat: add proxy support for douyin, tiktok, instagram, twitter, weibo, xiaohongshu
1 parent 0ff321f
src/preview/douyin.py
@@ -10,7 +10,7 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 
 from bridge.social import send_to_social_media_bridge
-from config import API, DB, PROVIDER, TOKEN, TZ, cache
+from config import API, DB, PROVIDER, PROXY, TOKEN, TZ, cache
 from database import get_db
 from messages.database import copy_messages_from_db, save_messages
 from messages.progress import modify_progress
@@ -55,6 +55,7 @@ async def preview_douyin(
             return
         await modify_progress(text=f"❌从{DB.ENGINE}缓存中转发失败, 尝试重新解析...", **kwargs)
 
+    proxy = PROXY.DOUYIN if platform == "douyin" else PROXY.TIKTOK
     logger.info(f"{platform} link preview for {url}")
     succ = False
     data = {}
@@ -82,12 +83,12 @@ async def preview_douyin(
 
     aweme_id = glom(data, "aweme_id", default=Path(url).stem)
     if int(glom(data, "media_type", default=4)) == 2:  # image post
-        media = [{"photo": download_first_success_urls(glom(x, "url_list", default=[]), workers_proxy=True, **kwargs)} for x in glom(data, "images", default=[])]
+        media = [{"photo": download_first_success_urls(glom(x, "url_list", default=[]), proxy=proxy, **kwargs)} for x in glom(data, "images", default=[])]
     else:  # video post
         video_urls = []
         for key in ["play_addr_h264", "play_addr_265", "play_addr"]:
             video_urls.extend(glom(data, f"video.{key}.url_list", default=[]))
-        media = [{"video": download_first_success_urls(video_urls, suffix=".mp4", workers_proxy=True, **kwargs)}]
+        media = [{"video": download_first_success_urls(video_urls, suffix=".mp4", proxy=proxy, **kwargs)}]
     await modify_progress(text=f"⏬正在下载:\n{summay_media(media)}", force_update=True, **kwargs)
     media = await download_media(media, **kwargs)
     texts = ""
src/preview/instagram.py
@@ -11,7 +11,7 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 
 from bridge.social import send_to_social_media_bridge
-from config import API, DB, DOWNLOAD_DIR, PROVIDER, TELEGRAM_UA, TOKEN, TZ, cache
+from config import API, DB, DOWNLOAD_DIR, PROVIDER, PROXY, TELEGRAM_UA, TOKEN, TZ, cache
 from database import get_db
 from messages.database import copy_messages_from_db, save_messages
 from messages.progress import modify_progress
@@ -67,14 +67,14 @@ async def preview_instagram(
     # parse media
     media = []
     if data.get("video_url"):  # reel
-        media.append({"video": download_file(data.get("video_url", ""), **kwargs)})
+        media.append({"video": download_file(data.get("video_url", ""), proxy=PROXY.INSTAGRAM, **kwargs)})
     elif media_nodes := glom(data, "edge_sidecar_to_children.edges", default=[]):
         for node in media_nodes:
             ftype = "photo" if not glom(node, "node.is_video", default=False) else "video"
             media_url = glom(node, "node.display_url", default="") if ftype == "photo" else glom(node, "node.video_url", default="")
-            media.append({ftype: download_file(media_url, **kwargs)})
+            media.append({ftype: download_file(media_url, proxy=PROXY.INSTAGRAM, **kwargs)})
     elif data.get("display_url"):
-        media.append({"photo": download_file(data.get("display_url"), **kwargs)})
+        media.append({"photo": download_file(data.get("display_url"), proxy=PROXY.INSTAGRAM, **kwargs)})
 
     texts = ""
     if fullname := glom(data, "owner.full_name", default=""):
@@ -144,7 +144,7 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
         img_url = tag.get("content", "")  # type: ignore
         if img_url:
             raw_url = f"{API.DDINSTAGRAM}{img_url}"
-            media["photo"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.jpg", workers_proxy=True, **kwargs)
+            media["photo"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.jpg", proxy=PROXY.INSTAGRAM, **kwargs)
             if not bool(validate_img(media["photo"])):
                 await send_to_social_media_bridge(client, message, text=url, **kwargs)
                 return
@@ -153,7 +153,7 @@ async def preview_ddinstagram(client: Client, message: Message, url: str, post_t
         video_url = tag.get("content", "")  # type: ignore
         if video_url:
             raw_url = f"{API.DDINSTAGRAM}{video_url}"
-            media["video"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.mp4", workers_proxy=True, **kwargs)
+            media["video"] = await download_file(raw_url, path=f"{DOWNLOAD_DIR}/{post_id}.mp4", proxy=PROXY.INSTAGRAM, **kwargs)
             if not is_valid_video_or_audio(media["video"]):
                 await send_to_social_media_bridge(client, message, text=url, **kwargs)
                 return
src/preview/twitter.py
@@ -11,7 +11,7 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 
 from bridge.social import send_to_social_media_bridge
-from config import API, DB, PROVIDER, TELEGRAM_UA, TOKEN, TZ, cache
+from config import API, DB, PROVIDER, PROXY, TELEGRAM_UA, TOKEN, TZ, cache
 from database import get_db
 from messages.database import copy_messages_from_db, save_messages
 from messages.progress import modify_progress
@@ -104,7 +104,7 @@ async def preview_twitter(
         if x["id"] in media_ids:
             continue
         media_ids.add(x["id"])
-        x[x["type"]] = download_file(x["url"], **kwargs)
+        x[x["type"]] = download_file(x["url"], proxy=PROXY.TWITTER, **kwargs)
         master_media.append(x)
 
     this_media = []
@@ -112,7 +112,7 @@ async def preview_twitter(
         if x["id"] in media_ids:
             continue
         media_ids.add(x["id"])
-        x[x["type"]] = download_file(x["url"], **kwargs)
+        x[x["type"]] = download_file(x["url"], proxy=PROXY.TWITTER, **kwargs)
         this_media.append(x)
 
     quote_media = []
@@ -120,7 +120,7 @@ async def preview_twitter(
         if x["id"] in media_ids:
             continue
         media_ids.add(x["id"])
-        x[x["type"]] = download_file(x["url"], **kwargs)
+        x[x["type"]] = download_file(x["url"], proxy=PROXY.TWITTER, **kwargs)
         quote_media.append(x)
 
     # 生成图片数量说明
src/preview/weibo.py
@@ -14,7 +14,7 @@ from pyrogram.client import Client
 from pyrogram.types import Message
 
 from bridge.social import send_to_social_media_bridge
-from config import API, DB, DOWNLOAD_DIR, PROVIDER, TELEGRAM_UA, TOKEN, TZ, cache
+from config import API, DB, DOWNLOAD_DIR, PROVIDER, PROXY, TELEGRAM_UA, TOKEN, TZ, cache
 from cookies import get_weibo_cookies
 from database import get_db
 from messages.database import copy_messages_from_db, save_messages
@@ -171,16 +171,16 @@ async def parse_weibo_info(post_id: str, data: dict | None = None, **kwargs) ->
         video_url = x.get("videoSrc")
         if mtype == "livephoto":
             # media.append({"photo": download_file(photo_url, **kwargs)})  # main photo
-            media.append({"video": download_file(video_url, path=f"{DOWNLOAD_DIR}/{pid}.mov", headers={"user-agent": TELEGRAM_UA}, **kwargs)})
+            media.append({"video": download_file(video_url, path=f"{DOWNLOAD_DIR}/{pid}.mov", headers={"user-agent": TELEGRAM_UA}, proxy=PROXY.WEIBO, **kwargs)})
         elif mtype in ["video", "gifvideos"]:
-            media.append({"video": download_file(video_url, suffix=".mp4", headers={"user-agent": TELEGRAM_UA}, **kwargs)})
+            media.append({"video": download_file(video_url, suffix=".mp4", headers={"user-agent": TELEGRAM_UA}, proxy=PROXY.WEIBO, **kwargs)})
         else:
-            media.append({"photo": download_file(photo_url, headers={"user-agent": TELEGRAM_UA}, **kwargs)})
+            media.append({"photo": download_file(photo_url, headers={"user-agent": TELEGRAM_UA}, proxy=PROXY.WEIBO, **kwargs)})
     if page_info := data.get("page_info", {}):
         videos = page_info.get("urls", {})
         if video_urls := [videos.get(quality) for quality in ["mp4_720p_mp4", "mp4_hd_mp4", "mp4_ld_mp4"] if videos.get(quality)]:
             # This maybe already downloaded by the above loop (for loop in data['pics'])
-            media.append({"video": download_first_success_urls(video_urls, skip_exist=True, suffix=".mp4", headers={"user-agent": TELEGRAM_UA}, **kwargs)})
+            media.append({"video": download_first_success_urls(video_urls, skip_exist=True, suffix=".mp4", headers={"user-agent": TELEGRAM_UA}, proxy=PROXY.WEIBO, **kwargs)})
     info["post_id"] = glom(data, "id", default=post_id)
     info["author"] = glom(data, "user.screen_name", default="")
     info["author_url"] = f"https://m.weibo.cn/detail/{post_id}"  # for weibo post, use post url as author url
@@ -220,7 +220,7 @@ async def parse_weibo_video(post_id: str, **kwargs) -> dict:
             return info
         data = resp["data"]["data"]["Component_Play_Playinfo"]
         urls = [https_url(x) for x in data.get("urls", {}).values()]
-        info["media"] = [{"video": await download_first_success_urls(urls, suffix=".mp4", **kwargs)}]
+        info["media"] = [{"video": await download_first_success_urls(urls, suffix=".mp4", proxy=PROXY.WEIBO, **kwargs)}]
         info["dt"] = ""
         if dt := ts_to_dt(data.get("real_date")):
             info["dt"] = f"{dt:%Y-%m-%d %H:%M:%S}"
src/preview/xiaohongshu.py
@@ -70,7 +70,7 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
                     video_urls.append(x["masterUrl"])
                 if x.get("backupUrls"):
                     video_urls.extend(x.get("backupUrls", []))
-        media.append({"video": download_first_success_urls(video_urls, suffix=".mp4", **kwargs)})
+        media.append({"video": download_first_success_urls(video_urls, suffix=".mp4", proxy=PROXY.XHS, **kwargs)})
     else:
         for img_info in note.get("imageList", []):
             img_url = img_info.get("urlDefault") or img_info.get("url") or ""
@@ -83,9 +83,9 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
                             video_urls.append(x["masterUrl"])
                         if x.get("backupUrls"):
                             video_urls.extend(x.get("backupUrls", []))
-                media.append({"livephoto": download_first_success_urls(video_urls, suffix=".mp4", **kwargs)})
+                media.append({"livephoto": download_first_success_urls(video_urls, suffix=".mp4", proxy=PROXY.XHS, **kwargs)})
             else:
-                media.append({"photo": download_file(img_url, suffix=".jpg", **kwargs)})
+                media.append({"photo": download_file(img_url, suffix=".jpg", proxy=PROXY.XHS, **kwargs)})
 
     title = note.get("title", "")
     author = note.get("user", {}).get("nickname", "")
src/config.py
@@ -114,11 +114,15 @@ class PROXY:  # format: socks5://127.0.0.1:7890
     XHS = os.getenv("XHS_PROXY", None)  # Banned VPS IP, need residential proxy
     TENCENT = os.getenv("TENCENT_PROXY", None)  # Banned oversea IP, need a back to China proxy
     GPT = os.getenv("GPT_PROXY", None)
+    DOUYIN = os.getenv("DOUYIN_PROXY", None)
+    TIKTOK = os.getenv("TIKTOK_PROXY", None)
+    INSTAGRAM = os.getenv("INSTAGRAM_PROXY", None)
+    TWITTER = os.getenv("TWITTER_PROXY", None)
     SUBTITLE = os.getenv("SUBTITLE_PROXY", None)
     CRYPTO = os.getenv("CRYPTO_PROXY", None)
     GOOGLE_SEARCH = os.getenv("GOOGLE_SEARCH_PROXY", None)
     DOWNLOAD = os.getenv("DOWNLOAD_PROXY", None)
-    WEIBO_COOKIE = os.getenv("WEIBO_COOKIE_PROXY", None)  # Weibo visitor cookie
+    WEIBO = os.getenv("WEIBO_PROXY", None)
     YTDLP = os.getenv("YTDLP_PROXY", None)  # general proxy for ytdlp
     YTDLP_FALLBACK = os.getenv("YTDLP_PROXY_FALLBACK", None)  # fallback proxy for ytdlp
     # for ytdlp proxy of specific sites (Like Bilibili), use this format: YTDLP_PROXY_BILIBILI
src/cookies.py
@@ -67,7 +67,7 @@ async def get_weibo_cookies() -> str:
     payload = {"cb": "visitor_gray_callback", "tid": "", "from": "weibo"}
     headers = {"Content-Type": "application/x-www-form-urlencoded"}
     try:
-        async with AsyncClient(http2=True, proxy=PROXY.WEIBO_COOKIE) as client:
+        async with AsyncClient(http2=True, proxy=PROXY.WEIBO) as client:
             response = await client.post(url, headers=headers, data=payload, follow_redirects=True, timeout=10)
             response.raise_for_status()
             set_cookie = response.headers.get("set-cookie", "")
src/networking.py
@@ -114,6 +114,7 @@ async def download_file(
     *,
     suffix: str = "",
     skip_exist: bool = False,
+    proxy: str | None = None,
     workers_proxy: bool = False,
     headers: dict | None = None,
     stream: bool = False,
@@ -126,6 +127,7 @@ async def download_file(
         path (str | Path, optional): The path to save the downloaded file. Defaults to auto detect.
         suffix (str, optional): The suffix to append to the file name. Defaults to auto detect.
         skip_exist (bool, optional): Skip downloading if the file already exists. Defaults to False.
+        proxy (str, optional): The proxy to use for the request.
         workers_proxy (bool, optional): Use workers proxy. Defaults to False.
         headers (dict, optional): The headers to use for the request.
         stream (bool, optional): Stream the download. Defaults to False.
@@ -147,11 +149,12 @@ async def download_file(
     if workers_proxy and PROXY.WORKERS:
         link = PROXY.WORKERS + quote_plus(link)
     path.parent.mkdir(parents=True, exist_ok=True)
-    logger.trace(f"Downloading {link} to {path}")
+    proxy = proxy or PROXY.DOWNLOAD
+    logger.trace(f"Downloading {link} to {path} with proxy={proxy}")
     hx = AsyncClient(
         headers=headers,
-        transport=AsyncCurlTransport(proxy=PROXY.DOWNLOAD, impersonate="safari_ios", default_headers=True, curl_options={CurlOpt.FRESH_CONNECT: True}),
-        proxy=PROXY.DOWNLOAD,
+        transport=AsyncCurlTransport(proxy=proxy, impersonate="safari_ios", default_headers=True, curl_options={CurlOpt.FRESH_CONNECT: True}),
+        proxy=proxy,
         timeout=REQUEST_TIMEOUT,
         follow_redirects=True,
         event_hooks={"request": [log_req], "response": [log_resp]},