Commit 34c298b

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-01-30 13:34:32
perf: return early in match_social_media_link
1 parent 012fa1d
Changed files (3)
src/handler.py
@@ -168,6 +168,8 @@ async def handle_social_media(
         kwargs["send_from_user"] = f"👤[@{info['full_name']}](tg://user?id={info['uid']})//"
     try:
         matched = await match_social_media_link(info["text"], flatten_first=True)  # match "platform" and "url" (and other info)
+        if matched["platform"]:
+            logger.success(f"Matched: {matched}")
         kwargs |= matched
         if startswith_prefix(this_texts, prefix=["/retry"], ignore_prefix=ignore_prefix):
             await del_db(matched["db_key"])
src/networking.py
@@ -316,27 +316,26 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
     text = str(text)
     if flatten_first:
         text = await flatten_rediercts(text)
-    matched_info = {"platform": ""}
     # https://www.douyin.com/video/7398813386827468041
     # https://www.douyin.com/note/7458195074434846004
     # https://www.iesdouyin.com/share/video/7454527270925946138/
     # https://www.iesdouyin.com/share/note/7454527270925946138/
     if matched := re.search(r"(https?://)?(www\.)?(ies)?douyin\.com/(share/)?(:?|video|note)/(\d+)", text):
-        matched_info = {"url": f"https://www.douyin.com/{matched.group(5)}/{matched.group(6)}", "db_key": f"www.douyin.com/{matched.group(5)}/{matched.group(6)}", "platform": "douyin"}
+        return {"url": f"https://www.douyin.com/{matched.group(5)}/{matched.group(6)}", "db_key": f"www.douyin.com/{matched.group(5)}/{matched.group(6)}", "platform": "douyin"}
     # https://www.douyin.com/user/MS4wLjABAAAAXgBuOEcyavDhrRBqnD8x7d4pj7RIL5QFRlLehCnem8couoAg8yXR-MGhUK0i4riF?modal_id=7451543857952492810
     if matched := re.search(r"(https?://)?(www\.)?douyin\.com/user/(.*?)\?(.*?)modal_id=(\d+)", text):
-        matched_info = {"url": f"https://www.douyin.com/video/{matched.group(5)}", "db_key": f"www.douyin.com/video/{matched.group(5)}", "platform": "douyin"}
+        return {"url": f"https://www.douyin.com/video/{matched.group(5)}", "db_key": f"www.douyin.com/video/{matched.group(5)}", "platform": "douyin"}
     # https://www.tiktok.com/@baymermel/video/7460653893941267755\?_t\=ZS-8t8YbVWqv5k\&_r\=1
     if matched := re.search(r"(https?://)?(www\.)?tiktok\.com/(.*?)/(\d+)", text):
-        matched_info = {"url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "tiktok"}
+        return {"url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "tiktok"}
 
     # https://www.instagram.com/p/C7P3jN8vmEN
     # https://www.instagram.com/reel/DBBEGXpvwNF
     if matched := re.search(r"(https?://)?(www\.)?instagram\.com/(:?|p|reel)/([^.。,,/\s]+)", text):
-        matched_info = {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
+        return {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
     # https://www.instagram.com/yifaer_chen/p/DEzv9x-vzOn/
     if matched := re.search(r"(https?://)?(www\.)?instagram\.com/\w+/(:?|p|reel)/([^.。,,/\s]+)", text):
-        matched_info = {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
+        return {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
 
     # https://x.com/taylorswift13/status/1794805688696275131
     # https://twitter.com/taylorswift13/status/1794805688696275131
@@ -347,21 +346,21 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
         handle = matched.group(3)
         post_id = matched.group(4)
         url = f"https://x.com/{handle}/status/{post_id}"
-        matched_info = {"platform": platform, "handle": handle, "post_id": post_id, "url": url, "db_key": bare_url(url)}
+        return {"platform": platform, "handle": handle, "post_id": post_id, "url": url, "db_key": bare_url(url)}
 
     # https://weibo.com/1736562685/P6lhSjRnI
     if matched := re.search(r"(https?://)?(www\.)?weibo\.com/(.*?)/(\w+)", text):
-        matched_info = {"post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(4)}", "platform": "weibo"}
+        return {"post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(4)}", "platform": "weibo"}
     # https://m.weibo.cn/detail/5113333048938691
     # https://m.weibo.cn/status/5113333048938691
     if matched := re.search(r"(https?://)?m\.weibo\.cn/(:?detail|status)/(\w+)", text):
-        matched_info = {"post_id": matched.group(3), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(3)}", "platform": "weibo"}
+        return {"post_id": matched.group(3), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(3)}", "platform": "weibo"}
     # https://video.weibo.com/show?fid=1034:5123779299311660
     if matched := re.search(r"(https?://)?video\.weibo\.(:?com|cn)/show\?fid=(\d+):(\d+)", text):
-        matched_info = {"post_id": f"weibovideo{matched.group(3)}:{matched.group(4)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
+        return {"post_id": f"weibovideo{matched.group(3)}:{matched.group(4)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
     # https://weibo.com/tv/show/1034:5123779299311660?from=old_pc_videoshow
     if matched := re.search(r"(https?://)?(www\.)?weibo\.(:?com|cn)/tv/show/(\d+):(\d+)", text):
-        matched_info = {"post_id": f"weibovideo{matched.group(4)}:{matched.group(5)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
+        return {"post_id": f"weibovideo{matched.group(4)}:{matched.group(5)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
 
     # http://xhslink.com/a/Z3VPXAReU1Y1
     xhs_pattern = r"(https?://)?xhslink\.com/(\w?/?)([^,,.。?\s]+)"
@@ -371,14 +370,14 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
         post_id = Path(base_url).stem
         queries = parse_qs(urlparse(flatten).query)
         xsec = queries.get("xsec_token", [""])[0]
-        matched_info = {"url": https_url(matched.group(0)), "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
+        return {"url": https_url(matched.group(0)), "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
     # https://www.xiaohongshu.com/explore/671a3dfe00000000240161db?xsec_token=ABY-b1JKuAlIm2dX1OSdIFHD7cQFHEdThv5aMyccvmbJo=
     if matched := re.search(r"(https?://)?(www\.)?xiaohongshu\.com/([^.。,,\s]+)", text):
         base_url = matched.group(0).split("?")[0]
         post_id = Path(base_url).stem
         queries = parse_qs(urlparse(matched.group(0)).query)
         xsec = queries.get("xsec_token", [""])[0]
-        matched_info = {"url": f"https://www.xiaohongshu.com/explore/{post_id}?xsec_token={xsec}", "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
+        return {"url": f"https://www.xiaohongshu.com/explore/{post_id}?xsec_token={xsec}", "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
 
     # https://www.bilibili.com/video/BV1TC411J7PK
     if matched := re.search(r"(https?://)?(:?m\.|www\.)?bilibili\.com/video/([^,,.。\s]+)", text):
@@ -387,29 +386,27 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
         queries = parse_qs(urlparse(matched.group(0)).query)
         pid = queries.get("p", ["1"])[0]
         url = f"https://www.bilibili.com/video/{bvid}?p={pid}".removesuffix("?p=1")
-        matched_info = {"url": url, "db_key": bare_url(url), "bvid": bvid, "pid": pid, "platform": "ytdlp"}
+        return {"url": url, "db_key": bare_url(url), "bvid": bvid, "pid": pid, "platform": "ytdlp"}
 
     # https://www.youtube.com/watch?v=D6aE2E0RHTc
     if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/watch([^,,.。\s]+)", text):
         queries = parse_qs(urlparse(matched.group(0)).query)
         if vid := queries.get("v", [""])[0]:
-            matched_info = {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
+            return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
     # https://youtube.com/shorts/lFKHbluAlJw
     if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/shorts/([^,,.。?\s]+)", text):
         vid = matched.group(3)
-        matched_info = {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
+        return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
 
     # if all above pre-defined patterns failed, try to match ytdlp link
-    if not matched_info["platform"] and (urls := match_urls(text)):
+    if urls := match_urls(text):
         for url in urls:
-            if any(x in url.lower() for x in ["bilibili", "youtube"]):  # handled above
+            if any(x in url.lower() for x in ["bilibili", "douyin", "instagram", "tiktok", "twitter", "weibo", "xiaohongshu", "youtube"]):
+                # handled above
                 continue
             if is_supported_by_ytdlp(url):
-                matched_info = {"url": url, "db_key": bare_url(url), "platform": "ytdlp"}
-                break
-    if matched_info["platform"]:
-        logger.success(f"Matched: {matched_info}")
-    return matched_info
+                return {"url": url, "db_key": bare_url(url), "platform": "ytdlp"}
+    return {"platform": ""}
 
 
 @cache.memoize(ttl=60)
src/utils.py
@@ -276,3 +276,4 @@ if __name__ == "__main__":
     print(ascii_to_unicode("1.1"))
     print(ascii_to_unicode("test"))
     print(match_urls("http://a.com/BmT8gZ 匹配不到就删除了https://b.com/MxRdMO"))
+    print(is_supported_by_ytdlp("https://www.bilibili.com/video/BV15n61YtEmk"))