Commit 34c298b
Changed files (3)
src/handler.py
@@ -168,6 +168,8 @@ async def handle_social_media(
kwargs["send_from_user"] = f"👤[@{info['full_name']}](tg://user?id={info['uid']})//"
try:
matched = await match_social_media_link(info["text"], flatten_first=True) # match "platform" and "url" (and other info)
+ if matched["platform"]:
+ logger.success(f"Matched: {matched}")
kwargs |= matched
if startswith_prefix(this_texts, prefix=["/retry"], ignore_prefix=ignore_prefix):
await del_db(matched["db_key"])
src/networking.py
@@ -316,27 +316,26 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
text = str(text)
if flatten_first:
text = await flatten_rediercts(text)
- matched_info = {"platform": ""}
# https://www.douyin.com/video/7398813386827468041
# https://www.douyin.com/note/7458195074434846004
# https://www.iesdouyin.com/share/video/7454527270925946138/
# https://www.iesdouyin.com/share/note/7454527270925946138/
if matched := re.search(r"(https?://)?(www\.)?(ies)?douyin\.com/(share/)?(:?|video|note)/(\d+)", text):
- matched_info = {"url": f"https://www.douyin.com/{matched.group(5)}/{matched.group(6)}", "db_key": f"www.douyin.com/{matched.group(5)}/{matched.group(6)}", "platform": "douyin"}
+ return {"url": f"https://www.douyin.com/{matched.group(5)}/{matched.group(6)}", "db_key": f"www.douyin.com/{matched.group(5)}/{matched.group(6)}", "platform": "douyin"}
# https://www.douyin.com/user/MS4wLjABAAAAXgBuOEcyavDhrRBqnD8x7d4pj7RIL5QFRlLehCnem8couoAg8yXR-MGhUK0i4riF?modal_id=7451543857952492810
if matched := re.search(r"(https?://)?(www\.)?douyin\.com/user/(.*?)\?(.*?)modal_id=(\d+)", text):
- matched_info = {"url": f"https://www.douyin.com/video/{matched.group(5)}", "db_key": f"www.douyin.com/video/{matched.group(5)}", "platform": "douyin"}
+ return {"url": f"https://www.douyin.com/video/{matched.group(5)}", "db_key": f"www.douyin.com/video/{matched.group(5)}", "platform": "douyin"}
# https://www.tiktok.com/@baymermel/video/7460653893941267755\?_t\=ZS-8t8YbVWqv5k\&_r\=1
if matched := re.search(r"(https?://)?(www\.)?tiktok\.com/(.*?)/(\d+)", text):
- matched_info = {"url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "tiktok"}
+ return {"url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "tiktok"}
# https://www.instagram.com/p/C7P3jN8vmEN
# https://www.instagram.com/reel/DBBEGXpvwNF
if matched := re.search(r"(https?://)?(www\.)?instagram\.com/(:?|p|reel)/([^.。,,/\s]+)", text):
- matched_info = {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
+ return {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
# https://www.instagram.com/yifaer_chen/p/DEzv9x-vzOn/
if matched := re.search(r"(https?://)?(www\.)?instagram\.com/\w+/(:?|p|reel)/([^.。,,/\s]+)", text):
- matched_info = {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
+ return {"post_type": matched.group(3), "post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "instagram"}
# https://x.com/taylorswift13/status/1794805688696275131
# https://twitter.com/taylorswift13/status/1794805688696275131
@@ -347,21 +346,21 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
handle = matched.group(3)
post_id = matched.group(4)
url = f"https://x.com/{handle}/status/{post_id}"
- matched_info = {"platform": platform, "handle": handle, "post_id": post_id, "url": url, "db_key": bare_url(url)}
+ return {"platform": platform, "handle": handle, "post_id": post_id, "url": url, "db_key": bare_url(url)}
# https://weibo.com/1736562685/P6lhSjRnI
if matched := re.search(r"(https?://)?(www\.)?weibo\.com/(.*?)/(\w+)", text):
- matched_info = {"post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(4)}", "platform": "weibo"}
+ return {"post_id": matched.group(4), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(4)}", "platform": "weibo"}
# https://m.weibo.cn/detail/5113333048938691
# https://m.weibo.cn/status/5113333048938691
if matched := re.search(r"(https?://)?m\.weibo\.cn/(:?detail|status)/(\w+)", text):
- matched_info = {"post_id": matched.group(3), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(3)}", "platform": "weibo"}
+ return {"post_id": matched.group(3), "url": https_url(matched.group(0)), "db_key": f"m.weibo.cn/detail/{matched.group(3)}", "platform": "weibo"}
# https://video.weibo.com/show?fid=1034:5123779299311660
if matched := re.search(r"(https?://)?video\.weibo\.(:?com|cn)/show\?fid=(\d+):(\d+)", text):
- matched_info = {"post_id": f"weibovideo{matched.group(3)}:{matched.group(4)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
+ return {"post_id": f"weibovideo{matched.group(3)}:{matched.group(4)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
# https://weibo.com/tv/show/1034:5123779299311660?from=old_pc_videoshow
if matched := re.search(r"(https?://)?(www\.)?weibo\.(:?com|cn)/tv/show/(\d+):(\d+)", text):
- matched_info = {"post_id": f"weibovideo{matched.group(4)}:{matched.group(5)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
+ return {"post_id": f"weibovideo{matched.group(4)}:{matched.group(5)}", "url": https_url(matched.group(0)), "db_key": bare_url(matched.group(0)), "platform": "weibo"}
# http://xhslink.com/a/Z3VPXAReU1Y1
xhs_pattern = r"(https?://)?xhslink\.com/(\w?/?)([^,,.。?\s]+)"
@@ -371,14 +370,14 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
post_id = Path(base_url).stem
queries = parse_qs(urlparse(flatten).query)
xsec = queries.get("xsec_token", [""])[0]
- matched_info = {"url": https_url(matched.group(0)), "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
+ return {"url": https_url(matched.group(0)), "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
# https://www.xiaohongshu.com/explore/671a3dfe00000000240161db?xsec_token=ABY-b1JKuAlIm2dX1OSdIFHD7cQFHEdThv5aMyccvmbJo=
if matched := re.search(r"(https?://)?(www\.)?xiaohongshu\.com/([^.。,,\s]+)", text):
base_url = matched.group(0).split("?")[0]
post_id = Path(base_url).stem
queries = parse_qs(urlparse(matched.group(0)).query)
xsec = queries.get("xsec_token", [""])[0]
- matched_info = {"url": f"https://www.xiaohongshu.com/explore/{post_id}?xsec_token={xsec}", "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
+ return {"url": f"https://www.xiaohongshu.com/explore/{post_id}?xsec_token={xsec}", "db_key": f"www.xiaohongshu.com/explore/{post_id}", "xsec": xsec, "platform": "xiaohongshu"}
# https://www.bilibili.com/video/BV1TC411J7PK
if matched := re.search(r"(https?://)?(:?m\.|www\.)?bilibili\.com/video/([^,,.。\s]+)", text):
@@ -387,29 +386,27 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
queries = parse_qs(urlparse(matched.group(0)).query)
pid = queries.get("p", ["1"])[0]
url = f"https://www.bilibili.com/video/{bvid}?p={pid}".removesuffix("?p=1")
- matched_info = {"url": url, "db_key": bare_url(url), "bvid": bvid, "pid": pid, "platform": "ytdlp"}
+ return {"url": url, "db_key": bare_url(url), "bvid": bvid, "pid": pid, "platform": "ytdlp"}
# https://www.youtube.com/watch?v=D6aE2E0RHTc
if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/watch([^,,.。\s]+)", text):
queries = parse_qs(urlparse(matched.group(0)).query)
if vid := queries.get("v", [""])[0]:
- matched_info = {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
+ return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
# https://youtube.com/shorts/lFKHbluAlJw
if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/shorts/([^,,.。?\s]+)", text):
vid = matched.group(3)
- matched_info = {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
+ return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "ytdlp"}
# if all above pre-defined patterns failed, try to match ytdlp link
- if not matched_info["platform"] and (urls := match_urls(text)):
+ if urls := match_urls(text):
for url in urls:
- if any(x in url.lower() for x in ["bilibili", "youtube"]): # handled above
+ if any(x in url.lower() for x in ["bilibili", "douyin", "instagram", "tiktok", "twitter", "weibo", "xiaohongshu", "youtube"]):
+ # handled above
continue
if is_supported_by_ytdlp(url):
- matched_info = {"url": url, "db_key": bare_url(url), "platform": "ytdlp"}
- break
- if matched_info["platform"]:
- logger.success(f"Matched: {matched_info}")
- return matched_info
+ return {"url": url, "db_key": bare_url(url), "platform": "ytdlp"}
+ return {"platform": ""}
@cache.memoize(ttl=60)
src/utils.py
@@ -276,3 +276,4 @@ if __name__ == "__main__":
print(ascii_to_unicode("1.1"))
print(ascii_to_unicode("test"))
print(match_urls("http://a.com/BmT8gZ 匹配不到就删除了https://b.com/MxRdMO"))
+ print(is_supported_by_ytdlp("https://www.bilibili.com/video/BV15n61YtEmk"))