Commit 6a17b43
Changed files (1)
src/networking.py
@@ -379,12 +379,15 @@ async def match_social_media_link(text: str, *, flatten_first: bool = True) -> d
return {"url": url, "db_key": bare_url(url), "platform": "music163"}
# https://www.youtube.com/watch?v=D6aE2E0RHTc
- if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/watch([^,,.。\s]+)", text):
- queries = parse_qs(urlparse(matched.group(0)).query)
- if vid := queries.get("v", [""])[0]:
- return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "youtube"}
+ if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/watch.*?v=([a-zA-Z0-9_-]{11})", text):
+ vid = matched.group(3)
+ return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "youtube"}
# https://youtube.com/shorts/lFKHbluAlJw
- if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/shorts/([^,,.。?\s]+)", text):
+ if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/(:?shorts|live)/([a-zA-Z0-9_-]{11})", text):
+ vid = matched.group(4)
+ return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "youtube"}
+ # https://youtu.be/vOiP3kfFlrE
+ if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtu\.be/([a-zA-Z0-9_-]{11})", text):
vid = matched.group(3)
return {"url": f"https://www.youtube.com/watch?v={vid}", "db_key": f"www.youtube.com/watch?v={vid}", "vid": vid, "platform": "youtube"}
@@ -424,9 +427,6 @@ async def flatten_rediercts(texts: str | None = None, pattern: str | None = None
return ""
url = ""
- # youtu.be
- if matched := re.search(r"(https?://)?youtu\.be/([^.。,,?&/\s]+)", texts):
- url = matched.group(0)
# v.douyin.com
if matched := re.search(r"(https?://)?v\.douyin\.com/([^.。,,?&/\s]+)", texts):
method = "GET" # use GET for v.douyin.com
@@ -507,11 +507,14 @@ if __name__ == "__main__":
# asyncio.run(flatten_rediercts("https://v.douyin.com/CeiJfJMQG/"))
# asyncio.run(flatten_rediercts("https://www.tiktok.com/t/ZT2mcMA7f/"))
# asyncio.run(flatten_rediercts("https://t.co/Wwo3x69CQz"))
+ print(asyncio.run(match_social_media_link("https://www.youtube.com/watch?v=D6aE2E0RHTc")))
+ print(asyncio.run(match_social_media_link("https://youtube.com/shorts/lFKHbluAlJw")))
+ print(asyncio.run(match_social_media_link("https://youtu.be/vOiP3kfFlrE?si=zPd-Bt1GO03jxpI_")))
# res = asyncio.run(hx_req("https://httpbin.org/delay/10"))
# asyncio.run(hx_req("https://httpbin.org/get", check_kv={"url": "https://httpbin.org/get", "headers.Pragma": "no-cache1"}, max_retry=1))
# resp = asyncio.run(hx_req("https://httpbin.org/get", check_kv={"headers": {"Accept-Language": "en-US,en;q=0.8"}}))
- resp = asyncio.run(hx_req("https://httpbin.org/headers", headers={"referer": "https://www.xiaohongshu.com/"}))
- print(resp)
+ # resp = asyncio.run(hx_req("https://httpbin.org/headers", headers={"referer": "https://www.xiaohongshu.com/"}))
+ # print(resp)
# asyncio.run(download_file("https://httpbin.org/image/jpeg", suffix=".jpg"))
# asyncio.run(match_social_media_link("https://www.instagram.com/p/C7P3jN8vmEN"))