Commit 290890a

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-12 07:43:44
fix(reddit): fix regex for reddit link
1 parent 45ece97
Changed files (1)
src/networking.py
@@ -368,6 +368,11 @@ async def match_social_media_link(text: str, *, flatten_first: bool = True) -> d
     if matched := re.search(r"(https?://)?mp.weixin.qq.com/s[\/|\?]{1}([_A-Za-z\=\&0-9\#\-]+)", text):
         return {"url": matched.group(0), "db_key": bare_url(matched.group(0)), "platform": "wechat"}
 
+    # !Keep this before all other reddit rules: it reassigns `text`, which the rules below then match against
+    # Example share link: https://www.reddit.com/r/China_irl/s/bA50WleCBM
+    reddit_pattern = r"(https?://)?(:?m\.|www\.)reddit\.com/r/\w+/s/([^.。,,?&/\s]+)"
+    if matched := re.search(reddit_pattern, text):
+        text = await flatten_rediercts(https_url(matched.group(0)), pattern=reddit_pattern, proxy=PROXY.REDDIT)
     # https://www.reddit.com/r/DoubanGoosegroup/comments/1jkpgvp/%E8%B5%B5%E8%96%87%E4%BB%80%E4%B9%88%E6%97%B6%E5%80%99%E5%9B%9E%E6%9D%A5/
     # https://www.reddit.com/r/DoubanGoosegroup/comments/1jkpgvp/赵薇什么时候回来
     # https://www.reddit.com/r/DoubanGoosegroup/comments/1jkpgvp/comment/mk43l4t/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button
@@ -461,7 +466,8 @@ if __name__ == "__main__":
     check_data(json.dumps({"foo": "bar", "baz": {"qux": "quux"}, "lst": ["1", "2", "3"]}), check_keys=["baz.qux"], check_kv={"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]})
     # asyncio.run(match_social_media_link("https://b23.tv/3MSgT4q/", flatten_first=True))
     # print(asyncio.run(match_social_media_link("https://mp.weixin.qq.com/s/bd_giuPEyPBu9LTOtC2VHw", flatten_first=True)))
-    print(asyncio.run(match_social_media_link("https://reddit.com/comments/1kaazzn", flatten_first=True)))
+    # print(asyncio.run(match_social_media_link("https://reddit.com/comments/1kaazzn", flatten_first=True)))
+    print(asyncio.run(match_social_media_link("https://www.reddit.com/r/China_irl/s/bA50WleCBM")))
     # asyncio.run(match_social_media_link("https://www.facebook.com/share/r/19QGGp39T3/", flatten_first=True))
     # asyncio.run(match_social_media_link("https://www.douyin.com/video/7398813386827468041"))
     # asyncio.run(match_social_media_link("https://www.iesdouyin.com/share/note/7454527270925946138/"))