Commit 2c9c3fd

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-02-08 09:08:26
fix(weibo): use decimal post ID for cache key
1 parent d1f0088
Changed files (1)
src
preview
src/preview/weibo.py
@@ -50,6 +50,10 @@ async def preview_weibo(
     """
     if post_id.startswith("weibovideo"):  # disable comments for weibo video
         weibo_comments_provider = "0"
+        post_id = post_id.removeprefix("weibovideo")  # 1034:5123779299311660
+    else:
+        real_post_id = real_weibo_post_id(post_id)
+        db_key = db_key.replace(post_id, real_post_id)
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析微博链接\n{url}", **kwargs)
         kwargs["progress"] = res[0]
@@ -58,7 +62,6 @@ async def preview_weibo(
         if await copy_messages_from_db(client, message, key=url, kv=kv, **kwargs):
             return
         await modify_progress(text=f"❌从{DB.ENGINE}缓存中转发失败, 尝试重新解析...", **kwargs)
-
     this_info = await parse_weibo_info(post_id, **kwargs)
     if error_msg := this_info.get("error_msg"):
         if this_info.get("fallback", fallback):
@@ -298,3 +301,45 @@ async def parse_weibo_comments(post_id: str) -> list[str]:
     if len(comments) > 2:
         comments[-1] += "||"
     return comments
+
+
def real_weibo_post_id(post_id: str) -> str:
    """Convert weibo post ID from base62 to decimal format.

    These are the same post:
    - https://m.weibo.cn/detail/Pdlnlnt0E
    - https://m.weibo.cn/status/5131804355593060

    This function converts: "Pdlnlnt0E" -> "5131804355593060"

    The base62 ID is split into groups of four characters counted from the
    right, so only the leading group may be shorter. Each group is converted
    to decimal on its own; every group except the leading one is left-padded
    with zeros to seven digits before the pieces are concatenated, e.g.
    "z0JH2lOMb" -> "35" + "0175648" + "5200075" -> "3501756485200075".

    Args:
        post_id (str): The base62 weibo post ID to convert. An ID made up
            only of digits is treated as already decimal and returned as is.

    Returns:
        str: The decimal weibo post ID.

    Raises:
        KeyError: If ``post_id`` contains a character outside ``0-9a-zA-Z``.

    Reference:
        https://blog.csdn.net/steven30832/article/details/8292230
    """
    if post_id.isdigit():
        return post_id
    mapping = {c: i for i, c in enumerate("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")}

    def base62_to_b10(str_62: str) -> int:
        value = 0
        for s in str_62:
            value = value * 62 + mapping[s]
        return value

    group_size = 4  # base62 characters per group
    group_width = 7  # decimal digits each non-leading group must occupy
    head_len = len(post_id) % group_size  # size of the (possibly short) leading group

    parts: list[str] = []
    if head_len:
        parts.append(str(base62_to_b10(post_id[:head_len])))
    for start in range(head_len, len(post_id), group_size):
        digits = str(base62_to_b10(post_id[start : start + group_size]))
        # Only the very first group keeps its natural width; dropping the
        # leading zeros of any later group would shorten and corrupt the ID.
        parts.append(digits.zfill(group_width) if parts else digits)
    return "".join(parts)