Commit 2a7d1b7

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-02-12 05:26:10
fix(bilibili): convert AV to BV ID
1 parent a0c29bf
Changed files (3)
src/preview/utils.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import re
+
+# Parameters of the AV <-> BV ID transform used by av2bv() and bv2av().
+MAX_AID = 1 << 51  # bit OR-ed into the AV number before encoding
+MASK_CODE = MAX_AID - 1  # keeps only the low 51 bits when decoding
+XOR_CODE = 23442827791579  # XOR-ed in on encode, XOR-ed out again on decode
+# Digit alphabet: the character at index d represents the digit value d.
+ALPHABET = "FcwAPNKTMug3GV5Lj7EJnHpWsx4tb8haYeviqBz6rkCy12mUSDQX9RdoZf"
+# ENCODE_MAP[i] is the output position of the i-th least significant digit.
+ENCODE_MAP = (8, 7, 0, 5, 1, 3, 2, 4, 6)
+# The same positions, ordered most significant digit first.
+DECODE_MAP = ENCODE_MAP[::-1]
+
+BASE = len(ALPHABET)  # radix of the positional encoding
+CODE_LEN = len(ENCODE_MAP)  # number of encoded characters after the "BV1" prefix
+
+
+def av2bv(aid: int | str) -> str:
+    """Convert a Bilibili AV ID to its BV ID.
+
+    Args:
+        aid: AV number as an int, a decimal string, or a string carrying an
+            "av" prefix in any letter case. A value that already starts with
+            "BV1" (any letter case) is treated as a BV ID and is not re-encoded.
+
+    Returns:
+        The BV ID: "BV1" followed by CODE_LEN characters taken from ALPHABET.
+
+    Raises:
+        ValueError: If the numeric part cannot be parsed by int().
+    """
+    text = str(aid)
+    if text[:3].upper() == "BV1":  # BV1Y4UHYyE2z
+        # Already a BV ID: normalise only the prefix, the remaining
+        # characters are case-sensitive and must be kept untouched.
+        return "BV1" + text[3:]
+    number = int(text[2:]) if text[:2].lower() == "av" else int(text)
+    # NOTE(review): negative values and values >= MAX_AID are not rejected
+    # here and do not round-trip through bv2av() — confirm whether callers
+    # need a range check.
+    chars = [""] * CODE_LEN
+    remaining = (MAX_AID | number) ^ XOR_CODE
+    # Peel off base-BASE digits, least significant first, and scatter each
+    # one to the output slot that ENCODE_MAP assigns to it.
+    for slot in ENCODE_MAP:
+        remaining, digit = divmod(remaining, BASE)
+        chars[slot] = ALPHABET[digit]
+    return "BV1" + "".join(chars)
+
+
+def bv2av(bvid: str | int) -> int:
+    """Convert a Bilibili BV ID to its numeric AV ID.
+
+    Args:
+        bvid: A BV ID ("BV1" followed by CODE_LEN characters from ALPHABET).
+            Values that are already AV IDs — an int, a digit-only string, or
+            a string with an "av" prefix in any letter case — are returned
+            as an int without decoding.
+
+    Returns:
+        The AV number.
+
+    Raises:
+        ValueError: If the value is neither an AV ID nor a well-formed BV ID
+            (wrong prefix, wrong length, or a character outside ALPHABET).
+    """
+    text = str(bvid)
+    if text[:2].lower() == "av":  # av113503016851915
+        return int(text[2:])
+    if text.isdigit():  # 113503016851915
+        return int(text)
+    # Explicit raises instead of `assert`: assertions are removed under
+    # `python -O`, which would let malformed input through unchecked.
+    if text[:3].upper() != "BV1":
+        raise ValueError(f"not a Bilibili BV ID: {text!r}")
+    code = text[3:]
+    if len(code) != CODE_LEN:
+        raise ValueError(f"BV ID must have {CODE_LEN} characters after 'BV1': {text!r}")
+    value = 0
+    # Read the digits back most significant first; str.index raises
+    # ValueError for any character that is not part of ALPHABET.
+    for slot in DECODE_MAP:
+        value = value * BASE + ALPHABET.index(code[slot])
+    return (value & MASK_CODE) ^ XOR_CODE
+
+
+# assert av2bv("av113503016851915") == "BV1Y4UHYyE2z"
+# assert av2bv("113503016851915") == "BV1Y4UHYyE2z"
+# assert av2bv(113503016851915) == "BV1Y4UHYyE2z"
+# assert av2bv("BV1Y4UHYyE2z") == "BV1Y4UHYyE2z"
+# assert bv2av("BV1Y4UHYyE2z") == 113503016851915
+# assert bv2av("113503016851915") == 113503016851915
+# assert bv2av("av113503016851915") == 113503016851915
+# assert bv2av(113503016851915) == 113503016851915
+
+
+def make_bvid_clickable(texts: str) -> str:
+    """Turn Bilibili BV IDs in a text into Markdown links.
+
+    Bare IDs and bilibili.com video URLs (with or without a scheme and with
+    or without an "m." / "www." host prefix) are both replaced:
+
+    "BV1234567890" -> [BV1234567890](https://www.bilibili.com/video/BV1234567890)
+
+    bvid format: https://github.com/SocialSisterYi/bilibili-API-collect/blob/18c1efb/docs/misc/bvid_desc.md
+
+    Args:
+        texts (str): The texts to process.
+
+    Returns:
+        str: The text with every match replaced by a Markdown link, or ""
+            when texts is empty.
+    """
+    if not texts:
+        return ""
+
+    def markdown_url(match: re.Match[str]) -> str:
+        # Exactly one group is set: group 1 for a video URL, group 2 for a
+        # bare ID. Always using the captured ID keeps scheme-less URLs from
+        # ending up inside the link label and target.
+        bvid = match.group(1) or match.group(2)
+        return f"[{bvid}](https://www.bilibili.com/video/{bvid})"
+
+    # match bilibili links or bvid only
+    pattern = (
+        r"(?:https?://)?(?:m\.|www\.)?bilibili\.com/video/(BV1[a-zA-Z0-9]{9})\b"
+        r"|\b(BV1[a-zA-Z0-9]{9})\b"
+    )
+    return re.sub(pattern, markdown_url, texts)
src/preview/ytdlp.py
@@ -3,7 +3,6 @@
 import asyncio
 import json
 import os
-import re
 import threading
 import time
 import warnings
@@ -27,6 +26,7 @@ from messages.utils import get_reply_to
 from multimedia import convert_to_h264, generate_cover
 from networking import hx_req
 from others.emoji import emojify
+from preview.utils import make_bvid_clickable
 from utils import readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
 
 
@@ -532,34 +532,6 @@ async def get_youtube_comments(vid: str | None, provider: str = PROVIDER.YOUTUBE
     return comments
 
 
-def make_bvid_clickable(texts: str) -> str:
-    """Make bvid in texts clickable.
-
-    "BV1234567890" -> [BV1234567890](https://www.bilibili.com/video/BV1234567890)
-
-    bvid format: https://github.com/SocialSisterYi/bilibili-API-collect/blob/18c1efb/docs/misc/bvid_desc.md
-    Args:
-        texts (str): The texts to process.
-
-    Returns:
-        str: bvid with markdown url.
-    """
-    if not texts:
-        return ""
-
-    def markdown_url(match):
-        if match.group(1):  # full url
-            bvid = match.group(3)
-            return f"[{bvid}](https://www.bilibili.com/video/{bvid})"
-        # bvid only
-        bvid = match.group(0)
-        return f"[{bvid}](https://www.bilibili.com/video/{bvid})"
-
-    # match bilibili links or bvid only
-    pattern = r"(https?://)?(:?m\.|www\.)?bilibili\.com/video/(BV1[a-zA-Z0-9]{9})\b|\bBV1[a-zA-Z0-9]{9}\b"
-    return re.sub(pattern, markdown_url, texts)
-
-
 def cleanup_ytdlp(vid: str):
     if not vid:
         return
src/networking.py
@@ -15,6 +15,7 @@ from loguru import logger
 from config import DOWNLOAD_DIR, PROXY, UA, cache, semaphore
 from messages.progress import modify_progress
 from messages.utils import summay_media
+from preview.utils import av2bv
 from utils import bare_url, check_data, https_url, is_supported_by_ytdlp, match_urls, readable_size
 
 # ruff: noqa: RUF001
@@ -341,7 +342,7 @@ async def match_social_media_link(text: str, *, flatten_first: bool = False) ->
         queries = parse_qs(urlparse(matched.group(0)).query)
         pid = queries.get("p", ["1"])[0]
         url = f"https://www.bilibili.com/video/{bvid}?p={pid}".removesuffix("?p=1")
-        return {"url": url, "db_key": bare_url(url), "bvid": bvid, "pid": pid, "platform": "bilibili"}
+        return {"url": url, "db_key": bare_url(url), "bvid": av2bv(bvid), "pid": pid, "platform": "bilibili"}
 
     # https://www.youtube.com/watch?v=D6aE2E0RHTc
     if matched := re.search(r"(https?://)?(:?m\.|www\.)?youtube\.com/watch([^,,.。\s]+)", text):