bennybot/src/utils.py at main

  1#!/usr/bin/env python
  2# -*- coding: utf-8 -*-
  3import base64
  4import contextlib
  5import hashlib
  6import json
  7import os
  8import random
  9import re
 10import string
 11import tempfile
 12from datetime import UTC, datetime
 13from decimal import Decimal
 14from pathlib import Path
 15from typing import Any
 16from zoneinfo import ZoneInfo
 17
 18import chardet
 19import markdown
 20import puremagic
 21import zhconv
 22from bilibili_api.utils.aid_bvid_transformer import aid2bvid, bvid2aid
 23from bs4 import BeautifulSoup
 24from bs4.element import PageElement
 25from glom import PathAccessError, glom
 26from loguru import logger
 27from markitdown import MarkItDown
 28from pyrogram.client import Client
 29from pyrogram.types import User
 30from yt_dlp.extractor import gen_extractors
 31
 32from config import CLEAN_OLD_FILES_OLDER_THAN_SECONDS, DOWNLOAD_DIR, TZ, cache
 33
 34# ruff: noqa: RUF001
 35
 36
 37def nowdt(tz: str = "UTC") -> datetime:
 38    return datetime.now(ZoneInfo(tz))
 39
 40
 41def nowstr(tz: str = TZ) -> str:
 42    now = nowdt(tz)
 43    return f"{now:%Y-%m-%d %H:%M:%S}"
 44
 45
 46def number(n: float | str | Decimal, precision: int = -1, *, sign: bool = False) -> str:
 47    """Normalize a number to its simplest decimal.
 48
 49    Example:
 50    "1.2340000" -> "1.234"
 51    1.000000 -> "1"
 52    """
 53    n = Decimal(n)
 54    if precision == -1:  # auto precision (up to 8 decimal places)
 55        return f"{n:.8f}".rstrip("0").rstrip(".")
 56    if precision == 0:
 57        return f"{n:.0f}"
 58    return f"{n:+.{precision}f}" if sign else f"{n:.{precision}f}"
 59
 60
 61def split_parts(first: int = 0, middle: int = 0, last: int = 0) -> dict:
 62    """Split a list of items into three parts: first, middle, and last.
 63
 64    Useful for determine the number of media files in master / reply / quote posts.
 65    """
 66    data = {
 67        "first": f"🏞P1-P{first}",
 68        "middle": f"🏞P{first + 1}-P{first + middle}",
 69        "last": f"🏞P{first + middle + 1}-P{first + middle + last}",
 70    }
 71    for k, v in data.items():
 72        idx1, idx2 = (s.strip("🏞P") for s in v.split("-"))
 73        if int(idx1) > int(idx2):
 74            data[k] = ""
 75        elif int(idx1) == int(idx2):
 76            data[k] = f"🏞P{idx1}"
 77    return data
 78
 79
 80def to_int(var: str | float) -> str | int:
 81    """Convert a string or float to an integer."""
 82    try:
 83        return int(float(var))
 84    except (ValueError, TypeError):
 85        return str(var)
 86
 87
 88def read_text(path: str | Path) -> str:
 89    """Read text file with any encoding."""
 90    path = Path(path).expanduser().resolve()
 91    if not path.is_file():
 92        logger.warning(f"File not found: {path.name}")
 93        return ""
 94    try:
 95        return path.read_text(encoding="utf-8")
 96    except UnicodeDecodeError:
 97        with path.open("rb") as f:  # Open in binary mode for detection
 98            raw_data = f.read()
 99            result = chardet.detect(raw_data)
100            detected_encoding = result["encoding"]
101            if detected_encoding:
102                logger.success(f"File: `{path.name}` Encoding: {detected_encoding}")
103                return path.read_text(encoding=detected_encoding)
104    logger.warning(f"Could not detect encoding: {path.name}")
105    return ""
106
107
def rand_string(length: int = 48) -> str:
    """Return ``length`` random ASCII letters and digits (drawn from ``random``)."""
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)
110
111
def rand_number(length: int = 8) -> int:
    """Return an integer built from ``length`` random decimal digits.

    Leading zeros are possible, so the result can have fewer than ``length`` digits.
    """
    digits = random.choices(string.digits, k=length)
    return int("".join(digits))
114
115
def true(value: Any) -> bool:
    """Interpret ``value`` as a boolean flag.

    Falsy values are false. A non-empty string is false only when its
    lowercase form is one of the known "off" words; the string is not
    stripped before the comparison. Everything else is true.
    """
    off_words = {"0", "n", "na", "n/a", "no", "not", "f", "false", "off", "none", "null", "disable", "disabled"}
    if isinstance(value, str):
        return bool(value) and value.lower() not in off_words
    return bool(value)
122
123
def sanitize_filename(filename: str, replacement: str = "_") -> str:
    """Make ``filename`` safe to use on Windows, Linux and macOS.

    Args:
        filename: The raw file name.
        replacement: Text substituted for every illegal character.

    Returns:
        The cleaned name; ``replacement`` when nothing is left after cleaning.
    """
    # Windows device names: a file with one of these names cannot be opened on
    # Windows even if it was created on Linux/macOS.
    reserved = {"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"}

    # Replace characters that are illegal on at least one platform, plus ASCII control characters (0-31).
    safe = re.sub(r'[\\/:*?"<>|\x00-\x1f]', replacement, filename)

    # Windows silently ignores trailing dots and spaces, which breaks cross-platform access.
    safe = safe.rstrip(". ")

    # Only a bare reserved name (no extension) is adjusted.
    if safe.upper() in reserved:
        safe += replacement

    # Fall back to the replacement when cleaning left nothing.
    return safe or replacement
146
147
148def digest(s: Any, length: int = 32, *, to_int: bool = False) -> str | int:
149    hasher = hashlib.shake_256()
150    if isinstance(s, Path) and s.is_file():
151        with open(s, "rb") as f:
152            for chunk in iter(lambda: f.read(65536), b""):
153                hasher.update(chunk)
154    elif isinstance(s, bytes):
155        hasher.update(s)
156    else:
157        hasher.update(str(s).encode())
158
159    raw_bytes = hasher.digest(length * 2)
160    b64_str = base64.urlsafe_b64encode(raw_bytes).decode("ascii")
161    b64_str = b64_str.replace("=", "").replace("-", "").replace("_", "")
162    if to_int:
163        b64_str = int.from_bytes(b64_str.encode("ascii"), byteorder="big")
164    return str(b64_str)[:length]
165
166
167def remove_none_values(d: dict | list) -> dict:
168    """Recursively removes keys with None values from a nested dictionary.
169
170    Cleans None values from lists and processes nested structures.
171
172    Args:
173        d (dict | list): The input dict or list
174
175    Returns:
176        dict: A cleaned dictionary or list with None values removed.
177    """
178    if isinstance(d, dict):  # If the input is a dictionary
179        cleaned_dict = {}
180        for key, value in d.items():
181            if isinstance(value, dict):
182                # Recursively clean nested dictionaries
183                nested_cleaned = remove_none_values(value)
184                if nested_cleaned:  # Only add non-empty cleaned dict
185                    cleaned_dict[key] = nested_cleaned
186            elif isinstance(value, list):
187                # Clean lists recursively
188                cleaned_list = [remove_none_values(item) if isinstance(item, dict | list) else item for item in value if item is not None]
189                if cleaned_list:  # Only add non-empty cleaned lists
190                    cleaned_dict[key] = cleaned_list
191            elif value is not None:
192                cleaned_dict[key] = value
193        return cleaned_dict
194    if isinstance(d, list):  # If the input is a list
195        return [remove_none_values(item) if isinstance(item, dict | list) else item for item in d if item is not None]  # type: ignore
196
197    return d  # Return non-dict, non-list values as is
198
199
def soup_to_text(soup: PageElement) -> str:
    """Flatten a parsed HTML node into plain text.

    ``<img>`` children contribute their ``alt`` attribute (when present),
    ``<br>`` children become a newline, and every other child is flattened
    recursively. A node without a ``children`` attribute yields its ``.text``.
    """
    if not hasattr(soup, "children"):
        return soup.text
    pieces = []
    for node in soup.children:  # type: ignore
        if node.name == "img" and "alt" in node.attrs:
            pieces.append(node["alt"])
        elif node.name == "br":
            pieces.append("\n")
        else:
            # The recursive call returns ``node.text`` for children-less nodes
            # and descends into all others.
            pieces.append(soup_to_text(node))
    return "".join(pieces)
214
215
def markdown_to_text(mkdown: str) -> str:
    """Render markdown to HTML and return the text content of that HTML.

    Any failure during rendering or parsing is swallowed and the input is
    returned unchanged.
    """
    try:
        rendered = markdown.markdown(mkdown)
        # NOTE(review): newlines are turned into <br> tags here, and get_text()
        # emits nothing for a <br>, so line breaks appear to be lost — confirm
        # whether that is intended.
        soup = BeautifulSoup(rendered.replace("\n", "<br>"), "html.parser")
        return soup.get_text()
    except Exception:
        return mkdown
222
223
224def number_to_emoji(num: int | str, default: str | None = None) -> str:
225    """Convert a number to an emoji."""
226    num = str(num)
227    if default is None:
228        default = num
229    return {"0": "0️⃣", "1": "1️⃣", "2": "2️⃣", "3": "3️⃣", "4": "4️⃣", "5": "5️⃣", "6": "6️⃣", "7": "7️⃣", "8": "8️⃣", "9": "9️⃣", "10": "🔟"}.get(num, default)
230
231
232def seconds_to_hms(seconds: float | str) -> str:
233    """Convert seconds to hms format."""
234    seconds = int(float(seconds))
235    m, s = divmod(seconds, 60)
236    h, m = divmod(m, 60)
237    if h == 0:
238        return f"{m:02d}:{s:02d}"
239    return f"{h:02d}:{m:02d}:{s:02d}"
240
241
def count_subtitles(texts: str) -> int:
    """Count the characters of ``texts`` once line-leading ``[...]`` stamps are removed.

    A bracketed prefix at the start of every line, together with at most one
    following whitespace character, is not counted.

    Args:
        texts: Input string potentially containing ``[..]`` timestamps

    Returns:
        int: Character count after timestamp removal
    """
    leading_stamp = re.compile(r"^\[.*?\]\s?", re.MULTILINE)
    return len(leading_stamp.sub("", texts))
253
254
255def stringfy(d: dict) -> dict:
256    """Convert dict values to string.
257
258    Args:
259        d (dict | list): The input dict or list
260
261    Returns:
262        dict: A stringfy dictionary or list.
263    """
264    if isinstance(d, dict):  # If the input is a dictionary
265        stringfy_dict = {}
266        for key, value in d.items():
267            if isinstance(value, dict | list | set):
268                stringfy_dict[key] = json.dumps(value)
269            else:
270                stringfy_dict[key] = unicode_to_ascii(value)
271        return stringfy_dict
272    return d  # Return non-dict, non-list values as is
273
274
def seconds_to_time(seconds: float) -> str:
    """Format a duration, rounded to whole seconds, as a clock string.

    100 -> "01:40"
    1000 -> "16:40"
    10000 -> "02:46:40"
    100000 -> "27:46:40"

    The hour field is omitted when it is zero.
    """
    total = round(float(seconds))
    hours = total // 3600
    minutes = total % 3600 // 60
    secs = total % 60
    fields = [hours, minutes, secs] if hours else [minutes, secs]
    return ":".join(f"{field:02d}" for field in fields)
289
290
291def to_dt(t: float | str | datetime | None, tz="UTC") -> datetime:
292    """Convert float, str, datetime to datetime."""
293    if isinstance(t, datetime):
294        return t
295    if isinstance(t, float):
296        ts = round(t)
297        ts = ts / 10**6 if ts > 10**14 else ts
298        ts = ts / 10**3 if ts > 10**11 else ts
299        return datetime.fromtimestamp(ts, tz=UTC).astimezone(ZoneInfo(tz))
300    if not isinstance(t, str):
301        return nowdt(tz)
302    t = str(t).strip()
303    if len(t) == 4:  # 2026
304        return datetime.strptime(t, "%Y").astimezone(ZoneInfo(tz))
305    if len(t) == 7:  # 2026-02
306        return datetime.strptime(t, "%Y-%m").astimezone(ZoneInfo(tz))
307    if len(t) == 10:  # 2026-02-01
308        return datetime.strptime(t, "%Y-%m-%d").astimezone(ZoneInfo(tz))
309    if len(t) == 19:  # 2026-02-01 12:46:40
310        return datetime.strptime(t, "%Y-%m-%d %H:%M:%S").astimezone(ZoneInfo(tz))
311    return nowdt(tz)
312
313
314def readable_time(seconds: str | float) -> str:
315    """Human readable time duration.
316
317    100 -> "1m40s"
318    1000 -> "16m40s"
319    10000 -> "2h46m40s"
320    100000 -> "1d3h46m40s"
321    """
322    try:
323        seconds = float(seconds)
324    except ValueError:
325        # already in reachable time
326        return str(seconds)
327    if seconds < 60:
328        return f"{seconds:.0f}s"
329    if seconds < 3600:
330        minutes, seconds = divmod(seconds, 60)
331        return f"{minutes:.0f}m{seconds:.0f}s"
332    if seconds < 86400:
333        hours, seconds = divmod(seconds, 3600)
334        minutes, seconds = divmod(seconds, 60)
335        return f"{hours:.0f}h{minutes:.0f}m{seconds:.0f}s"
336    days, seconds = divmod(seconds, 86400)
337    hours, seconds = divmod(seconds, 3600)
338    minutes, seconds = divmod(seconds, 60)
339    return f"{days:.0f}d{hours:.0f}h{minutes:.0f}m{seconds:.0f}s"
340
341
342def readable_size(num_bytes: str | float = 0, path: str | Path | None = None) -> str:
343    """Human readable file size."""
344    num_bytes = Path(path).stat().st_size if path is not None else float(num_bytes)
345    # for unit in ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"]:
346    for unit in ["B", "KB"]:
347        if abs(num_bytes) < 1024:
348            return f"{num_bytes:.1f} {unit}"
349        num_bytes /= 1024
350    return f"{num_bytes:.1f} MB"
351
352
def readable_count(num: int | str) -> str:
    """Abbreviate large counts using the Chinese unit 万 (ten thousand).

    Counts below 10000 are printed as-is. From 10000 up to 99999 one decimal
    digit is kept unless it would be zero (e.g. 12345 -> "1.2万", 10500 -> "1万").
    From 100000 on only whole 万 are shown. Input that is not a number is
    returned as ``str(num)``.
    """
    count = to_int(num)
    if not isinstance(count, int):
        return str(num)
    if count < 10000:
        return str(count)
    wan, rest = divmod(count, 10000)
    if count >= 100000 or rest < 1000:
        return f"{wan}万"
    return f"{wan}.{rest // 1000}万"
363
364
def find_url(text: str) -> str:
    """Return the first URL found in ``text``, or ``""`` when there is none.

    Non-string input yields ``""``.
    """
    if not isinstance(text, str):
        return ""
    pattern = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    hit = re.search(pattern, text)
    if hit is None:
        return ""
    url = hit.group(1)  # the outermost group spans the whole URL
    logger.debug(f"URL found from message text: {url}")
    return url
374
375
def https_url(url: str) -> str:
    """Return ``url`` with an ``https://`` scheme and no leading or trailing slashes.

    A leading ``https://`` or ``http://`` (lowercase) is replaced; anything
    else is kept and prefixed.
    """
    rest = str(url).removeprefix("https://").removeprefix("http://").strip("/")
    return f"https://{rest}"
378
379
def bare_url(url: str) -> str:
    """Return ``url`` without its ``https://`` / ``http://`` scheme and surrounding slashes."""
    stripped = str(url)
    for scheme in ("https://", "http://"):
        stripped = stripped.removeprefix(scheme)
    return stripped.strip("/")
382
383
384def ts_to_dt(ts: str | float | None) -> datetime | None:
385    if not ts:
386        return None
387
388    try:  # not number
389        ts = float(ts)
390    except ValueError:
391        return None
392
393    if 0 < float(ts) < 1:
394        return None
395    try:
396        return datetime.fromtimestamp(ts, tz=UTC).astimezone(ZoneInfo(TZ))
397    except Exception as e:
398        if "out of range" in str(e):
399            return ts_to_dt(ts / 1000)
400        logger.error(e)
401        return None
402
403
404def slim_cid(cid: int | str) -> str:
405    return str(cid).strip().removeprefix("-100")
406
407
408def strings_list(value: str | None = None, *, env_key: str = "", separator: str = ",", shuffle: bool = False) -> list[str]:
409    """Get list from environment variable."""
410    if value is None:
411        value = os.getenv(env_key, "")
412    results = [s.strip() for s in value.split(separator) if s.strip()]
413    if shuffle:
414        random.shuffle(results)
415    return results
416
417
def parse_time(timestr: str) -> dict[str, int]:
    """Parse a time string into its numeric fields.

    Support formats:
        length= 4: yyyy
        length= 6: yyyymm
        length= 7: yyyy-mm
        length= 8: yyyymmdd
        length=10: yyyy-mm-dd
        length=14: yyyymmddHHMMSS
        length=15: yyyymmdd-HHMMSS
        length=17: yyyymmdd HH:MM:SS
        length=19: yyyy-mm-dd HH:MM:SS

    The format is chosen by the length of ``timestr`` alone; separators are
    skipped by position and not validated.

    Returns:
        ``{"year": int, "month": int, "day": int, "hour": int, "minute": int, "second": int}``.
        Fields the format does not contain are 0. An empty ``timestr`` gives
        ``{}``; an unsupported length gives all zeros.

    Raises:
        ValueError: When a field position does not hold an integer.
    """
    res = {"year": 0, "month": 0, "day": 0, "hour": 0, "minute": 0, "second": 0}
    if not timestr:
        return {}

    # length -> {field: (start, stop)}; the year is always the first 4 characters.
    layouts: dict[int, dict[str, tuple[int, int]]] = {
        4: {},  # yyyy
        6: {"month": (4, 6)},  # yyyymm
        7: {"month": (5, 7)},  # yyyy-mm
        8: {"month": (4, 6), "day": (6, 8)},  # yyyymmdd
        10: {"month": (5, 7), "day": (8, 10)},  # yyyy-mm-dd
        14: {"month": (4, 6), "day": (6, 8), "hour": (8, 10), "minute": (10, 12), "second": (12, 14)},  # yyyymmddHHMMSS
        15: {"month": (4, 6), "day": (6, 8), "hour": (9, 11), "minute": (11, 13), "second": (13, 15)},  # yyyymmdd-HHMMSS
        17: {"month": (4, 6), "day": (6, 8), "hour": (9, 11), "minute": (12, 14), "second": (15, 17)},  # yyyymmdd HH:MM:SS
        19: {"month": (5, 7), "day": (8, 10), "hour": (11, 13), "minute": (14, 16), "second": (17, 19)},  # yyyy-mm-dd HH:MM:SS
    }
    layout = layouts.get(len(timestr))
    if layout is None:
        logger.warning(f"Invalid time format: {timestr}")
        return res

    res["year"] = int(timestr[:4])
    for field, (start, stop) in layout.items():
        res[field] = int(timestr[start:stop])
    return res
479
480
async def myself(client: Client) -> User:
    """Return the account behind ``client``, cached under the key ``"me"``.

    When fetching fails, the error is logged and a placeholder
    ``User(id=1, is_bot=False)`` is returned; the placeholder is not cached.
    """
    if cached := cache.get("me"):
        return cached
    try:
        me = await client.get_me()
    except Exception as e:
        logger.error(e)
        return User(id=1, is_bot=False)
    else:
        cache.set("me", me, ttl=0)
        return me
492
493
async def i_am_bot(client: Client) -> bool:
    """Check if this client is a bot or not.

    The answer is stored in the cache under ``"i_am_bot"``. Only a truthy
    cached value short-circuits, so a cached ``False`` is looked up again
    through ``myself`` on every call.
    """
    if cached := cache.get("i_am_bot"):
        return cached
    try:
        me = await myself(client)
    except Exception as e:
        logger.error(e)
        return False
    is_bot = me.is_bot
    cache.set("i_am_bot", is_bot, ttl=0)
    return is_bot
505
506
def match_urls(text: str) -> list[str]:
    """Find every URL in ``text`` and return each one normalized by ``https_url``.

    ``text`` is converted with ``str()`` first, so non-string input is accepted.
    """
    pattern = r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
    # Group 1 is the outermost group and spans the whole URL.
    return [https_url(hit.group(1)) for hit in re.finditer(pattern, str(text))]
514
515
def remove_dash(text: str) -> str:
    """Remove dash sequences from ``text``: first every ``---``, then every ``--``.

    A single ``-`` is kept. Falsy input yields ``""``.
    """
    if not text:
        return ""
    for marker in ("---", "--"):
        while marker in text:
            text = text.replace(marker, "")
    return text
524
525
def remove_pound(text: str) -> str:
    """Replace every run of ``#`` that is followed by a space with a single space.

    A ``#`` that is not followed by a space is kept. Falsy input yields ``""``.
    """
    if not text:
        return ""
    # One or more "#" directly before a space collapse into that space.
    return re.sub(r"#+ ", " ", text)
532
533
def remove_consecutive_newlines(text: str, newline_level: int = 3) -> str:
    """Collapse runs of newlines.

    Runs of three or more newlines are always reduced to two. With
    ``newline_level == 2`` every remaining run of two newlines is reduced to
    one as well. Falsy input yields ``""``.
    """
    if not text:
        return ""
    collapsed = re.sub(r"\n{3,}", "\n\n", text)
    if newline_level == 2:
        collapsed = re.sub(r"\n{2,}", "\n", collapsed)
    return collapsed
543
544
def is_supported_by_ytdlp(url: str) -> bool:
    """Check if ``url`` is handled by a yt-dlp extractor other than the generic one.

    Any url containing ``t.me`` (Telegram links) is reported as unsupported.
    """
    if "t.me" in url:  # tg link
        return False
    # Skip the "generic" extractor when testing for a match.
    return any(ie.IE_NAME != "generic" and ie.suitable(url) for ie in gen_extractors())
552
553
554def guess_mime(path: str | Path) -> str:
555    path = Path(path).expanduser().resolve()
556    if not path.is_file():
557        return ""
558    with contextlib.suppress(Exception):
559        import magic  # magic needs `libmagic` to be installed.
560
561        # `sudo apt-get install libmagic1` or `brew install libmagic`
562        return magic.from_file(path, mime=True)
563
564    # infer from `magic` failed
565    with contextlib.suppress(Exception):
566        return puremagic.from_file(path, mime=True)
567    return ""
568
569
570def unicode_to_ascii(text: str | float) -> str:
571    if not text:
572        return ""
573    return str(text).encode("unicode_escape").decode("ascii")
574
575
def ascii_to_unicode(text: str) -> str:
    """Decode ``unicode_escape`` sequences in an ASCII string back to characters.

    Falsy input yields ``""``.
    """
    if text:
        return str(text).encode("ascii").decode("unicode_escape")
    return ""
580
581
582def save_txt(text: str, path: Path | str | None = None) -> str:
583    if path is None:
584        path = Path(DOWNLOAD_DIR) / f"{rand_string()}.txt"
585    Path(path).write_text(text)
586    return Path(path).as_posix()
587
588
589def check_data(text: str, check_keys: list[str] | None = None, check_kv: dict | None = None):
590    """Check if data contains required keys and key-value pairs.
591
592    Example data:
593    {
594        "foo": "bar",
595        "baz": {
596            "qux": "quux"
597        },
598        "lst": ["1", "2", "3"]
599    }
600
601    check_keys: ["foo", "baz.qux", "lst"]
602    check_kv: {"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]}
603    """
604    if not check_keys and not check_kv:  # no need to check
605        return
606    try:
607        data = json.loads(text)
608    except json.JSONDecodeError:
609        logger.error(f"Failed to parse data as json: {text}")
610        raise
611
612    # ["foo", "baz.qux", "lst"]
613    if check_keys:
614        for key in check_keys:
615            try:
616                glom(data, key)
617            except PathAccessError as e:
618                logger.error(e)
619                raise
620
621    # {"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]}
622    if check_kv:
623        for key, required_value in check_kv.items():
624            try:
625                value = glom(data, key)
626            except PathAccessError as e:
627                logger.error(e)
628                raise
629
630            if str(value) != str(required_value):  # convert to str to compare
631                msg = f"{data=}, {key=}, {value=}, but required: {required_value}"
632                logger.error(msg)
633                raise ValueError
634
635
def cleanup_old_files(root: Path | str | None = None, duration: int = CLEAN_OLD_FILES_OLDER_THAN_SECONDS) -> None:
    """Delete files directly inside ``root`` that are older than ``duration`` seconds.

    A file is deleted only when both its access time and its modification
    time are older than ``duration`` seconds. Sub-directories are not entered.

    Args:
        root: Directory to clean; defaults to ``DOWNLOAD_DIR``.
        duration: Minimum age in seconds.
    """
    folder = Path(DOWNLOAD_DIR if root is None else root).expanduser().resolve()
    if not folder.is_dir():
        return
    now = datetime.now(UTC).timestamp()
    for entry in folder.glob("*"):
        if not entry.is_file():
            continue
        info = entry.stat()
        # The more recent of atime/mtime decides: both must be older than ``duration``.
        if now - max(info.st_atime, info.st_mtime) > duration:
            logger.warning(f"Deleting old file: {entry}")
            entry.unlink(missing_ok=True)
650
651
652def convert2md(*, html: str | None = None, path: str | Path | None = None) -> str:
653    """Convert html or local file to markdown format."""
654    md = MarkItDown()
655    if path is not None:
656        path = Path(path).expanduser().resolve()
657        if not path.is_file():
658            return ""
659        result = md.convert(path)
660        return result.text_content
661    if html is not None:
662        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
663            f.write(html)
664        result = md.convert(f.name)
665        Path(f.name).unlink(missing_ok=True)
666        return result.text_content
667    return ""
668
669
def convert2html(texts: str = "") -> str:
    """Render markdown ``texts`` to HTML, with remaining newlines turned into ``<br>``.

    Non-string or blank input yields ``""``.
    """
    if not (isinstance(texts, str) and texts.strip()):
        return ""
    rendered = markdown.markdown(texts)
    return rendered.replace("\n", "<br>")
676
677
678def av2bv(aid: int | str) -> str:
679    """Bilibili AV -> BV ID converter."""
680    aid = str(aid)
681    if aid[:3].upper() == "BV1":  # BV1Y4UHYyE2z
682        return aid
683    aid = int(aid[2:]) if aid[:2].lower() == "av" else int(aid)
684    return aid2bvid(aid)
685
686
687def bv2av(bvid: str | int) -> int:
688    """Bilibili BV -> AV ID converter."""
689    bvid = str(bvid)
690    if bvid[:2].lower() == "av":  # av113503016851915
691        return int(bvid[2:])
692    if bvid.isdigit():  # 113503016851915
693        return int(bvid)
694    assert bvid[:3].upper() == "BV1"
695    return bvid2aid(bvid)
696
697
def zhcn(text: str) -> str:
    """Convert Chinese text to the ``zh-cn`` locale using ``zhconv.convert_for_mw``."""
    converted = zhconv.convert_for_mw(text, locale="zh-cn")
    return converted
701
702
if __name__ == "__main__":
    # Ad-hoc smoke run: print the results of a few helpers for manual inspection.
    print(digest("1"))
    print(rand_string())
    print(rand_number())
    # print(cleanup_old_files())
    for size in (0, 2000 * 1024 * 1024):
        print(readable_size(size))
    for sample in ("你好", 1.1, "test"):
        print(unicode_to_ascii(sample))
    for sample in ("1.1", "test"):
        print(ascii_to_unicode(sample))
    print(match_urls("http://a.com/BmT8gZ 匹配不到就删除了https://b.com/MxRdMO"))
    for link in (
        "https://www.bilibili.com/video/BV15n61YtEmk",
        "https://t.me/c/1744444199/2475260",
        "https://test.com/",
    ):
        print(is_supported_by_ytdlp(link))
    for candidate in ("https://test.com/", "test.com/"):
        print(find_url(candidate))

    # assert av2bv("av113503016851915") == "BV1Y4UHYyE2z"
    # assert av2bv("113503016851915") == "BV1Y4UHYyE2z"
    # assert av2bv(113503016851915) == "BV1Y4UHYyE2z"
    # assert av2bv("BV1Y4UHYyE2z") == "BV1Y4UHYyE2z"
    # assert bv2av("BV1Y4UHYyE2z") == 113503016851915
    # assert bv2av("113503016851915") == 113503016851915
    # assert bv2av("av113503016851915") == 113503016851915
    # assert bv2av(113503016851915) == 113503016851915