main
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3import base64
4import contextlib
5import hashlib
6import json
7import os
8import random
9import re
10import string
11import tempfile
12from datetime import UTC, datetime
13from decimal import Decimal
14from pathlib import Path
15from typing import Any
16from zoneinfo import ZoneInfo
17
18import chardet
19import markdown
20import puremagic
21import zhconv
22from bilibili_api.utils.aid_bvid_transformer import aid2bvid, bvid2aid
23from bs4 import BeautifulSoup
24from bs4.element import PageElement
25from glom import PathAccessError, glom
26from loguru import logger
27from markitdown import MarkItDown
28from pyrogram.client import Client
29from pyrogram.types import User
30from yt_dlp.extractor import gen_extractors
31
32from config import CLEAN_OLD_FILES_OLDER_THAN_SECONDS, DOWNLOAD_DIR, TZ, cache
33
34# ruff: noqa: RUF001
35
36
def nowdt(tz: str = "UTC") -> datetime:
    """Return the current time as a timezone-aware datetime.

    Args:
        tz: Time zone key handed to ``ZoneInfo`` (defaults to ``"UTC"``).

    Returns:
        ``datetime.now`` evaluated in the requested zone.
    """
    zone = ZoneInfo(tz)
    return datetime.now(tz=zone)
39
40
def nowstr(tz: str = TZ) -> str:
    """Return the current time formatted as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        tz: Time zone key forwarded to ``nowdt``; defaults to the ``TZ``
            value imported from ``config``.
    """
    return nowdt(tz).strftime("%Y-%m-%d %H:%M:%S")
44
45
46def number(n: float | str | Decimal, precision: int = -1, *, sign: bool = False) -> str:
47 """Normalize a number to its simplest decimal.
48
49 Example:
50 "1.2340000" -> "1.234"
51 1.000000 -> "1"
52 """
53 n = Decimal(n)
54 if precision == -1: # auto precision (up to 8 decimal places)
55 return f"{n:.8f}".rstrip("0").rstrip(".")
56 if precision == 0:
57 return f"{n:.0f}"
58 return f"{n:+.{precision}f}" if sign else f"{n:.{precision}f}"
59
60
def split_parts(first: int = 0, middle: int = 0, last: int = 0) -> dict:
    """Label the page ranges of three consecutive groups of items.

    Useful for determining the number of media files in master / reply /
    quote posts. Pages are numbered from 1: the first group covers
    ``1..first``, the middle group the next ``middle`` pages and the last
    group the ``last`` pages after that.

    Args:
        first: Number of items in the first group.
        middle: Number of items in the middle group.
        last: Number of items in the last group.

    Returns:
        A dict with keys ``"first"``, ``"middle"`` and ``"last"``. Each value
        is ``""`` for an empty group, ``"🏞P<n>"`` for a single page, or
        ``"🏞P<a>-P<b>"`` for a range.
    """

    def label(start: int, end: int) -> str:
        # Work on the integers directly instead of formatting a string and
        # parsing it back, which broke on negative counts ("P1-P-1").
        if start > end:
            return ""
        if start == end:
            return f"🏞P{start}"
        return f"🏞P{start}-P{end}"

    return {
        "first": label(1, first),
        "middle": label(first + 1, first + middle),
        "last": label(first + middle + 1, first + middle + last),
    }
78
79
80def to_int(var: str | float) -> str | int:
81 """Convert a string or float to an integer."""
82 try:
83 return int(float(var))
84 except (ValueError, TypeError):
85 return str(var)
86
87
88def read_text(path: str | Path) -> str:
89 """Read text file with any encoding."""
90 path = Path(path).expanduser().resolve()
91 if not path.is_file():
92 logger.warning(f"File not found: {path.name}")
93 return ""
94 try:
95 return path.read_text(encoding="utf-8")
96 except UnicodeDecodeError:
97 with path.open("rb") as f: # Open in binary mode for detection
98 raw_data = f.read()
99 result = chardet.detect(raw_data)
100 detected_encoding = result["encoding"]
101 if detected_encoding:
102 logger.success(f"File: `{path.name}` Encoding: {detected_encoding}")
103 return path.read_text(encoding=detected_encoding)
104 logger.warning(f"Could not detect encoding: {path.name}")
105 return ""
106
107
def rand_string(length: int = 48) -> str:
    """Return ``length`` random characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)
110
111
def rand_number(length: int = 8) -> int:
    """Return an integer parsed from ``length`` randomly chosen decimal digits.

    Leading zeros disappear in the ``int`` conversion, so the result can have
    fewer than ``length`` digits.
    """
    digits = random.choices(string.digits, k=length)
    return int("".join(digits))
114
115
def true(value: Any) -> bool:
    """Interpret ``value`` as a boolean flag.

    Falsy values are ``False``. A non-empty string is ``False`` only when its
    lowercase form is one of the negative words listed below (no whitespace
    stripping is done). Anything else is ``True``.
    """
    negative_words = {"0", "n", "na", "n/a", "no", "not", "f", "false", "off", "none", "null", "disable", "disabled"}
    if isinstance(value, str):
        return bool(value) and str(value).lower() not in negative_words
    return bool(value)
122
123
def sanitize_filename(filename: str, replacement: str = "_") -> str:
    """Make ``filename`` safe to use across platforms.

    Args:
        filename: Candidate file name.
        replacement: Text substituted for each illegal character, appended to
            reserved names, and returned when nothing is left.

    Returns:
        The cleaned file name.
    """
    # Replace every cross-platform illegal character and the ASCII control
    # characters (0-31).
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', replacement, filename)

    # Windows-specific compatibility handling:
    # 1. Drop trailing dots and spaces (Windows ignores them automatically,
    #    which causes cross-platform problems).
    cleaned = cleaned.rstrip(". ")

    # 2. Windows reserved device names (even when created on Linux/macOS,
    #    Windows cannot access them). Only a bare reserved name, without an
    #    extension, is checked.
    reserved_names = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        *(f"COM{i}" for i in range(1, 10)),
        *(f"LPT{i}" for i in range(1, 10)),
    }
    if cleaned.upper() in reserved_names:
        cleaned += replacement

    # Fall back to the replacement text when cleaning left nothing.
    return cleaned or replacement
146
147
148def digest(s: Any, length: int = 32, *, to_int: bool = False) -> str | int:
149 hasher = hashlib.shake_256()
150 if isinstance(s, Path) and s.is_file():
151 with open(s, "rb") as f:
152 for chunk in iter(lambda: f.read(65536), b""):
153 hasher.update(chunk)
154 elif isinstance(s, bytes):
155 hasher.update(s)
156 else:
157 hasher.update(str(s).encode())
158
159 raw_bytes = hasher.digest(length * 2)
160 b64_str = base64.urlsafe_b64encode(raw_bytes).decode("ascii")
161 b64_str = b64_str.replace("=", "").replace("-", "").replace("_", "")
162 if to_int:
163 b64_str = int.from_bytes(b64_str.encode("ascii"), byteorder="big")
164 return str(b64_str)[:length]
165
166
167def remove_none_values(d: dict | list) -> dict:
168 """Recursively removes keys with None values from a nested dictionary.
169
170 Cleans None values from lists and processes nested structures.
171
172 Args:
173 d (dict | list): The input dict or list
174
175 Returns:
176 dict: A cleaned dictionary or list with None values removed.
177 """
178 if isinstance(d, dict): # If the input is a dictionary
179 cleaned_dict = {}
180 for key, value in d.items():
181 if isinstance(value, dict):
182 # Recursively clean nested dictionaries
183 nested_cleaned = remove_none_values(value)
184 if nested_cleaned: # Only add non-empty cleaned dict
185 cleaned_dict[key] = nested_cleaned
186 elif isinstance(value, list):
187 # Clean lists recursively
188 cleaned_list = [remove_none_values(item) if isinstance(item, dict | list) else item for item in value if item is not None]
189 if cleaned_list: # Only add non-empty cleaned lists
190 cleaned_dict[key] = cleaned_list
191 elif value is not None:
192 cleaned_dict[key] = value
193 return cleaned_dict
194 if isinstance(d, list): # If the input is a list
195 return [remove_none_values(item) if isinstance(item, dict | list) else item for item in d if item is not None] # type: ignore
196
197 return d # Return non-dict, non-list values as is
198
199
def soup_to_text(soup: PageElement) -> str:
    """Flatten an element tree into text.

    An element without a ``children`` attribute contributes its ``text``.
    Otherwise each child is visited in order: an ``img`` carrying an ``alt``
    attribute contributes that ``alt`` value, a ``br`` contributes a newline,
    a child with its own ``children`` is processed recursively, and any other
    child contributes its ``text``.
    """
    if not hasattr(soup, "children"):
        return soup.text

    pieces = []
    for node in soup.children:  # type: ignore
        if node.name == "img" and "alt" in node.attrs:
            pieces.append(node["alt"])
        elif node.name == "br":
            pieces.append("\n")
        elif hasattr(node, "children"):
            pieces.append(soup_to_text(node))
        else:
            pieces.append(node.text)
    return "".join(pieces)
214
215
def markdown_to_text(mkdown: str) -> str:
    """Render Markdown to HTML and return the text content of that HTML.

    Newlines in the rendered HTML are replaced with ``<br>`` tags before the
    text is extracted. If anything in the conversion raises, the input is
    returned unchanged.
    """
    try:
        rendered = markdown.markdown(mkdown)
        document = BeautifulSoup(rendered.replace("\n", "<br>"), "html.parser")
        return document.get_text()
    except Exception:
        # Best effort: fall back to the raw Markdown.
        return mkdown
222
223
224def number_to_emoji(num: int | str, default: str | None = None) -> str:
225 """Convert a number to an emoji."""
226 num = str(num)
227 if default is None:
228 default = num
229 return {"0": "0️⃣", "1": "1️⃣", "2": "2️⃣", "3": "3️⃣", "4": "4️⃣", "5": "5️⃣", "6": "6️⃣", "7": "7️⃣", "8": "8️⃣", "9": "9️⃣", "10": "🔟"}.get(num, default)
230
231
232def seconds_to_hms(seconds: float | str) -> str:
233 """Convert seconds to hms format."""
234 seconds = int(float(seconds))
235 m, s = divmod(seconds, 60)
236 h, m = divmod(m, 60)
237 if h == 0:
238 return f"{m:02d}:{s:02d}"
239 return f"{h:02d}:{m:02d}:{s:02d}"
240
241
def count_subtitles(texts: str) -> int:
    """Count the characters left after stripping leading bracketed tags.

    A ``[...]`` tag at the start of any line (for example a timestamp),
    together with one following whitespace character, is removed before
    counting.

    Args:
        texts: Input string potentially containing bracketed timestamps.

    Returns:
        int: Character count after the tags are removed.
    """
    leading_tag = re.compile(r"^\[.*?\]\s?", flags=re.MULTILINE)
    return len(leading_tag.sub("", texts))
253
254
255def stringfy(d: dict) -> dict:
256 """Convert dict values to string.
257
258 Args:
259 d (dict | list): The input dict or list
260
261 Returns:
262 dict: A stringfy dictionary or list.
263 """
264 if isinstance(d, dict): # If the input is a dictionary
265 stringfy_dict = {}
266 for key, value in d.items():
267 if isinstance(value, dict | list | set):
268 stringfy_dict[key] = json.dumps(value)
269 else:
270 stringfy_dict[key] = unicode_to_ascii(value)
271 return stringfy_dict
272 return d # Return non-dict, non-list values as is
273
274
def seconds_to_time(seconds: float) -> str:
    """Format a duration in seconds as a clock string.

    The input is rounded to whole seconds first. Hours are shown only when
    non-zero and are not wrapped into days.

    100 -> "01:40"
    1000 -> "16:40"
    10000 -> "02:46:40"
    100000 -> "27:46:40"
    """
    total = round(float(seconds))
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    tail = f"{minutes:02d}:{secs:02d}"
    return f"{hours:02d}:{tail}" if hours else tail
289
290
291def to_dt(t: float | str | datetime | None, tz="UTC") -> datetime:
292 """Convert float, str, datetime to datetime."""
293 if isinstance(t, datetime):
294 return t
295 if isinstance(t, float):
296 ts = round(t)
297 ts = ts / 10**6 if ts > 10**14 else ts
298 ts = ts / 10**3 if ts > 10**11 else ts
299 return datetime.fromtimestamp(ts, tz=UTC).astimezone(ZoneInfo(tz))
300 if not isinstance(t, str):
301 return nowdt(tz)
302 t = str(t).strip()
303 if len(t) == 4: # 2026
304 return datetime.strptime(t, "%Y").astimezone(ZoneInfo(tz))
305 if len(t) == 7: # 2026-02
306 return datetime.strptime(t, "%Y-%m").astimezone(ZoneInfo(tz))
307 if len(t) == 10: # 2026-02-01
308 return datetime.strptime(t, "%Y-%m-%d").astimezone(ZoneInfo(tz))
309 if len(t) == 19: # 2026-02-01 12:46:40
310 return datetime.strptime(t, "%Y-%m-%d %H:%M:%S").astimezone(ZoneInfo(tz))
311 return nowdt(tz)
312
313
314def readable_time(seconds: str | float) -> str:
315 """Human readable time duration.
316
317 100 -> "1m40s"
318 1000 -> "16m40s"
319 10000 -> "2h46m40s"
320 100000 -> "1d3h46m40s"
321 """
322 try:
323 seconds = float(seconds)
324 except ValueError:
325 # already in reachable time
326 return str(seconds)
327 if seconds < 60:
328 return f"{seconds:.0f}s"
329 if seconds < 3600:
330 minutes, seconds = divmod(seconds, 60)
331 return f"{minutes:.0f}m{seconds:.0f}s"
332 if seconds < 86400:
333 hours, seconds = divmod(seconds, 3600)
334 minutes, seconds = divmod(seconds, 60)
335 return f"{hours:.0f}h{minutes:.0f}m{seconds:.0f}s"
336 days, seconds = divmod(seconds, 86400)
337 hours, seconds = divmod(seconds, 3600)
338 minutes, seconds = divmod(seconds, 60)
339 return f"{days:.0f}d{hours:.0f}h{minutes:.0f}m{seconds:.0f}s"
340
341
342def readable_size(num_bytes: str | float = 0, path: str | Path | None = None) -> str:
343 """Human readable file size."""
344 num_bytes = Path(path).stat().st_size if path is not None else float(num_bytes)
345 # for unit in ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"]:
346 for unit in ["B", "KB"]:
347 if abs(num_bytes) < 1024:
348 return f"{num_bytes:.1f} {unit}"
349 num_bytes /= 1024
350 return f"{num_bytes:.1f} MB"
351
352
def readable_count(num: int | str) -> str:
    """Abbreviate large counts using the unit 万 (ten thousand).

    Values that ``to_int`` cannot convert are returned as ``str(num)``.
    Counts below 10000 are returned as plain digits; counts from 10000 to
    99999 keep one truncated decimal unless that decimal is zero; counts of
    100000 and above are truncated to whole 万.
    """
    count = to_int(num)
    if not isinstance(count, int):
        return str(num)
    if count < 10000:
        return str(count)
    wan, rest = divmod(count, 10000)
    if count >= 100000 or rest < 1000:
        return f"{wan}万"
    return f"{wan}.{rest // 1000}万"
363
364
def find_url(text: str) -> str:
    """Return the first URL found in ``text``.

    Returns ``""`` when ``text`` is not a string or contains no match. The
    pattern accepts ``http(s)://`` URLs, ``www.`` hosts and bare
    ``domain.tld/`` forms.
    """
    if not isinstance(text, str):
        return ""
    regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    first = re.search(regex, text)
    if first is None:
        return ""
    url = first.group(1)  # outermost group = the whole URL
    logger.debug(f"URL found from message text: {url}")
    return url
374
375
def https_url(url: str) -> str:
    """Normalize ``url`` to an ``https://`` URL.

    A leading ``https://`` and/or ``http://`` is removed (matched
    case-insensitively, since URL schemes are case-insensitive), surrounding
    slashes are stripped and ``https://`` is prepended.
    """
    text = str(url)
    for scheme in ("https://", "http://"):
        if text[: len(scheme)].lower() == scheme:
            text = text[len(scheme) :]
    return "https://" + text.strip("/")
378
379
def bare_url(url: str) -> str:
    """Return ``url`` without its ``http(s)://`` scheme and surrounding slashes.

    The scheme is matched case-insensitively, since URL schemes are
    case-insensitive.
    """
    text = str(url)
    for scheme in ("https://", "http://"):
        if text[: len(scheme)].lower() == scheme:
            text = text[len(scheme) :]
    return text.strip("/")
382
383
384def ts_to_dt(ts: str | float | None) -> datetime | None:
385 if not ts:
386 return None
387
388 try: # not number
389 ts = float(ts)
390 except ValueError:
391 return None
392
393 if 0 < float(ts) < 1:
394 return None
395 try:
396 return datetime.fromtimestamp(ts, tz=UTC).astimezone(ZoneInfo(TZ))
397 except Exception as e:
398 if "out of range" in str(e):
399 return ts_to_dt(ts / 1000)
400 logger.error(e)
401 return None
402
403
404def slim_cid(cid: int | str) -> str:
405 return str(cid).strip().removeprefix("-100")
406
407
408def strings_list(value: str | None = None, *, env_key: str = "", separator: str = ",", shuffle: bool = False) -> list[str]:
409 """Get list from environment variable."""
410 if value is None:
411 value = os.getenv(env_key, "")
412 results = [s.strip() for s in value.split(separator) if s.strip()]
413 if shuffle:
414 random.shuffle(results)
415 return results
416
417
def parse_time(timestr: str) -> dict[str, int]:
    """Parse time string.

    Support formats:
        length= 4: yyyy
        length= 6: yyyymm
        length= 7: yyyy-mm
        length= 8: yyyymmdd
        length=10: yyyy-mm-dd
        length=14: yyyymmddHHMMSS
        length=15: yyyymmdd-HHMMSS
        length=17: yyyymmdd HH:MM:SS
        length=19: yyyy-mm-dd HH:MM:SS

    The format is chosen purely by the length of ``timestr``; separator
    characters are not validated.

    Returns:
        {"year": int, "month": int, "day": int, "hour": int, "minute": int, "second": int}
        Fields missing from the format are 0. An empty input returns ``{}``;
        an unsupported length logs a warning and returns all zeros.

    Raises:
        ValueError: If an expected numeric field is not an integer.
    """
    res = dict.fromkeys(("year", "month", "day", "hour", "minute", "second"), 0)
    if not timestr:
        return {}

    # Field positions for each supported length. The year always occupies the
    # first four characters and is handled separately below.
    layouts: dict[int, dict[str, tuple[int, int]]] = {
        4: {},  # yyyy
        6: {"month": (4, 6)},  # yyyymm
        7: {"month": (5, 7)},  # yyyy-mm
        8: {"month": (4, 6), "day": (6, 8)},  # yyyymmdd
        10: {"month": (5, 7), "day": (8, 10)},  # yyyy-mm-dd
        14: {"month": (4, 6), "day": (6, 8), "hour": (8, 10), "minute": (10, 12), "second": (12, 14)},  # yyyymmddHHMMSS
        15: {"month": (4, 6), "day": (6, 8), "hour": (9, 11), "minute": (11, 13), "second": (13, 15)},  # yyyymmdd-HHMMSS
        # Length 17 used to be rejected by the length check even though it is
        # a documented format.
        17: {"month": (4, 6), "day": (6, 8), "hour": (9, 11), "minute": (12, 14), "second": (15, 17)},  # yyyymmdd HH:MM:SS
        19: {"month": (5, 7), "day": (8, 10), "hour": (11, 13), "minute": (14, 16), "second": (17, 19)},  # yyyy-mm-dd HH:MM:SS
    }
    layout = layouts.get(len(timestr))
    if layout is None:
        logger.warning(f"Invalid time format: {timestr}")
        return res

    res["year"] = int(timestr[:4])
    for field, (start, end) in layout.items():
        res[field] = int(timestr[start:end])
    return res
479
480
async def myself(client: Client) -> User:
    """Return the ``User`` object describing this client's own account.

    A truthy value cached under ``"me"`` is returned directly. Otherwise
    ``client.get_me()`` is awaited and its result is cached with ``ttl=0``.
    If that call raises, the error is logged and a placeholder
    ``User(id=1, is_bot=False)`` is returned without being cached.
    """
    if cached := cache.get("me"):
        return cached
    try:
        me = await client.get_me()
    except Exception as e:
        logger.error(e)
        return User(id=1, is_bot=False)
    else:
        cache.set("me", me, ttl=0)
        return me
492
493
async def i_am_bot(client: Client) -> bool:
    """Check whether this client is a bot.

    A truthy value cached under ``"i_am_bot"`` is returned directly.
    Otherwise the account is looked up through ``myself`` and its ``is_bot``
    flag is cached with ``ttl=0`` and returned. If the lookup raises, the
    error is logged and ``False`` is returned.
    """
    # NOTE(review): the truthiness test means a cached False never counts as
    # a hit, so non-bot clients go through `myself` on every call.
    if flag := cache.get("i_am_bot"):
        return flag
    try:
        me = await myself(client)
    except Exception as e:
        logger.error(e)
        return False
    cache.set("i_am_bot", me.is_bot, ttl=0)
    return me.is_bot
505
506
def match_urls(text: str) -> list[str]:
    """Find every URL in ``text`` and return each one normalized by ``https_url``.

    ``text`` is converted with ``str()`` first, so non-string input is
    accepted. Matching is case-insensitive.
    """
    pattern = r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
    # Group 1 is the outermost group, i.e. the complete URL.
    return [https_url(hit.group(1)) for hit in re.finditer(pattern, str(text))]
514
515
def remove_dash(text: str) -> str:
    """Delete runs of dashes from ``text``.

    Every ``---`` is removed repeatedly until none remain, then every ``--``
    likewise. Single dashes are kept. Falsy input yields ``""``.
    """
    if not text:
        return ""
    for run in ("---", "--"):
        while run in text:
            text = text.replace(run, "")
    return text
524
525
def remove_pound(text: str) -> str:
    """Replace each run of ``#`` that is followed by a space with one space.

    A ``#`` not followed by a space (after this collapsing) is left alone.
    Falsy input yields ``""``.
    """
    if not text:
        return ""
    # One pass over "#+ " equals repeatedly replacing "# " with " " until no
    # "# " is left.
    return re.sub(r"#+ ", " ", text)
532
533
def remove_consecutive_newlines(text: str, newline_level: int = 3) -> str:
    """Collapse runs of newlines.

    Runs of three or more newlines always shrink to two. When
    ``newline_level`` is 2, runs of two or more shrink further to one.
    Falsy input yields ``""``.
    """
    if not text:
        return ""
    collapsed = re.sub(r"\n{3,}", "\n\n", text)
    if newline_level == 2:
        collapsed = re.sub(r"\n{2,}", "\n", collapsed)
    return collapsed
543
544
def is_supported_by_ytdlp(url: str) -> bool:
    """Check whether any non-generic yt-dlp extractor accepts ``url``.

    URLs containing ``t.me`` are rejected up front.
    """
    # NOTE(review): this is a substring test, so it also rejects any other
    # URL that merely contains "t.me".
    if "t.me" in url:  # tg link
        return False
    # The "generic" extractor is excluded from the check.
    specific = (extractor for extractor in gen_extractors() if extractor.IE_NAME != "generic")
    return any(extractor.suitable(url) for extractor in specific)
552
553
554def guess_mime(path: str | Path) -> str:
555 path = Path(path).expanduser().resolve()
556 if not path.is_file():
557 return ""
558 with contextlib.suppress(Exception):
559 import magic # magic needs `libmagic` to be installed.
560
561 # `sudo apt-get install libmagic1` or `brew install libmagic`
562 return magic.from_file(path, mime=True)
563
564 # infer from `magic` failed
565 with contextlib.suppress(Exception):
566 return puremagic.from_file(path, mime=True)
567 return ""
568
569
570def unicode_to_ascii(text: str | float) -> str:
571 if not text:
572 return ""
573 return str(text).encode("unicode_escape").decode("ascii")
574
575
def ascii_to_unicode(text: str) -> str:
    """Decode ``unicode_escape`` sequences in an ASCII string.

    Falsy input yields ``""``. Non-ASCII input makes the ASCII encoding step
    raise ``UnicodeEncodeError``.
    """
    if not text:
        return ""
    raw = str(text).encode("ascii")
    return raw.decode("unicode_escape")
580
581
582def save_txt(text: str, path: Path | str | None = None) -> str:
583 if path is None:
584 path = Path(DOWNLOAD_DIR) / f"{rand_string()}.txt"
585 Path(path).write_text(text)
586 return Path(path).as_posix()
587
588
589def check_data(text: str, check_keys: list[str] | None = None, check_kv: dict | None = None):
590 """Check if data contains required keys and key-value pairs.
591
592 Example data:
593 {
594 "foo": "bar",
595 "baz": {
596 "qux": "quux"
597 },
598 "lst": ["1", "2", "3"]
599 }
600
601 check_keys: ["foo", "baz.qux", "lst"]
602 check_kv: {"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]}
603 """
604 if not check_keys and not check_kv: # no need to check
605 return
606 try:
607 data = json.loads(text)
608 except json.JSONDecodeError:
609 logger.error(f"Failed to parse data as json: {text}")
610 raise
611
612 # ["foo", "baz.qux", "lst"]
613 if check_keys:
614 for key in check_keys:
615 try:
616 glom(data, key)
617 except PathAccessError as e:
618 logger.error(e)
619 raise
620
621 # {"foo": "bar", "baz.qux": "quux", "lst": ["1", "2", "3"]}
622 if check_kv:
623 for key, required_value in check_kv.items():
624 try:
625 value = glom(data, key)
626 except PathAccessError as e:
627 logger.error(e)
628 raise
629
630 if str(value) != str(required_value): # convert to str to compare
631 msg = f"{data=}, {key=}, {value=}, but required: {required_value}"
632 logger.error(msg)
633 raise ValueError
634
635
def cleanup_old_files(root: Path | str | None = None, duration: int = CLEAN_OLD_FILES_OLDER_THAN_SECONDS) -> None:
    """Delete files directly under ``root`` that were not used recently.

    A file is deleted when both its access time and its modification time are
    more than ``duration`` seconds in the past. Only direct children of
    ``root`` are examined; sub-directories are skipped.

    Args:
        root: Directory to clean; defaults to ``DOWNLOAD_DIR``.
        duration: Age threshold in seconds.
    """
    base = Path(DOWNLOAD_DIR if root is None else root).expanduser().resolve()
    if not base.is_dir():
        return
    now = datetime.now(UTC).timestamp()
    for entry in base.glob("*"):
        if not entry.is_file():
            continue
        info = entry.stat()
        if all(now - stamp > duration for stamp in (info.st_atime, info.st_mtime)):
            logger.warning(f"Deleting old file: {entry}")
            entry.unlink(missing_ok=True)
650
651
652def convert2md(*, html: str | None = None, path: str | Path | None = None) -> str:
653 """Convert html or local file to markdown format."""
654 md = MarkItDown()
655 if path is not None:
656 path = Path(path).expanduser().resolve()
657 if not path.is_file():
658 return ""
659 result = md.convert(path)
660 return result.text_content
661 if html is not None:
662 with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
663 f.write(html)
664 result = md.convert(f.name)
665 Path(f.name).unlink(missing_ok=True)
666 return result.text_content
667 return ""
668
669
def convert2html(texts: str = "") -> str:
    """Render Markdown text to HTML, with newlines turned into ``<br>``.

    Returns ``""`` when ``texts`` is not a string or is blank.
    """
    if isinstance(texts, str) and texts.strip():
        rendered = markdown.markdown(texts)
        return rendered.replace("\n", "<br>")
    return ""
676
677
678def av2bv(aid: int | str) -> str:
679 """Bilibili AV -> BV ID converter."""
680 aid = str(aid)
681 if aid[:3].upper() == "BV1": # BV1Y4UHYyE2z
682 return aid
683 aid = int(aid[2:]) if aid[:2].lower() == "av" else int(aid)
684 return aid2bvid(aid)
685
686
687def bv2av(bvid: str | int) -> int:
688 """Bilibili BV -> AV ID converter."""
689 bvid = str(bvid)
690 if bvid[:2].lower() == "av": # av113503016851915
691 return int(bvid[2:])
692 if bvid.isdigit(): # 113503016851915
693 return int(bvid)
694 assert bvid[:3].upper() == "BV1"
695 return bvid2aid(bvid)
696
697
def zhcn(text: str) -> str:
    """Convert Chinese text to the ``zh-cn`` locale via ``zhconv.convert_for_mw``."""
    converted = zhconv.convert_for_mw(text, locale="zh-cn")
    return converted
701
702
if __name__ == "__main__":
    # Manual smoke run: print sample outputs of a few helpers.
    print(digest("1"))
    print(rand_string())
    print(rand_number())
    # print(cleanup_old_files())
    for size in (0, 2000 * 1024 * 1024):
        print(readable_size(size))
    for sample in ("你好", 1.1, "test"):
        print(unicode_to_ascii(sample))
    for sample in ("1.1", "test"):
        print(ascii_to_unicode(sample))
    print(match_urls("http://a.com/BmT8gZ 匹配不到就删除了https://b.com/MxRdMO"))
    for link in (
        "https://www.bilibili.com/video/BV15n61YtEmk",
        "https://t.me/c/1744444199/2475260",
        "https://test.com/",
    ):
        print(is_supported_by_ytdlp(link))
    for message in ("https://test.com/", "test.com/"):
        print(find_url(message))

    # Disabled reference checks for the AV/BV converters:
    # assert av2bv("av113503016851915") == "BV1Y4UHYyE2z"
    # assert av2bv("113503016851915") == "BV1Y4UHYyE2z"
    # assert av2bv(113503016851915) == "BV1Y4UHYyE2z"
    # assert av2bv("BV1Y4UHYyE2z") == "BV1Y4UHYyE2z"
    # assert bv2av("BV1Y4UHYyE2z") == 113503016851915
    # assert bv2av("113503016851915") == 113503016851915
    # assert bv2av("av113503016851915") == 113503016851915
    # assert bv2av(113503016851915) == 113503016851915