Commit f2a7b48

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-09-01 17:38:53
perf(danmu): improve r2 query performance by caching to local disk
1 parent 4c7352b
Changed files (1)
src
danmu
src/danmu/r2.py
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import contextlib
+import json
 from collections import defaultdict
 from datetime import UTC, datetime
 from decimal import Decimal
+from pathlib import Path
 from zoneinfo import ZoneInfo
 
 import anyio
@@ -13,7 +16,7 @@ from danmu.utils import live_date
 from database.r2 import get_cf_r2
 from messages.progress import modify_progress
 from others.emoji import CURRENCY
-from utils import number
+from utils import nowdt, number
 
 
 async def query_r2(dates: list[str], user: str, keyword: str, caption: str, super_chats: defaultdict, qtype: str, **kwargs) -> dict:
@@ -31,7 +34,8 @@ async def query_r2(dates: list[str], user: str, keyword: str, caption: str, supe
     paths = []
     for date in sorted(dates, reverse=True):  # 日期从新到旧
         logger.debug(f"Get {qtype} from R2: {date}")
-        data: dict[str, list[dict]] = await get_cf_r2(prefix + date, silent=True)
+        r2_key = prefix + date
+        data = await query_r2_for_date(date, r2_key, qtype)
         queried_dates.append(date)
         if not data:
             continue
@@ -90,3 +94,20 @@ async def parse_from_r2(data: dict[str, list[dict]], user: str, keyword: str, su
             count += 1
 
     return {"texts": texts.rstrip(), "count": count}
+
+
+async def query_r2_for_date(date: str, r2_key: str, qtype: str) -> dict[str, list[dict]]:
+    """首先尝试从本地磁盘获取, 如果不存在, 则从R2获取."""
+    this_year = nowdt(TZ).strftime("%Y")
+    this_month = nowdt(TZ).strftime("%Y-%m")
+    # skip for specific day or this year or this month
+    if len(date) == 10 or date in [this_year, this_month]:
+        return await get_cf_r2(r2_key, silent=True)
+    save_path = f"{DOWNLOAD_DIR}/{qtype}/{date}.json"
+    with contextlib.suppress(Exception):
+        return json.loads(Path(save_path).read_text())
+    data = await get_cf_r2(r2_key, silent=True)
+    logger.debug(f"Save {qtype} {date} to {save_path}")
+    Path(save_path).parent.mkdir(parents=True, exist_ok=True)
+    Path(save_path).write_text(json.dumps(data))
+    return data