Commit ac22419

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-28 11:52:09
feat(database): add brotli compression support for R2 uploads
1 parent c52a618
src/database.py
@@ -17,6 +17,7 @@ from pathlib import Path
 from urllib.parse import quote_plus, unquote_plus
 
 import anyio
+import brotli
 import puremagic
 from aioboto3 import Session
 from botocore.exceptions import ClientError
@@ -81,11 +82,11 @@ async def get_cf_kv(key: str, *, log_success: bool = True) -> dict:
     return {}
 
 
-async def list_cf_r2(prefix: str = "", continuation_token: str | None = None) -> tuple[bool, dict]:
+async def list_cf_r2(prefix: str = "", continuation_token: str | None = None) -> dict:
     """Get from Cloudflare R2."""
     if not DB.CF_R2_ENABLED:
         logger.warning("SKIP LIST CF-R2: Cloudflare R2 disabled")
-        return False, {}
+        return {}
     async with Session().client(
         service_name="s3",
         endpoint_url=f"https://{DB.CF_ACCOUNT_ID}.r2.cloudflarestorage.com",
@@ -98,11 +99,21 @@ async def list_cf_r2(prefix: str = "", continuation_token: str | None = None) ->
             payload["ContinuationToken"] = continuation_token
         if prefix:
             payload["Prefix"] = prefix
+        contents = []
         try:
-            return True, await s3.list_objects_v2(**payload)
+            results = await s3.list_objects_v2(**payload)
+            if not results.get("IsTruncated"):
+                return results
+            contents.extend(results.get("Contents", []))
+            while results.get("NextContinuationToken"):
+                payload["ContinuationToken"] = results["NextContinuationToken"]
+                results = await s3.list_objects_v2(**payload)
+                contents.extend(results.get("Contents", []))
+            results["Contents"] = contents
         except Exception as e:
             logger.warning(f"List CF-R2 failed for {prefix=}: {e}")
-    return False, {}
+            return {}
+        return results
 
 
 async def get_cf_r2(key: str) -> dict:
@@ -191,6 +202,7 @@ async def set_cf_r2(
     metadata: dict | None = None,
     ttl: int | None = None,
     *,
+    compress: bool = False,
     mime_type: str = "application/json",
     skip_in_memory: bool = True,
 ) -> bool:
@@ -214,7 +226,18 @@ async def set_cf_r2(
         "ContentType": mime_type,
     }
     if data:
-        payload |= {"Body": data if isinstance(data, str) else json.dumps(data).encode("utf-8")}
+        if isinstance(data, (dict, list)):
+            upload = json.dumps(data, ensure_ascii=False).encode("utf-8")
+        elif isinstance(data, str):
+            upload = data.encode("utf-8")
+            payload["ContentType"] = "text/plain"
+        payload |= {"Body": upload}
+
+        if compress:
+            payload["Body"] = brotli.compress(upload, quality=11)
+            payload["ContentEncoding"] = "br"
+
+    metadata = metadata or {}
     if metadata:
         payload |= {"Metadata": stringfy(metadata)}
     if ttl is not None:
@@ -503,11 +526,12 @@ if __name__ == "__main__":
     # asyncio.run(download_alist("test.py"))
     # asyncio.run(set_cf_r2("test2", metadata={"finished": "1"}))
     # asyncio.run(set_cf_r2("test2", data={"finished": "1"}, ttl=60))
-    # asyncio.run(get_cf_r2("test2"))
-    columns = "id INTEGER PRIMARY KEY, time TEXT, uid INTEGER, user TEXT, text TEXT, sc_amt REAL NULL, sc_ccy TEXT NULL"
-    asyncio.run(create_cf_d1_table("2025", columns))
-
-    sql = 'INSERT INTO "2025" (id, time, uid, user, text, sc_amt, sc_ccy) VALUES (?, ?, ?, ?, ?, ?, ?);'
-    params = [123, "2025-01-01", 456, "username", "hello", 15.5, "USD"]
-    resp = asyncio.run(query_cf_d1(sql, params=params))
-    print(resp)
+    asyncio.run(list_cf_r2("RSS/"))
+    # asyncio.run(get_cf_r2("Danmu/2025-05-26"))
+    # columns = "id INTEGER PRIMARY KEY, time TEXT, uid INTEGER, user TEXT, text TEXT, sc_amt REAL NULL, sc_ccy TEXT NULL"
+    # asyncio.run(create_cf_d1_table("2025", columns))
+
+    # sql = 'INSERT INTO "2025" (id, time, uid, user, text, sc_amt, sc_ccy) VALUES (?, ?, ?, ?, ?, ?, ?);'
+    # params = [123, "2025-01-01", 456, "username", "hello", 15.5, "USD"]
+    # resp = asyncio.run(query_cf_d1(sql, params=params))
+    # print(resp)
pyproject.toml
@@ -4,6 +4,7 @@ dependencies = [
   "apscheduler>=3.11.0,<4.0.0",
   "beautifulsoup4>=4.12.3",
   "bilibili-api-python==17.1.4",
+  "brotli>=1.1.0",
   "cacheout>=0.16.0",
   "dashscope>=1.23.2",
   "feedparser>=6.0.11",
uv.lock
@@ -254,6 +254,7 @@ dependencies = [
     { name = "apscheduler" },
     { name = "beautifulsoup4" },
     { name = "bilibili-api-python" },
+    { name = "brotli" },
     { name = "cacheout" },
     { name = "dashscope" },
     { name = "feedparser" },
@@ -295,6 +296,7 @@ requires-dist = [
     { name = "apscheduler", specifier = ">=3.11.0,<4.0.0" },
     { name = "beautifulsoup4", specifier = ">=4.12.3" },
     { name = "bilibili-api-python", specifier = "==17.1.4" },
+    { name = "brotli", specifier = ">=1.1.0" },
     { name = "cacheout", specifier = ">=0.16.0" },
     { name = "dashscope", specifier = ">=1.23.2" },
     { name = "feedparser", specifier = ">=6.0.11" },