Commit 938f569

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-05-21 09:45:18
feat(r2): add `Cloudflare R2` publishing support
1 parent bc6ef07
src/asr/voice_recognition.py
@@ -21,7 +21,8 @@ from messages.progress import modify_progress
 from messages.sender import send2tg
 from messages.utils import blockquote, count_without_entities, equal_prefix, get_reply_to, startswith_prefix
 from multimedia import convert_to_audio, parse_media_info
-from utils import publish_telegraph, rand_string, to_int
+from publish import publish_telegraph
+from utils import rand_string, to_int
 
 # ruff: noqa: RUF001
 
src/others/download_external.py
@@ -15,7 +15,8 @@ from messages.sender import send2tg
 from messages.utils import equal_prefix, get_reply_to, startswith_prefix
 from multimedia import is_valid_video_or_audio, validate_img
 from networking import download_file
-from utils import find_url, publish_telegraph, readable_size, to_int
+from publish import publish_telegraph
+from utils import find_url, readable_size, to_int
 
 HELP = f"""
 ⏬**下载文件**
src/preview/wechat.py
@@ -16,7 +16,8 @@ from messages.progress import modify_progress
 from messages.sender import send2tg
 from messages.utils import count_without_entities, summay_media
 from networking import download_file, download_media, hx_req
-from utils import nowstr, publish_telegraph, rand_string
+from publish import publish_telegraph
+from utils import nowstr, rand_string
 
 
 async def preview_wechat(client: Client, message: Message, url: str = "", db_key: str = "", **kwargs):
src/preview/ytdlp.py
@@ -44,8 +44,9 @@ from messages.utils import blockquote, count_without_entities, get_reply_to, sma
 from multimedia import convert_to_h264, generate_cover
 from networking import hx_req
 from preview.utils import fetch_youtube_video_info, get_bilibili_comments, make_bvid_clickable
+from publish import publish_telegraph
 from subtitles.base import fetch_subtitle
-from utils import count_subtitles, nowdt, publish_telegraph, readable_size, readable_time, remove_none_values, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
+from utils import count_subtitles, nowdt, readable_size, readable_time, remove_none_values, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
 
 
 class ProxyError(Exception):
src/subtitles/subtitle.py
@@ -21,8 +21,9 @@ from messages.utils import equal_prefix
 from networking import match_social_media_link
 from preview.utils import fetch_youtube_video_info, get_bilibili_video_info
 from preview.ytdlp import preview_ytdlp
+from publish import publish_telegraph
 from subtitles.base import fetch_subtitle, match_url
-from utils import count_subtitles, publish_telegraph, to_int
+from utils import count_subtitles, to_int
 
 HELP = f"""📃**提取字幕**
 使用说明:
src/config.py
@@ -117,7 +117,8 @@ class TOKEN:
     CHART_IMG = os.getenv("CHART_IMG_KEY", "")
     TELEGRAPH = os.getenv("TELEGRAPH_TOKEN", "")
     NEOCITIES = os.getenv("NEOCITIES_USERPASS", "")  # in "user,pass" format
-    IV_HASH = os.getenv("INSTANTVIEW_HASH", "")
+    NEOCITIES_IV_HASH = os.getenv("NEOCITIES_INSTANTVIEW_HASH", "")
+    R2_IV_HASH = os.getenv("R2_INSTANTVIEW_HASH", "")
 
 
 class PROXY:  # format: socks5://127.0.0.1:7890
@@ -166,6 +167,7 @@ class DB:
     CF_R2_BUCKET_NAME = os.getenv("CF_R2_BUCKET_NAME", "bennybot")
     CF_R2_ACCESS_KEY_ID = os.getenv("CF_R2_ACCESS_KEY_ID", "")
     CF_R2_SECRET_ACCESS_KEY = os.getenv("CF_R2_SECRET_ACCESS_KEY", "")
+    CF_R2_PUBLIC_URL = os.getenv("CF_R2_PUBLIC_URL", "")
     ALIST_ENABLED = os.getenv("ALIST_ENABLED", "1").lower() in ["1", "y", "yes", "t", "true", "on"]
     ALIST_USERNAME = os.getenv("ALIST_USERNAME", "guest")
     ALIST_PASSWORD = os.getenv("ALIST_PASSWORD", "guest")
src/database.py
@@ -185,7 +185,15 @@ async def set_cf_kv(key: str, data: dict | list | str, ttl: int | None = None, *
     return False
 
 
-async def set_cf_r2(key: str, data: dict | list | str | None = None, metadata: dict | None = None, ttl: int | None = None, *, skip_in_memory: bool = True) -> bool:
+async def set_cf_r2(
+    key: str,
+    data: dict | list | str | None = None,
+    metadata: dict | None = None,
+    ttl: int | None = None,
+    *,
+    mime_type: str = "application/json",
+    skip_in_memory: bool = True,
+) -> bool:
     """Set to Cloudflare R2 via boto3.
 
     We do not put data to R2, just use metadata to store data.
@@ -203,10 +211,10 @@ async def set_cf_r2(key: str, data: dict | list | str | None = None, metadata: d
         "CacheControl": "no-cache",
         "Bucket": DB.CF_R2_BUCKET_NAME,
         "Key": key,
-        "ContentType": "application/json",
+        "ContentType": mime_type,
     }
     if data:
-        payload |= {"Body": json.dumps(data).encode("utf-8")}
+        payload |= {"Body": data if isinstance(data, str) else json.dumps(data).encode("utf-8")}
     if metadata:
         payload |= {"Metadata": stringfy(metadata)}
     if ttl is not None:
src/publish.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import contextlib
+import io
+import tempfile
+from pathlib import Path
+from urllib.parse import quote_plus
+
+import anyio
+import markdown
+from glom import Coalesce, glom
+from httpx import AsyncClient
+from loguru import logger
+from telegraph.aio import Telegraph
+
+from config import DB, TOKEN, TZ
+from database import set_cf_r2
+from utils import nowdt, rand_string
+
+
+async def publish_telegraph(title: str, texts: str | None = None, html: str = "", author: str | None = None, url: str | None = None) -> str:
+    """Create a Telegraph page and return its URL.
+
+    ``texts`` (Markdown) is rendered and takes precedence over ``html`` when
+    both are supplied. The work is handed to ``publish_cf_r2`` when no
+    Telegraph token is configured or when page creation raises.
+
+    Returns:
+        The Telegraph page URL, whatever ``publish_cf_r2`` returns on
+        fallback, or "" when there is nothing to publish.
+    """
+
+    def clean_html(s: str | None) -> str:
+        # Revise Telegraph tags: demote <h1>/<h2> headings to <h3>.
+        demoted = str(s)
+        for tag in ("h1", "h2"):
+            demoted = demoted.replace(f"<{tag}>", "<h3>").replace(f"</{tag}>", "</h3>")
+        return demoted
+
+    if not texts and not html:
+        return ""
+    if not TOKEN.TELEGRAPH:
+        return await publish_cf_r2(title, texts=texts, html=html, author=author, url=url)
+    if texts:
+        html = markdown.markdown(texts)
+    telegraph = Telegraph(access_token=TOKEN.TELEGRAPH)
+    if not author or not url:
+        profile = {}
+        # Best effort: a failed lookup simply leaves author/url unset.
+        with contextlib.suppress(Exception):
+            profile = await telegraph.get_account_info()
+        author = author or glom(profile, Coalesce("result.short_name", "result.author_name"), default=None)
+        url = url or glom(profile, "result.author_url", default=None)
+    # sanitize: clamp each field to the length limits used by this module
+    title = title[:256]
+    author = author[:128] if isinstance(author, str) else author
+    url = url[:512] if isinstance(url, str) else url
+    try:
+        page = await telegraph.create_page(title=title, author_name=author, author_url=url, html_content=clean_html(html))
+        logger.info(f"⚡️Telegraph: {page['url']}")
+        return page["url"]
+    except Exception as e:
+        logger.error(f"Telegraph publish error: {e}")
+        return await publish_cf_r2(title, texts=texts, html=html, author=author, url=url)
+
+
+async def publish_cf_r2(title: str, texts: str | None = None, html: str = "", author: str | None = None, url: str | None = None) -> str:
+    """Publish a page to Cloudflare R2 and return a Telegram Instant View link.
+
+    Args:
+        title: Page title; HTML-escaped before being embedded in the page.
+        texts: Markdown source. When given it is rendered and replaces ``html``.
+        html: Pre-rendered HTML body, used when ``texts`` is empty.
+        author: Author label; defaults to "BennyBot".
+        url: Author link; defaults to "https://instantview.telegram.org".
+
+    Returns:
+        A ``https://t.me/iv?...`` link when the upload succeeds and both
+        ``DB.CF_R2_PUBLIC_URL`` and ``TOKEN.R2_IV_HASH`` are configured.
+        Otherwise whatever ``publish_neocities`` returns, or "" when there is
+        nothing to publish.
+    """
+    from html import escape  # local import: the ``html`` parameter shadows the module name
+
+    if not (texts or html):
+        return ""
+    # Keep the bare body separate from the wrapped document so the Neocities
+    # fallback receives un-wrapped HTML and does not wrap it a second time.
+    body_html = markdown.markdown(texts) if texts else html
+    if not url:
+        url = "https://instantview.telegram.org"
+    if not author:
+        author = "BennyBot"
+
+    # Check configuration first: without a public URL and an IV hash no link
+    # can be built, so uploading would only leave an orphaned object behind.
+    if DB.CF_R2_PUBLIC_URL and TOKEN.R2_IV_HASH:
+        now = nowdt(TZ)
+        today = f"{now:%Y-%m-%d}"
+        key = f"InstantView/{today}-{rand_string(8)}.html"
+        # Escape values that land in markup so "<", "&" or quotes in a title
+        # cannot break the page.
+        safe_title = escape(title)
+        article = f'<h1 id="iv-title">{safe_title}</h1><a href="{escape(url)}" id="iv-author">{escape(author)}</a>{body_html}'
+        document = f'<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>{safe_title}</title><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/water.css@2/out/water.css"></head><body><article>{article}</article></body></html>'
+        if await set_cf_r2(key=key, data=document, metadata={"title": title, "author": author, "url": url}, mime_type="text/html"):
+            pub_url = f"{DB.CF_R2_PUBLIC_URL.rstrip('/')}/{key}"
+            logger.info(f"⚡️CF R2: {pub_url}")
+            return f"https://t.me/iv?url={quote_plus(pub_url)}&rhash={TOKEN.R2_IV_HASH}"
+    return await publish_neocities(title, texts=texts, html=body_html, author=author, url=url)
+
+
+async def publish_neocities(title: str, texts: str | None = None, html: str = "", author: str | None = None, url: str | None = None) -> str:
+    """Publish a page to neocities.org and return a link to it.
+
+    Args:
+        title: Page title; HTML-escaped before being embedded in the page.
+        texts: Markdown source. When given it is rendered and replaces ``html``.
+        html: Pre-rendered HTML body, used when ``texts`` is empty.
+        author: Author label; defaults to "BennyBot".
+        url: Author link; defaults to the published page URL.
+
+    Returns:
+        A ``https://t.me/iv?...`` link when ``TOKEN.NEOCITIES_IV_HASH`` is set,
+        otherwise the plain page URL. Returns "" when Neocities is not
+        configured, the credentials are malformed, there is nothing to
+        publish, or the upload fails.
+    """
+    from html import escape  # local import: the ``html`` parameter shadows the module name
+
+    if not TOKEN.NEOCITIES:
+        return ""
+    if not (texts or html):
+        return ""
+    # "user,pass" format: split on the first comma only so a password may
+    # itself contain commas, and reject malformed values instead of raising.
+    username, sep, password = TOKEN.NEOCITIES.partition(",")
+    if not (sep and username and password):
+        logger.error("Neocities publish error: NEOCITIES_USERPASS must be in 'user,pass' format")
+        return ""
+    if texts:
+        html = markdown.markdown(texts)
+    base_url = "https://neocities.org/api/upload"
+    now = nowdt(TZ)
+    today = f"{now:%Y-%m-%d}"
+    server_file = f"{today}/{rand_string(12)}.html"
+    pub_url = f"https://{username}.neocities.org/{server_file.removesuffix('.html')}"
+    if not url:
+        url = pub_url
+    if not author:
+        author = "BennyBot"
+
+    # Escape values that land in markup so "<", "&" or quotes in a title
+    # cannot break the page.
+    safe_title = escape(title)
+    article = f'<h1 id="iv-title">{safe_title}</h1><a href="{escape(url)}" id="iv-author">{escape(author)}</a>{html}'
+    document = f'<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>{safe_title}</title><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/water.css@2/out/water.css"></head><body><article>{article}</article></body></html>'
+
+    try:
+        # Upload straight from memory as UTF-8 (matching the declared charset);
+        # no temp file is needed, so nothing is left on disk when this fails.
+        # ``async with`` closes the client's connections on every path.
+        async with AsyncClient(http2=True, timeout=20) as client:
+            response = await client.post(
+                base_url,
+                auth=(username, password),
+                files={server_file: (server_file, io.BytesIO(document.encode("utf-8")), "text/html")},
+            )
+        # Treat a rejected upload as a failure instead of returning a dead link.
+        response.raise_for_status()
+        logger.info(f"⚡️Neocities: {pub_url}")
+    except Exception as e:
+        logger.error(f"Neocities publish error: {e}")
+        return ""
+
+    return f"https://t.me/iv?url={quote_plus(pub_url)}&rhash={TOKEN.NEOCITIES_IV_HASH}" if TOKEN.NEOCITIES_IV_HASH else pub_url
src/utils.py
@@ -1,33 +1,25 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import contextlib
-import io
 import json
 import random
 import re
 import string
-import tempfile
 from datetime import UTC, datetime
 from decimal import Decimal
 from pathlib import Path
 from typing import Any
-from urllib.parse import quote_plus
 from zoneinfo import ZoneInfo
 
-import anyio
-import markdown
 import zhconv
 from bilibili_api.utils.aid_bvid_transformer import aid2bvid, bvid2aid
 from bs4.element import PageElement
-from glom import Coalesce, PathAccessError, glom
-from httpx import AsyncClient
+from glom import PathAccessError, glom
 from loguru import logger
 from pyrogram.client import Client
 from pyrogram.types import User
-from telegraph.aio import Telegraph
 from yt_dlp.extractor import gen_extractors
 
-from config import DOWNLOAD_DIR, TOKEN, TZ, cache
+from config import DOWNLOAD_DIR, TZ, cache
 
 # ruff: noqa: RUF001
 
@@ -422,86 +414,6 @@ def cleanup_old_files(root: Path | str | None = None, duration: int = 7200) -> N
             path.unlink(missing_ok=True)
 
 
-async def publish_telegraph(title: str, texts: str | None = None, html: str = "", author: str | None = None, url: str | None = None) -> str:
-    """Publish to Telegraph."""
-
-    def clean_html(s: str | None) -> str:
-        # Revise Telegraph Tags
-        s = str(s).replace("<h1>", "<h3>").replace("</h1>", "</h3>")
-        return s.replace("<h2>", "<h3>").replace("</h2>", "</h3>")
-
-    if not TOKEN.TELEGRAPH:
-        return ""
-    if not (texts or html):
-        return ""
-    if texts:
-        html = markdown.markdown(texts)
-    telegraph = Telegraph(access_token=TOKEN.TELEGRAPH)
-    account_info = {}
-    if not (author and url):
-        with contextlib.suppress(Exception):
-            account_info = await telegraph.get_account_info()
-        if not author:
-            author = glom(account_info, Coalesce("result.short_name", "result.author_name"), default=None)
-        if not url:
-            url = glom(account_info, "result.author_url", default=None)
-    # sanitize
-    title = title[:256]
-    if isinstance(author, str):
-        author = author[:128]
-    if isinstance(url, str):
-        url = url[:512]
-    try:
-        page = await telegraph.create_page(title=title[:256], author_name=author, author_url=url, html_content=clean_html(html))
-        logger.info(f"⚡️即时预览: {page['url']}")
-        return page["url"]
-    except Exception as e:
-        logger.error(f"Telegraph publish error: {e}")
-        return await publish_neocities(title, texts=texts, html=html, author=author, url=url)
-
-
-async def publish_neocities(title: str, texts: str | None = None, html: str = "", author: str | None = None, url: str | None = None) -> str:
-    """Publish to neocities.org ."""
-    if not TOKEN.NEOCITIES:
-        return ""
-    if not (texts or html):
-        return ""
-    if texts:
-        html = markdown.markdown(texts)
-    base_url = "https://neocities.org/api/upload"
-    username, password = TOKEN.NEOCITIES.split(",")
-    now = nowdt(TZ)
-    today = f"{now:%Y-%m-%d}"
-    server_file = f"{today}/{rand_string(12)}.html"
-    pub_url = f"https://{username}.neocities.org/{server_file.removesuffix('.html')}"
-    if not url:
-        url = pub_url
-    if not author:
-        author = "BennyBot"
-
-    html = f'<h1>{title}</h1><div class="author-name"><a href="{url}" id="author-url">{author}</a>{html}'
-    html = f'<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>{title}</title><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/water.css@2/out/water.css"></head><body><article>{html}</article></body></html>'
-
-    try:
-        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as tempf:
-            tempf.write(html)
-        async with await anyio.open_file(tempf.name, "rb") as f:
-            content = await f.read()
-            client = AsyncClient(http2=True, timeout=20)
-            await client.post(
-                base_url,
-                auth=(username, password),
-                files={server_file: (server_file, io.BytesIO(content), "text/html")},
-            )
-        Path(tempf.name).unlink(missing_ok=True)
-        logger.info(f"⚡️Neocities: {pub_url}")
-    except Exception as e:
-        logger.error(f"Neocities publish error: {e}")
-        return ""
-
-    return f"https://t.me/iv?url={quote_plus(pub_url)}&rhash={TOKEN.IV_HASH}" if TOKEN.IV_HASH else pub_url
-
-
 def av2bv(aid: int | str) -> str:
     """Bilibili AV -> BV ID converter."""
     aid = str(aid)