main
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3import io
4import re
5import tempfile
6from datetime import UTC
7from pathlib import Path
8from typing import Literal
9from urllib.parse import quote_plus
10from zoneinfo import ZoneInfo
11
12import anyio
13from glom import glom
14from httpx import AsyncClient
15from loguru import logger
16from telegraph.aio import Telegraph
17from telegraph.utils import html_to_nodes
18
19from asr.utils import split_transcripts
20from config import DB, DOWNLOAD_DIR, TOKEN, TZ
21from database.r2 import set_cf_r2
22from networking import download_file
23from schema import AIPage, Section
24from utils import convert2html, convert2md, digest, nowdt, rand_string, remove_consecutive_newlines
25
26
27def adjust_tags(s: str | None) -> str:
28 # Revise Telegraph Tags
29 s = str(s).replace("<h1>", "<h3>").replace("</h1>", "</h3>")
30 return s.replace("<h2>", "<h3>").replace("</h2>", "</h3>")
31
32
async def publish_telegraph(
    title: str | None = None,
    html: str | None = None,
    nodes: list[dict] | None = None,
    author: str | None = None,
    url: str | None = None,
    aipage: AIPage | None = None,
    ttl: str | None = None,  # 12h, 7d, 1M, ...
    *,
    fallback_r2: bool = True,
) -> str:
    """Publish to Telegraph.

    Available tags:
    a, aside, b, blockquote, br, code, em, figcaption, figure, h3, h4, hr, i, iframe, img, li, ol, p, pre, s, strong, u, ul, video.

    Args:
        title: Page title; defaults to ``"Telegraph"`` and is cut to 256 characters.
        html: HTML content. h1/h2 headings are downgraded via ``adjust_tags``.
        nodes: Telegraph node list, passed as ``content``.
        author: Author name, cut to 128 characters.
        url: Author URL, cut to 512 characters.
        aipage: When given, publishing is delegated to ``telegraph_aipage`` and
            the other content arguments are ignored.
        ttl: Retention label forwarded to the R2 fallback; defaults to ``"365d"``.
        fallback_r2: Publish ``html`` to CF R2 if the Telegraph call fails.

    Returns:
        The published URL, or ``""`` when nothing could be published.
    """
    ttl = ttl or "365d"
    title = title or "Telegraph"
    if aipage is not None:
        return await telegraph_aipage(aipage, ttl=ttl)

    if not nodes and not html:
        # Previously a missing `html` was stringified to "None" and could be
        # published as the page body; there is nothing to publish here.
        logger.warning("Telegraph: no content to publish")
        return ""

    # limit title, author, url length
    title = title[:256]
    if author:
        author = author[:128]
    if url:
        url = url[:512]
    try:
        telegraph = Telegraph(access_token=TOKEN.TELEGRAPH)
        page = await telegraph.create_page(
            title=title,
            author_name=author,
            author_url=url,
            content=nodes,
            # Only hand real HTML to Telegraph; never the string "None".
            html_content=adjust_tags(html) if html is not None else None,
        )
        logger.info(f"⚡️Telegraph: {page['url']}")
        return page["url"]
    except Exception as e:
        logger.error(f"Telegraph publish error: {e}")
    if fallback_r2:
        return await publish_cf_r2(title, html=html, author=author, url=url, ttl=ttl)
    return ""
71
72
async def publish_cf_r2(
    title: str,
    html: str | None = None,
    author: str | None = None,
    url: str | None = None,
    aipage: AIPage | None = None,
    ttl: str = "365d",
) -> str:
    """Publish to CF R2.

    Exactly one of ``html`` or ``aipage`` must be supplied.

    Args:
        title: Plain-text page title (HTML-escaped before embedding).
        html: HTML body to wrap in a minimal document.
        author: Plain-text author name; defaults to ``"BennyBot"``.
        url: Author link; defaults to ``"https://instantview.telegram.org"``.
        aipage: AI page to publish instead of ``html``. Publishing is delegated
            to ``r2_aipage``; ``title``, ``author`` and ``url`` are not used then.
        ttl: ``"forever"`` stores under ``InstantView/``, anything else under
            ``TTL/<ttl>/``.

    Returns:
        A ``t.me/iv`` link to the uploaded page. If the upload fails or the R2
        public URL / IV hash is not configured, the result of
        ``publish_neocities``. ``""`` on invalid arguments or empty ``html``.
    """
    # Local import: the `html` parameter shadows the stdlib module name.
    from html import escape

    if html is None and aipage is None:
        logger.error("`html` or `aipage` parameter is required")
        return ""
    if html is not None and aipage is not None:
        logger.error("`html` and `aipage` parameter cannot be both provided")
        return ""
    if aipage is not None:
        # An aipage-only call used to fall through to the empty-html check and
        # return "" silently; delegate like publish_telegraph does.
        return await r2_aipage(aipage, ttl=ttl, rformat="url")
    if not html:
        return ""

    today = f"{nowdt(TZ):%Y-%m-%d}"
    key = f"InstantView/{today}-{rand_string(8)}.html" if ttl == "forever" else f"TTL/{ttl}/{today}-{rand_string(8)}.html"
    url = url or "https://instantview.telegram.org"
    author = author or "BennyBot"

    # title/author/url are plain text (the same title is sent to Telegraph as
    # text), so escape them before embedding in markup.
    safe_title = escape(str(title))
    article = f'<h1 id="iv-title">{safe_title}</h1><a href="{escape(str(url), quote=True)}" id="iv-author">{escape(str(author))}</a>{html}'
    document = f'<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>{safe_title}</title><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/water.css@2/out/water.css"></head><body><article>{article}</article></body></html>'

    uploaded = await set_cf_r2(key=key, data=document, metadata={"title": title, "author": author, "url": url}, mime_type="text/html")
    if uploaded and DB.CF_R2_PUBLIC_URL and TOKEN.R2_IV_HASH:
        pub_url = f"{DB.CF_R2_PUBLIC_URL.rstrip('/')}/{key}"
        logger.info(f"⚡️CF R2: {pub_url}")
        return f"https://t.me/iv?url={quote_plus(pub_url)}&rhash={TOKEN.R2_IV_HASH}"
    # Pass the raw body: publish_neocities builds its own document wrapper, so
    # forwarding the wrapped document would nest two full HTML pages.
    return await publish_neocities(title, html=html, author=author, url=url)
105
106
async def publish_neocities(title: str, html: str | None = None, author: str | None = None, url: str | None = None) -> str:
    """Publish to neocities.org .

    ``TOKEN.NEOCITIES`` is read as ``"username,password"``; it is split at the
    first comma, so the password itself may contain commas.

    Args:
        title: Plain-text page title (HTML-escaped before embedding).
        html: HTML body to wrap in a minimal document.
        author: Plain-text author name; defaults to ``"BennyBot"``.
        url: Author link; defaults to the page's own public URL.

    Returns:
        A ``t.me/iv`` link when ``TOKEN.NEOCITIES_IV_HASH`` is set, otherwise
        the public page URL. ``""`` if credentials or content are missing or
        the upload fails.
    """
    # Local import: the `html` parameter shadows the stdlib module name.
    from html import escape

    if not TOKEN.NEOCITIES:
        return ""
    if not html:
        return ""
    username, _, password = TOKEN.NEOCITIES.partition(",")
    if not username or not password:
        logger.error("Neocities publish error: TOKEN.NEOCITIES must be 'username,password'")
        return ""

    base_url = "https://neocities.org/api/upload"
    today = f"{nowdt(TZ):%Y-%m-%d}"
    server_file = f"{today}/{rand_string(12)}.html"
    pub_url = f"https://{username}.neocities.org/{server_file.removesuffix('.html')}"
    url = url or pub_url
    author = author or "BennyBot"

    safe_title = escape(str(title))
    article = f'<h1 id="iv-title">{safe_title}</h1><a href="{escape(str(url), quote=True)}" id="iv-author">{escape(str(author))}</a>{html}'
    document = f'<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>{safe_title}</title><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/water.css@2/out/water.css"></head><body><article>{article}</article></body></html>'

    # Encode in memory as UTF-8 (matching the declared charset) instead of
    # round-tripping through a temp file that leaked whenever the upload raised.
    payload = document.encode("utf-8")
    try:
        # The context manager closes the client and its connection pool.
        async with AsyncClient(http2=True, timeout=20) as client:
            response = await client.post(
                base_url,
                auth=(username, password),
                files={server_file: (server_file, io.BytesIO(payload), "text/html")},
            )
            # A 4xx/5xx reply must not be reported as a successful publish.
            response.raise_for_status()
    except Exception as e:
        logger.error(f"Neocities publish error: {e}")
        return ""

    logger.info(f"⚡️Neocities: {pub_url}")
    return f"https://t.me/iv?url={quote_plus(pub_url)}&rhash={TOKEN.NEOCITIES_IV_HASH}" if TOKEN.NEOCITIES_IV_HASH else pub_url
145
146
async def telegraph_aipage(page: AIPage, ttl: str | None = None, *, force_r2: bool = False) -> str:
    """Build Telegraph nodes from an AI page and publish them.

    The page is laid out as: table of contents, AI overview, original
    description, summary sections, full transcript. If Telegraph publishing
    returns no URL (or ``force_r2`` is set) the page is published through
    ``r2_aipage`` instead.

    Returns:
        The published URL, or ``""`` when the page has no content at all.
    """
    if force_r2:
        return await r2_aipage(page, ttl=ttl, rformat="url")

    def toc_entry(href: str, label: str) -> dict:
        # One <li><a href=...>label</a></li> entry of the table of contents.
        return {"tag": "li", "children": [{"attrs": {"href": href}, "children": [label], "tag": "a"}]}

    def heading(tag: str, text: str) -> dict:
        return {"tag": tag, "children": [text]}

    def markdown_to_nodes(markdown: str) -> list:
        # Markdown -> HTML -> Telegraph nodes, with headings downgraded to h3.
        rendered = convert2html(remove_consecutive_newlines(markdown, newline_level=2))
        return html_to_nodes(adjust_tags(rendered))

    overview = glom(page, "summary.overview", default="")
    transcripts = page.transcripts or []
    if isinstance(transcripts, str):
        transcripts = split_transcripts(transcripts)
    sections: list[Section] = glom(page, "summary.sections", default=[])

    # TOC: anchors are the heading text with spaces turned into dashes.
    toc_items = []
    for section in sections:
        label = section.emoji + section.title
        toc_items.append(toc_entry("#" + label.replace(" ", "-"), label))
    if transcripts:
        toc_items.append(toc_entry("#🔤完整字幕", "🔤完整字幕"))
    if page.mermaid_url:
        toc_items.append(toc_entry(page.mermaid_url, "🧠思维导图"))

    nodes = []
    if toc_items:
        nodes.append({"tag": "ul", "children": toc_items})

    # Overview
    if overview:
        nodes += [heading("h3", "🤖AI导读"), {"tag": "p", "children": [overview]}]

    # Description: either a plain string or a dict carrying "html" plus
    # optional "emoji" / "name".
    description = page.description
    if description:
        if isinstance(description, str):
            body_nodes = markdown_to_nodes(convert2md(html=description))
            nodes.append(heading("h4", "📖原始简介"))
            nodes.extend(body_nodes)
        elif isinstance(description, dict) and description.get("html"):
            emoji = description.get("emoji", "📖")
            name = description.get("name", "原始简介")
            raw = description["html"]
            markdown = convert2md(html=raw) if raw.startswith("<") else raw
            body_nodes = markdown_to_nodes(markdown)
            nodes.append(heading("h4", emoji + name))
            nodes.extend(body_nodes)

    # Sections
    for section in sections:
        nodes += [heading("h4", section.emoji + section.title), {"tag": "p", "children": [section.content]}]

    # Transcript
    if transcripts:
        nodes.append(heading("h4", "🔤完整字幕"))
        for line in transcripts:
            nodes.append({"children": [f"[{line.start}] {line.content}"], "tag": "p"})

    if not nodes:
        logger.warning("No Telegraph nodes to publish")
        return ""

    published = await publish_telegraph(title=page.title, nodes=nodes, author=page.author, url=page.url, ttl=ttl, fallback_r2=False)
    return published or await r2_aipage(page, ttl=ttl, rformat="url")
207
208
async def r2_aipage(page: AIPage, ttl: str | None = None, *, expand_transcript: bool = False, rformat: Literal["url", "html"] = "url") -> str:
    """Publish AI Page to R2.

    Renders the page (overview, description, sections, transcript, mind map)
    into a single HTML document that references ``telegraph.css`` and
    ``telegraph.js`` under the R2 public URL.

    Args:
        page: The AI page to render.
        ttl: ``"forever"`` stores under ``InstantView/``, anything else under
            ``TTL/<ttl>/``. Defaults to ``"365d"``.
        expand_transcript: Render the transcript card already expanded.
        rformat: ``"url"`` uploads the document and returns its link;
            ``"html"`` returns the document itself with the CSS/JS inlined
            when they can be downloaded.

    Returns:
        For ``"url"``: the public link (wrapped in a ``t.me/iv`` link when
        ``TOKEN.R2_IV_HASH`` is set), or ``""`` if the upload fails.
        For ``"html"``: the HTML string. ``""`` when the page has no content.
    """
    from html import escape

    def esc(value: object) -> str:
        # Plain-text fields (title, author, section titles, transcript lines,
        # URLs) go into text nodes and attribute values; escape them so quotes
        # or angle brackets cannot break the markup.
        return escape(str(value), quote=True)

    ttl = ttl or "365d"
    # Normalised the same way publish_cf_r2 does, to avoid "//" in URLs.
    base_url = str(DB.CF_R2_PUBLIC_URL or "").rstrip("/")
    today = f"{nowdt(TZ):%Y-%m-%d}"
    r2_prefix = f"InstantView/{today}" if ttl == "forever" else f"TTL/{ttl}/{today}"
    r2_key = f"{r2_prefix}-{digest(page)}.html"  # noqa: S324
    r2_url = f"{base_url}/{r2_key}"
    if TOKEN.R2_IV_HASH:
        r2_url = f"https://t.me/iv?url={quote_plus(r2_url)}&rhash={TOKEN.R2_IV_HASH}"

    date = page.date or nowdt("UTC")
    utc_date = date.astimezone(UTC)
    tz_date = utc_date.astimezone(ZoneInfo(TZ))

    title_html = esc(page.title or "")
    url_attr = esc(page.url or "https://instantview.telegram.org")
    author_tag = f'<div class="header-author"><span class="header-author">{esc(page.author)}</span><span class="header-date"> | {tz_date:%Y-%m-%d %H:%M:%S}</span></div>' if page.author else ""

    overview_text = glom(page, "summary.overview", default="")
    overview = f'<div class="card summary"><div class="card-label" id="summary" >🤖AI导读</div>{convert2html(overview_text)}</div>' if overview_text else ""

    sections: list[Section] = glom(page, "summary.sections", default=[])

    sidebar_items: list[str] = []
    if overview:
        sidebar_items.append('<li><a href="#summary" onclick="navClick(event)"><span class="sidebar-icon">🤖</span><span class="sidebar-label">AI导读</span></a></li>')

    # Description: a plain string, or a dict with "html" and optional "emoji"/"name".
    desc_tag = ""
    desc_head = ""
    description = page.description
    if description and isinstance(description, str):
        sidebar_items.append('<li><a href="#description" onclick="navClick(event)"><span class="sidebar-icon">📖</span><span class="sidebar-label">原始简介</span></a></li>')
        desc_html = description if description.startswith("<") else convert2html(description)
        desc_tag = f'<div class="card description"><div class="card-label" id="description">📖原始简介</div>{desc_html}</div>'
    elif description and isinstance(description, dict) and description.get("html"):
        desc_emoji = esc(description.get("emoji", "📖"))
        desc_title = esc(description.get("name", "原始简介"))
        desc_html = description["html"] if description["html"].startswith("<") else convert2html(description["html"])
        sidebar_items.append(f'<li><a href="#description" onclick="navClick(event)"><span class="sidebar-icon">{desc_emoji}</span><span class="sidebar-label">{desc_title}</span></a></li>')
        desc_tag = f'<div class="card description"><div class="card-label" id="description">{desc_emoji}{desc_title}</div>{desc_html}</div>'
    # NOTE(review): og:description is filled from the overview but only when a
    # description also exists — condition kept as-is; confirm it is intended.
    if description and overview:
        desc_head = f'<meta property="og:description" content="{esc(overview_text)}">'

    # Sections: one sidebar entry and one <section> per summary section.
    section_blocks: list[str] = []
    for idx, section in enumerate(sections, start=1):
        emoji = esc(section.emoji)
        heading = esc(section.title)
        nav = f'<li><a href="#s{idx}" onclick="navClick(event)"><span class="sidebar-icon">{emoji}</span><span class="sidebar-label">{heading}</span>'
        block = f'<section class="section" id="s{idx}"><div class="section-header"><span class="section-icon">{emoji}</span><h2 class="section-title">{heading}</h2>'
        if section.start:
            # Drop a leading "00:" hour field from longer timestamps.
            start = esc(section.start.removeprefix("00:") if len(section.start) > 5 else section.start)
            nav += f'<span class="sidebar-time">{start}</span>'
            block += f'<span class="section-time">{start}</span>'
        sidebar_items.append(nav + "</a></li>")
        section_blocks.append(f"{block}</div>{convert2html(section.content)}</section>")
    sections_tag = "".join(section_blocks)

    transcripts = page.transcripts or []
    if isinstance(transcripts, str):
        transcripts = split_transcripts(transcripts)

    transcriptions = ""
    if transcripts:
        sidebar_items.append("""<li class="nav-transcript"><a href="#transcript" onclick="navClick(event)"><span class="sidebar-icon">🔤</span><span class="sidebar-label">完整字幕</span></a></li>""")
        # Build the expanded state into the transcript card itself. The former
        # document-wide string replace also flipped aria-expanded on the
        # mind-map toggle, whose body stays collapsed.
        toggle_state = "true" if expand_transcript else "false"
        toggle_label = "收起字幕" if expand_transcript else "展开字幕"
        content_class = "transcript-content open" if expand_transcript else "transcript-content"
        rows = "".join(f'<p><span class="ts">{esc(sentence.start)}</span>{esc(sentence.content)}</p>' for sentence in transcripts)
        transcriptions = f'<div class="card transcript-card" id="transcript"><button class="transcript-toggle" aria-expanded="{toggle_state}" onclick="toggleTranscript(this)">{toggle_label} <span class="arrow">▾</span></button><div class="{content_class}" id="transcriptions">{rows}</div></div>'

    # The sidebar and its toggle button are only emitted when there are entries.
    if sidebar_items:
        sidebars = '<nav class="sidebar" id="sidebar"><ul>' + "".join(sidebar_items) + "</ul></nav>"
        sidebar_icon = '<button class="icon-toc" id="icon-toc" onclick="toggleSidebar()" aria-label="菜单"><svg class="icon-open" viewBox="0 0 24 24"><line x1="3" y1="6" x2="21" y2="6" /><line x1="3" y1="12" x2="21" y2="12" /><line x1="3" y1="18" x2="21" y2="18" /></svg><svg class="icon-close" viewBox="0 0 24 24" style="display:none"><polyline points="13,4 5,12 13,20" /><polyline points="20,4 12,12 20,20" /></svg></button>'
    else:
        sidebars = ""
        sidebar_icon = ""

    if not any([overview, page.mermaid_img, sections_tag, desc_tag, transcriptions]):
        logger.warning("No AIPage contents to publish")
        return ""

    theme_icon = '<button class="icon-theme" id="icon-theme" onclick="toggleTheme()" aria-label="切换主题"><svg class="icon-sun" viewBox="0 0 24 24"><circle cx="12" cy="12" r="5" /><path d="M12 1v2M12 21v2M4.22 4.22l1.42 1.42M18.36 18.36l1.42 1.42M1 12h2M21 12h2M4.22 19.78l1.42-1.42M18.36 5.64l1.42-1.42" /></svg><svg class="icon-moon" viewBox="0 0 24 24" style="display:none"><path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z" /></svg></button>'

    # Mind-map widgets exist only when the page carries a rendered image.
    mermaid_icon = ""
    mermaid_desktop = ""
    mermaid_mobile = ""
    if page.mermaid_img:
        mermaid_img = esc(page.mermaid_img)
        mermaid_url = esc(page.mermaid_url)
        mermaid_icon = '<button class="icon-mindmap" id="icon-mindmap" onclick="toggleMindmapPanel()" aria-label="思维导图"><svg viewBox="0 0 24 24"><circle cx="4" cy="12" r="2" /><path d="M6 12h6M12 12l8-8M12 12h8M12 12l8 8" /></svg></button>'
        mermaid_desktop = f'<div class="mindmap-panel" id="mindmap-panel"><div class="mindmap-panel-content"><img data-src="{mermaid_img}" alt="思维导图"><a href="{mermaid_url}" target="_blank" class="mindmap-link">查看完整思维导图</a></div></div>'
        mermaid_mobile = f'<div class="card mindmap-card mindmap-mobile" id="mindmap-mobile"><button class="transcript-toggle" aria-expanded="false" onclick="toggleMindmap(this)">展开思维导图 <span class="arrow">▾</span></button><div class="mindmap-body" id="mindmap-body"><img data-src="{mermaid_img}" alt="思维导图"><a href="{mermaid_url}" target="_blank" class="mindmap-link">查看完整思维导图</a></div></div>'

    transcript_panel = '<div class="transcript-panel" id="transcript-panel"></div>' if transcriptions else ""

    html_str = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta property="article:published_time" content="{utc_date:%Y-%m-%dT%H:%M:%SZ}">
    <meta property="og:title" content="{title_html}">{desc_head}
    <meta property="og:site_name" content="🤖AI导读" />
    <link rel="icon" type="image/png" href="{base_url}/favicon/favicon-96x96.png" sizes="96x96" />
    <link rel="icon" type="image/svg+xml" href="{base_url}/favicon/favicon.svg" />
    <link rel="shortcut icon" href="{base_url}/favicon/favicon.ico" />
    <link rel="apple-touch-icon" sizes="180x180" href="{base_url}/favicon/apple-touch-icon.png" />
    <meta name="apple-mobile-web-app-title" content="R2" />
    <link rel="manifest" href="{base_url}/favicon/site.webmanifest" />
    <title>{title_html}</title>
    <link rel="stylesheet" href="{base_url}/telegraph.css">
    <script src="{base_url}/telegraph.js" defer></script>
</head>
<body>

    <!-- Icon -->
    {sidebar_icon}

    {mermaid_icon}

    {theme_icon}

    {sidebars}
    <div class="resize-handle resize-handle-left" id="resize-handle-left"></div>
    <div class="resize-handle resize-handle-right" id="resize-handle-right"></div>
    <div class="container">
        <header class="header"><h1 class="header-title"><a href="{url_attr}" target="_blank">{title_html}</a></h1>{author_tag}</header>

        <!-- AI Summary -->
        {overview}

        {mermaid_mobile}

        <!-- Description -->
        {desc_tag}

        <!-- Sections -->
        {sections_tag}


        <!-- Transcript -->
        {transcriptions}

    </div>
    {transcript_panel}
    {mermaid_desktop}

</body>

</html>"""
    # simplify html
    # The old "empty header link" replace is dropped: the header href always
    # has a fallback URL so it never matched, and it raised TypeError when the
    # title was None.
    # NOTE(review): the template above emits no footer, so this only strips
    # such markup if it arrives inside page content — kept for parity.
    html_str = html_str.replace('<footer class="footer header-title"><a href="" target="_blank" ></a></footer>', "")
    html_str = re.sub(r"<!--[\s\S]*?-->", "", html_str)
    # Collapse every whitespace run (including template indentation) to one space.
    html_str = re.sub(r"\s+", " ", html_str).strip()

    if rformat == "url":
        uploaded = await set_cf_r2(key=r2_key, data=html_str, metadata={"title": page.title, "author": page.author or "BennyBot", "url": page.url}, mime_type="text/html")
        return r2_url if uploaded else ""

    # return html, embed css and js in head
    css_path = Path(DOWNLOAD_DIR) / "HTML/telegraph.css"
    js_path = Path(DOWNLOAD_DIR) / "HTML/telegraph.js"
    await download_file(f"{base_url}/telegraph.css", css_path, skip_exist=True)
    await download_file(f"{base_url}/telegraph.js", js_path, skip_exist=True)
    if css_path.exists():
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        css = css_path.read_text(encoding="utf-8")
        html_str = html_str.replace(f'<link rel="stylesheet" href="{base_url}/telegraph.css">', f"<style>\n{css}\n</style>")
    if js_path.exists():
        js = js_path.read_text(encoding="utf-8")
        # NOTE(review): `defer` has no effect on inline scripts, so this runs
        # before <body> is parsed — confirm telegraph.js tolerates that.
        html_str = html_str.replace(f'<script src="{base_url}/telegraph.js" defer></script>', f"<script defer>\n{js}\n</script>")
    return html_str