Commit 3715bb8

benny-dou <60535774+benny-dou@users.noreply.github.com>
2026-05-27 03:15:05
chore(arxiv): send PDF first
1 parent 7d7af9f
Changed files (1)
src
preview
src/preview/arxiv.py
@@ -1,14 +1,17 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import contextlib
+from pathlib import Path
 
 import feedparser
 from glom import Coalesce, glom
 from pyrogram.client import Client
-from pyrogram.types import Message
+from pyrogram.types import InputMediaDocument, Message
 
-from config import PROXY
+from config import CAPTION_LENGTH, PROXY, TEXT_LENGTH
 from messages.progress import modify_progress
 from messages.sender import send2tg
+from messages.utils import blockquote, smart_split
 from networking import download_file, hx_req
 
 HEADERS = {
@@ -19,23 +22,31 @@ HEADERS = {
 
 async def preview_arxiv(client: Client, message: Message, url: str, arxiv_id: str, **kwargs):
     """Preview arxiv in the message."""
+    status_msg = None
     if kwargs.get("show_progress") and "progress" not in kwargs:
         res = await send2tg(client, message, texts=f"🔗正在解析arXiv链接\n{url}", **kwargs)
         kwargs["progress"] = res[0]
+        status_msg = res[0]
     kwargs["send_from_user"] = ""  # disable @send_user
+    # First, get the PDF and send it.
+    pdf = await download_file(f"https://arxiv.org/pdf/{arxiv_id}", suffix=".pdf", proxy=PROXY.ARXIV, stream=True)
+    if not pdf:
+        await modify_progress(status_msg, text="❌下载PDF失败", force_update=True)
+        return
+    file_id = pdf
+    if isinstance(status_msg, Message):
+        status_msg = await status_msg.edit_media(file_name=f"{arxiv_id}.pdf", media=InputMediaDocument(file_id, caption=f"arXiv: [{arxiv_id}]({url})"))
+        file_id = glom(status_msg, "document.file_id", default=pdf)
 
     api = f"https://export.arxiv.org/api/query?id_list={arxiv_id}"
     resp = await hx_req(api, headers=HEADERS, proxy=PROXY.ARXIV, rformat="text")
     if "hx_error" in resp:
         return
-    if not resp.get("text"):
-        await modify_progress(text=f"❌arXiv解析失败: {resp}", force_update=True, **kwargs)
     arxiv = feedparser.parse(resp["text"])
-
     entry = glom(arxiv, "entries.0", default={})
-
     title = glom(entry, "title", default="")
     updated = glom(entry, Coalesce("updated", "published"), default="")
+    updated = updated.replace("T", " ").rstrip("Z")
     abstract = glom(entry, "summary", default="")
     comment = glom(entry, "arxiv_comment", default="")
     authors = ""
@@ -43,11 +54,19 @@ async def preview_arxiv(client: Client, message: Message, url: str, arxiv_id: st
         if name := author.get("name"):
             authors += f"{name}, "
     authors = authors.rstrip(", ")
-    await modify_progress(text="⏬正在下载PDF", force_update=True, **kwargs)
-    pdf = await download_file(f"https://arxiv.org/pdf/{arxiv_id}", suffix=".pdf", proxy=PROXY.ARXIV, stream=True)
     texts = f"📄**[{title}]({url})**\n👥{authors}\n🕒{updated}\n"
     if comment:
-        texts += f"📝{comment}\n"
-    texts += f"\n**Abstract**\n{abstract}"
-    await send2tg(client, message, texts=texts, media=[{"document": pdf}], **kwargs)
-    await modify_progress(del_status=True, **kwargs)
+        texts += f"📝{comment}"
+
+    caption = (await smart_split(texts, CAPTION_LENGTH))[0]
+    if isinstance(status_msg, Message):
+        status_msg = await status_msg.edit_media(file_name=f"{arxiv_id}.pdf", media=InputMediaDocument(file_id, caption=caption))
+        Path(pdf).unlink(missing_ok=True)
+    # await modify_progress(status, del_status=True)
+    if not isinstance(status_msg, Message):
+        return
+
+    abstract = f"**Abstract**\n{abstract}"
+    with contextlib.suppress(Exception):
+        for txt in await smart_split(abstract, TEXT_LENGTH):
+            status_msg = await status_msg.reply_text(blockquote(txt), quote=True)