Commit 3715bb8
Changed files (1)
src
preview
src/preview/arxiv.py
@@ -1,14 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import contextlib
+from pathlib import Path
import feedparser
from glom import Coalesce, glom
from pyrogram.client import Client
-from pyrogram.types import Message
+from pyrogram.types import InputMediaDocument, Message
-from config import PROXY
+from config import CAPTION_LENGTH, PROXY, TEXT_LENGTH
from messages.progress import modify_progress
from messages.sender import send2tg
+from messages.utils import blockquote, smart_split
from networking import download_file, hx_req
HEADERS = {
@@ -19,23 +22,31 @@ HEADERS = {
async def preview_arxiv(client: Client, message: Message, url: str, arxiv_id: str, **kwargs):
"""Preview arxiv in the message."""
+ status_msg = None
if kwargs.get("show_progress") and "progress" not in kwargs:
res = await send2tg(client, message, texts=f"🔗正在解析arXiv链接\n{url}", **kwargs)
kwargs["progress"] = res[0]
+ status_msg = res[0]
kwargs["send_from_user"] = "" # disable @send_user
+ # First, get the PDF and send it.
+ pdf = await download_file(f"https://arxiv.org/pdf/{arxiv_id}", suffix=".pdf", proxy=PROXY.ARXIV, stream=True)
+ if not pdf:
+ await modify_progress(status_msg, text="❌下载PDF失败", force_update=True)
+ return
+ file_id = pdf
+ if isinstance(status_msg, Message):
+ status_msg = await status_msg.edit_media(file_name=f"{arxiv_id}.pdf", media=InputMediaDocument(file_id, caption=f"arXiv: [{arxiv_id}]({url})"))
+ file_id = glom(status_msg, "document.file_id", default=pdf)
api = f"https://export.arxiv.org/api/query?id_list={arxiv_id}"
resp = await hx_req(api, headers=HEADERS, proxy=PROXY.ARXIV, rformat="text")
if "hx_error" in resp:
return
- if not resp.get("text"):
- await modify_progress(text=f"❌arXiv解析失败: {resp}", force_update=True, **kwargs)
arxiv = feedparser.parse(resp["text"])
-
entry = glom(arxiv, "entries.0", default={})
-
title = glom(entry, "title", default="")
updated = glom(entry, Coalesce("updated", "published"), default="")
+ updated = updated.replace("T", " ").rstrip("Z")
abstract = glom(entry, "summary", default="")
comment = glom(entry, "arxiv_comment", default="")
authors = ""
@@ -43,11 +54,19 @@ async def preview_arxiv(client: Client, message: Message, url: str, arxiv_id: st
if name := author.get("name"):
authors += f"{name}, "
authors = authors.rstrip(", ")
- await modify_progress(text="⏬正在下载PDF", force_update=True, **kwargs)
- pdf = await download_file(f"https://arxiv.org/pdf/{arxiv_id}", suffix=".pdf", proxy=PROXY.ARXIV, stream=True)
texts = f"📄**[{title}]({url})**\n👥{authors}\n🕒{updated}\n"
if comment:
- texts += f"📝{comment}\n"
- texts += f"\n**Abstract**\n{abstract}"
- await send2tg(client, message, texts=texts, media=[{"document": pdf}], **kwargs)
- await modify_progress(del_status=True, **kwargs)
+ texts += f"📝{comment}"
+
+ caption = (await smart_split(texts, CAPTION_LENGTH))[0]
+ if isinstance(status_msg, Message):
+ status_msg = await status_msg.edit_media(file_name=f"{arxiv_id}.pdf", media=InputMediaDocument(file_id, caption=caption))
+ Path(pdf).unlink(missing_ok=True)
+ # await modify_progress(status, del_status=True)
+ if not isinstance(status_msg, Message):
+ return
+
+ abstract = f"**Abstract**\n{abstract}"
+ with contextlib.suppress(Exception):
+ for txt in await smart_split(abstract, TEXT_LENGTH):
+ status_msg = await status_msg.reply_text(blockquote(txt), quote=True)