Commit 9bfb7ea
Changed files (11)
src
ai
history
src/ai/utils.py
@@ -14,7 +14,7 @@ from glom import glom
from google import genai
from google.genai.types import HttpOptions
from loguru import logger
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.parser.markdown import BLOCKQUOTE_DELIM, BLOCKQUOTE_EXPANDABLE_DELIM, SPOILER_DELIM
from config import AI, PREFIX, PROXY, cache
from database.kv import get_cf_kv
@@ -84,9 +84,12 @@ def clean_bot_tips(text: str) -> str:
return re.sub(rf"^{EMOJI_TEXT_BOT}(.*?){BOT_TIPS}", "", text, flags=re.DOTALL).strip()
-def clean_reasoning(text: str) -> str:
- text = re.sub(rf"{EMOJI_REASONING_BEGIN}(.*?){EMOJI_REASONING_END}", "", text.strip(), flags=re.DOTALL).strip()
- return text.replace(BLOCKQUOTE_EXPANDABLE_DELIM, "").strip()
+def clean_reasoning(s: str) -> str:
+    """Strip the reasoning section and quote/spoiler delimiters from ``s``.
+
+    Everything between EMOJI_REASONING_BEGIN and EMOJI_REASONING_END is removed
+    first. Then, for every remaining line, a leading BLOCKQUOTE_EXPANDABLE_DELIM
+    or BLOCKQUOTE_DELIM and a trailing SPOILER_DELIM are dropped.
+
+    Returns the cleaned text with surrounding whitespace stripped.
+    """
+    s = re.sub(rf"{EMOJI_REASONING_BEGIN}(.*?){EMOJI_REASONING_END}", "", s.strip(), flags=re.DOTALL).strip()
+    # Join once instead of growing a string with ``+=`` inside the loop.
+    cleaned_lines = (
+        line.removeprefix(BLOCKQUOTE_EXPANDABLE_DELIM).removeprefix(BLOCKQUOTE_DELIM).removesuffix(SPOILER_DELIM)
+        for line in s.splitlines()
+    )
+    return "\n".join(cleaned_lines).strip()
def clean_context(text: str) -> str:
src/history/query.py
@@ -6,7 +6,7 @@ from io import BytesIO
from glom import glom
from loguru import logger
from pyrogram.client import Client
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.parser.markdown import BLOCKQUOTE_DELIM, BLOCKQUOTE_EXPANDABLE_DELIM, SPOILER_DELIM
from pyrogram.types import Message, User
from config import HISTORY, PREFIX, TZ, cache
@@ -32,14 +32,14 @@ HELP = f"""🗣**查询当前对话聊天记录**
4.`/hist + 日期 + @用户名 + 关键词` (日期需放在最前面)
示例:
{BLOCKQUOTE_EXPANDABLE_DELIM}`/hist 你好`: 查询包含“你好”关键词的记录
-{BLOCKQUOTE_EXPANDABLE_DELIM}`/hist 2025-01-01 你好`: 查询2025-01-01日包含“你好”的记录
-{BLOCKQUOTE_EXPANDABLE_DELIM}`/hist @张三 你好`: 查询用户【张三】包含“你好”的记录
-{BLOCKQUOTE_EXPANDABLE_DELIM}`/hist 2025 @张三 你好`: 查询2025年用户【张三】包含“你好”的记录
-{BLOCKQUOTE_EXPANDABLE_DELIM}
-{BLOCKQUOTE_EXPANDABLE_DELIM}注意:
-{BLOCKQUOTE_EXPANDABLE_DELIM}- 用户名和关键词需要区分大小写
-{BLOCKQUOTE_EXPANDABLE_DELIM}- 用户名可以为昵称 (Name)、用户名 (@username)、用户的TelegramUID
-{BLOCKQUOTE_EXPANDABLE_DELIM}- 如果用户名中有空格, 请去除空格。例如: 想指定用户为John Doe请使用 `@JohnDoe`
+{BLOCKQUOTE_DELIM}`/hist 2025-01-01 你好`: 查询2025-01-01日包含“你好”的记录
+{BLOCKQUOTE_DELIM}`/hist @张三 你好`: 查询用户【张三】包含“你好”的记录
+{BLOCKQUOTE_DELIM}`/hist 2025 @张三 你好`: 查询2025年用户【张三】包含“你好”的记录{SPOILER_DELIM}
+
+注意:
+- 用户名和关键词需要区分大小写
+- 用户名可以为昵称 (Name)、用户名 (@username)、用户的TelegramUID
+- 如果用户名中有空格, 请去除空格。例如: 想指定用户为John Doe请使用 `@JohnDoe`
`/history` 使用说明:
查询所有对话的聊天记录
但出于隐私考虑, 本命令会限制使用权限
src/messages/sender.py
@@ -7,7 +7,7 @@ from pathlib import Path
from loguru import logger
from pyrogram.client import Client
from pyrogram.errors import FloodWait
-from pyrogram.parser.markdown import BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.parser.markdown import BLOCKQUOTE_DELIM, BLOCKQUOTE_EXPANDABLE_DELIM
from pyrogram.types import Message, ReplyParameters
from config import CAPTION_LENGTH, TID
@@ -130,9 +130,7 @@ async def send_texts(
if not msg:
continue
# we do not send comments-only texts
- if all(s.startswith(BLOCKQUOTE_EXPANDABLE_DELIM) for s in msg.split("\n") if s):
- continue
- if f"{BLOCKQUOTE_EXPANDABLE_DELIM}💬" in msg and "💬**点此展开评论区**:" not in msg:
+ if (f"{BLOCKQUOTE_EXPANDABLE_DELIM}💬" in msg or f"{BLOCKQUOTE_DELIM}💬" in msg) and "💬**点此展开评论区**:" not in msg:
continue
if idx != 0:
src/messages/utils.py
@@ -6,7 +6,7 @@ import re
from loguru import logger
from pyrogram.client import Client
from pyrogram.enums import ParseMode
-from pyrogram.parser.markdown import BLOCKQUOTE_DELIM, BLOCKQUOTE_EXPANDABLE_DELIM
+from pyrogram.parser.markdown import BLOCKQUOTE_DELIM, BLOCKQUOTE_EXPANDABLE_DELIM, SPOILER_DELIM
from pyrogram.parser.parser import Parser
from pyrogram.types import Message, ReactionTypeEmoji, ReplyParameters
@@ -159,7 +159,7 @@ async def smart_split(text: str, chars_per_string: int = TEXT_LENGTH, mode: Pars
return strings[: matched.end()]
return strings
- chars_per_string = chars_per_string - len(BLOCKQUOTE_EXPANDABLE_DELIM) * text.count(BLOCKQUOTE_EXPANDABLE_DELIM)
+ chars_per_string = chars_per_string - len(BLOCKQUOTE_EXPANDABLE_DELIM) - len(SPOILER_DELIM) - len(BLOCKQUOTE_DELIM) * text.count(BLOCKQUOTE_DELIM)
parts = []
while True:
if await count_without_entities(text, mode) < chars_per_string:
@@ -181,8 +181,11 @@ async def smart_split(text: str, chars_per_string: int = TEXT_LENGTH, mode: Pars
def blockquote(s: str) -> str:
"""Block quote texts."""
- s = s.replace(BLOCKQUOTE_EXPANDABLE_DELIM, "")
- return BLOCKQUOTE_EXPANDABLE_DELIM + s.replace("\n", f"\n{BLOCKQUOTE_EXPANDABLE_DELIM}")
+ s = s.removeprefix(BLOCKQUOTE_EXPANDABLE_DELIM).removesuffix(SPOILER_DELIM)
+ if not s:
+ return s
+
+ return "\n" + BLOCKQUOTE_EXPANDABLE_DELIM + s.replace("\n", f"\n{BLOCKQUOTE_DELIM}") + SPOILER_DELIM
def quote(s: str) -> str:
src/preview/bilibili.py
@@ -243,7 +243,7 @@ async def get_bilibili_subtitle(url_or_vid: int | str) -> dict:
async def get_bilibili_comments(url_or_vid: int | str) -> list[str]:
"""Get Bilibili comments."""
- comments = []
+ comments_list = []
try:
# url to vid
cookie = await bilibili_cookie_dict()
@@ -264,13 +264,16 @@ async def get_bilibili_comments(url_or_vid: int | str) -> list[str]:
location = f"({location})" if location else ""
if cmt := glom(x, "content.message", default=""):
if idx == 0:
- comments.append(f"\n{blockquote('💬**点此展开评论区**:')}")
+ comments_list.append("💬**点此展开评论区**:")
cmt = f"💬**{name}**{location}: {emojify(cmt)}"
- comments.append(f"\n{blockquote(cmt)}")
+ comments_list.append(f"\n{cmt}")
except Exception as e:
logger.error(f"Failed to get Bilibili comments: {e}")
return []
- return comments
+ if not comments_list:
+ return []
+ comments = blockquote("".join(comments_list))
+ return comments.splitlines(keepends=True)
async def bilibili_subtitle_and_summary(url_or_vid: int | str) -> dict:
src/preview/douyin.py
@@ -295,7 +295,6 @@ async def get_comments(aweme_id: str = "", platform: str = "douyin", douyin_comm
comments_str = ""
for idx, cmt in enumerate(comments):
if idx == 0:
- comments_str += f"\n{blockquote('💬**点此展开评论区**:')}"
- cmt_str = f"💬**{cmt['name']}**{cmt['region']}: {cmt['text']}"
- comments_str += f"\n{blockquote(cmt_str)}"
- return comments_str
+ comments_str += "💬**点此展开评论区**:"
+ comments_str += f"\n💬**{cmt['name']}**{cmt['region']}: {cmt['text']}"
+ return blockquote(comments_str)
src/preview/instagram.py
@@ -108,11 +108,11 @@ async def preview_instagram(
comment_nodes = sorted(comment_nodes, key=lambda x: glom(x, "node.created_at", default=0))
for idx, node in enumerate(comment_nodes):
if idx == 0:
- comments += f"\n{blockquote('💬**点此展开评论区**:')}"
+ comments += "💬**点此展开评论区**:"
author = glom(node, "node.owner.username", default="user")
cmt = glom(node, "node.text", default="")
- comment = f"💬**[{author}](https://www.instagram.com/{author})**: {cmt}"
- comments += f"\n{blockquote(comment)}"
+ comments += f"\n💬**[{author}](https://www.instagram.com/{author})**: {cmt}"
+ comments = blockquote(comments)
await modify_progress(text=f"⏬正在下载:\n{summay_media(media)}", force_update=True, **kwargs)
media = await download_media(media, **kwargs)
src/preview/reddit.py
@@ -85,10 +85,9 @@ async def get_reddit_info(url: str, **kwargs) -> dict:
continue
if comment == "[removed]" or has_markdown_img(comment):
continue
- cmt = f"💬**[{author}]({author_url})**: {comment}"
- comments += f"\n{blockquote(cmt)}"
+ comments += f"\n💬**[{author}]({author_url})**: {comment}"
if comments:
- comments = f"\n{blockquote('💬**点此展开评论区**:')}{comments}"
+ comments = blockquote(f"💬**点此展开评论区**:{comments}")
await modify_progress(text=f"⏬正在下载:\n{summay_media(media)}", force_update=True, **kwargs)
media = await download_media(media, **kwargs)
except Exception as e:
src/preview/twitter.py
@@ -159,12 +159,12 @@ async def preview_twitter(
if texts := master_info.get("texts"):
msg += f"\n{texts}"
if true(twitter_comments) and (comments := master_info.get("comments")):
- msg += f"\n{blockquote('💬**点此展开评论区**:')}"
+ comments_str = "💬**点此展开评论区**:"
for cmt in comments:
if str(cmt["post_id"]) == str(this_info["post_id"]):
continue
- full_cmt = f"💬**{cmt['author']}**: {cmt['text']}"
- msg += f"\n{blockquote(full_cmt)}"
+ comments_str += f"\n💬**{cmt['author']}**: {cmt['text']}"
+ msg += blockquote(comments_str)
media.extend(master_media)
# 本条推文
@@ -187,11 +187,11 @@ async def preview_twitter(
msg += f"\n{texts}"
if true(twitter_comments) and (comments := this_info.get("comments")):
- msg += f"\n{blockquote('💬**点此展开评论区**:')}"
+ comments_str = "💬**点此展开评论区**:"
for cmt in comments:
- cmt_texts = cmt["text"].strip().removeprefix(f"@{master_handle}").strip() # 有时回推的comment前会附带被回推的handle, 这里去掉
- full_cmt = f"💬**{cmt['author']}**: {cmt_texts}"
- msg += f"\n{blockquote(full_cmt)}"
+ cleaned = cmt["text"].strip().removeprefix(f"@{master_handle}").strip() # a reply sometimes starts with the handle being replied to; strip it
+ comments_str += f"\n💬**{cmt['author']}**: {cleaned}" # collect into comments_str (not msg) so every comment ends up inside the block quote
+ msg += blockquote(comments_str)
# 引用推文
if quote_info:
src/preview/weibo.py
@@ -130,10 +130,10 @@ async def preview_weibo(
media.extend(quote_info["media"])
- comments = []
+ comments = ""
if true(weibo_comments):
comments = await parse_weibo_comments(post_id)
- sent_messages = await send2tg(client, message, texts=emojify(msg.strip()), media=media, comments=comments, **kwargs)
+ sent_messages = await send2tg(client, message, texts=emojify(msg.strip()) + comments, media=media, **kwargs)
await modify_progress(del_status=True, **kwargs)
await save_messages(messages=sent_messages, key=db_key)
@@ -295,10 +295,10 @@ async def parse_weibo_comments(post_id: str) -> str:
if text := info.get("text"):
cmt += f" {soup_to_text(BeautifulSoup(text, 'html.parser'))}"
cmt = emojify(cmt)
- comments += f"\n{blockquote(cmt)}"
+ comments += f"\n{cmt}"
if comments:
- comments = f"\n{blockquote('💬**点此展开评论区**:')}{comments}"
- return comments
+ comments = f"\n{'💬**点此展开评论区**:'}{comments}"
+ return blockquote(comments.strip())
def real_weibo_post_id(post_id: str) -> str:
src/preview/youtube.py
@@ -25,7 +25,7 @@ async def get_youtube_comments(vid: str | None) -> list[str]:
return []
api = "https://www.googleapis.com/youtube/v3/commentThreads"
params = {"key": TOKEN.YOUTUBE_API_KEY, "maxResults": 100, "textFormat": "plainText", "part": "snippet", "videoId": vid}
- comments = []
+ comments_list = []
try:
resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items"])
if resp.get("hx_error"):
@@ -39,13 +39,13 @@ async def get_youtube_comments(vid: str | None) -> list[str]:
name = f"[{name}]({author_url})"
if cmt := glom(x, "snippet.topLevelComment.snippet.textDisplay", default=""):
if idx == 0:
- comments.append(f"\n{blockquote('💬**点此展开评论区**:')}")
- cmt = f"💬**{name}**: {cmt}"
- comments.append(f"\n{blockquote(cmt)}")
+ comments_list.append("💬**点此展开评论区**:")
+ comments_list.append(f"\n💬**{name}**: {cmt}")
except Exception as e:
logger.error(f"Failed to get YouTube comments: {e}")
return []
- return comments
+ comments = blockquote("".join(comments_list))
+ return comments.splitlines(keepends=True)
@cache.memoize(ttl=120)