Commit 22bd417
Changed files (3)
src
src/danmu/entrypoint.py
@@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
import re
from collections import defaultdict
-from datetime import timedelta
from decimal import Decimal
from pathlib import Path
@@ -39,9 +38,10 @@ HELP = f"""📖**查询直播合订本**
- `{PREFIX.DANMU} 2025-01-01 你好`: 查询2025-01-01日包含“你好”的弹幕
- `{PREFIX.DANMU} 2025 @张三 你好`: 查询2025年用户【张三】包含“你好”的弹幕
-注意:
+⚠️注意:
- 日期为开播日期, 不是弹幕发送日期 (过了凌晨也算前一天)
-- 如果用户名中有空格, 请去除空格。例如: 想指定用户为John Doe请使用 `@JohnDoe`
+- 如果用户名中有空格, 请用**引号**包住用户名 (单双引号、中英引号皆可)。
+- 例如: 想指定用户为John Doe请使用 `@"John Doe"`
{BLOCKQUOTE_EXPANDABLE_END_DELIM}
`{PREFIX.FAYAN}` 用法类似, 但查询的是**【{DANMU.STREAMER}】**直播语录。
额外需注意的是 `{PREFIX.FAYAN}` 命令会忽略指定的 `@用户名`
@@ -88,7 +88,7 @@ async def query_danmu(client: Client, message: Message, **kwargs):
resp = {}
paths = []
count = 0
- engine_dates = await get_engine_with_dates(match_time, user, keyword, qtype)
+ engine_dates = await get_engine_with_dates(match_time, qtype)
for engine, dates in sorted(engine_dates.items(), reverse=True):
if engine == "r2":
resp = await query_r2(dates, user, keyword, caption, super_chats, qtype, **kwargs)
@@ -142,6 +142,7 @@ def parse_queries(texts: str, qtype: str) -> tuple[str, str, str, str]:
Returns:
match_time, user, keyword, error
"""
+ # ruff: noqa: RUF001
match_time = ""
user = ""
keyword = ""
@@ -158,14 +159,40 @@ def parse_queries(texts: str, qtype: str) -> tuple[str, str, str, str]:
# remove prefix + date
texts = re.sub(rf"^{PREFIX.DANMU}\s+{match_time}", "", texts).lstrip()
-
# @张三 你好
# @张三
- if matched := re.match(r"^@(\w+)(\s+)?", texts):
- user = matched.group(1)
+ # @"Zhang San"
+ if texts.startswith("@"): # user is specified
+ texts = texts.removeprefix("@")
+ if texts.startswith('"'):
+ if matched := re.match(r'^"(.*?)"', texts):
+ user = matched.group(1)
+ texts = re.sub(r'^"(.*)"', "", texts)
+ else:
+ return "", "", "", "查询格式有误, 用户名右侧缺失英文双引号"
+ elif texts.startswith("“"):
+ if matched := re.match(r"^“(.*?)”", texts):
+ user = matched.group(1)
+ texts = re.sub(r"^“(.*?)”", "", texts)
+ else:
+ return "", "", "", "查询格式有误, 用户名右侧缺失中文双引号"
+ elif texts.startswith("'"):
+ if matched := re.match(r"^'(.*?)'", texts):
+ user = matched.group(1)
+ texts = re.sub(r"^'(.*?)'", "", texts)
+ else:
+ return "", "", "", "查询格式有误, 用户名右侧缺失英文单引号"
+ elif texts.startswith("‘"):
+ if matched := re.match(r"^‘(.*?)’", texts):
+ user = matched.group(1)
+ texts = re.sub(r"^‘(.*?)’", "", texts)
+ else:
+ return "", "", "", "查询格式有误, 用户名右侧缺失中文单引号"
+ elif matched := re.match(r"^@(\w+)(\s+)?", texts):
+ user = matched.group(1)
+ texts = re.sub(rf"^@{user}", "", texts)
- # remove user
- keyword = re.sub(rf"^@{user}", "", texts).lstrip()
+ keyword = texts.lstrip()
if qtype == "发言":
user = ""
@@ -175,19 +202,15 @@ def parse_queries(texts: str, qtype: str) -> tuple[str, str, str, str]:
return match_time, user, keyword, ""
-async def get_engine_with_dates(match_time: str, user: str = "", keyword: str = "", qtype: str = "弹幕") -> dict[str, list[str]]:
+async def get_engine_with_dates(match_time: str, qtype: str = "弹幕") -> dict[str, list[str]]:
"""获取查询引擎和对应查询日期.
对于弹幕记录根据日期进行判断:
不指定日期: 从server获取
- YYYY-MM-DD: 不是今天或者昨天, 则从R2获取
- YYYY-MM: 不是本月, 则从R2获取
- YYYY: 如果获取全年的全部弹幕记录 (不指定user和keyword), 文件可能会非常大, 小内存VPS经常爆内存
- 如果指定了user和keyword, 则从server获取
- 如果YYYY为本年, 从server获取本月, 其余月份从R2获取
- 其余情况一律从R2获取
- 为避免此情况, 当查询为全年时, 改为按月份从R2获取
- 然后合并每2个月的记录为一个文件, 发送6个文件.
+ YYYY-MM-DD: 从R2获取
+ YYYY-MM: 从R2获取
+ YYYY: 由于全年的弹幕记录文件非常大, 小内存VPS经常爆内存
+ 所以R2中没有保存全年的记录, 所以从server获取
对于发言记录一律从R2查询
Returns:
@@ -205,29 +228,16 @@ async def get_engine_with_dates(match_time: str, user: str = "", keyword: str =
# 以下为匹配弹幕查询
if not match_time:
return {"server": allowed_years}
- today = now.strftime("%Y-%m-%d")
- year = now.year
- month = f"{now.month:02d}"
- yesterday = (now - timedelta(days=1)).strftime("%Y-%m-%d")
+
# YYYY-MM-DD
if len(match_time) == 10:
- if match_time not in [today, yesterday]:
- return {"r2": [match_time]}
- return {"server": [match_time]}
+ return {"r2": [match_time]}
# YYYY-MM
if len(match_time) == 7:
- if match_time != f"{year}-{month}":
- return {"r2": [match_time]}
- return {"server": [f"{year}-{month}"]} # 本月
+ return {"r2": [match_time]}
# YYYY
- if user or keyword:
+ if len(match_time) == 4:
return {"server": [match_time]}
- if match_time == str(year): # 今年
- # 从server获取本月, 其余月份从R2获取
- r2_months = [f"{year}-{mon:02d}" for mon in range(1, now.month)]
- return {"server": [f"{year}-{month}"], "r2": r2_months} # type: ignore
- # 往年
- r2_months = [f"{match_time}-{mon:02d}" for mon in range(1, 13)]
- return {"r2": r2_months}
+ return {"server": allowed_years}
src/danmu/r2.py
@@ -68,7 +68,7 @@ async def parse_from_r2(data: dict[str, list[dict]], user: str, keyword: str, su
if keyword:
items = [x for x in items if keyword in x.get("m", "")]
if user and qtype == "弹幕":
- items = [x for x in items if x.get("u", "") == user]
+ items = [x for x in items if x.get("u", "") == user.replace(" ", "")]
sort_key = "s" if qtype == "弹幕" else "t"
items = sorted(items, key=lambda x: x[sort_key]) # 数据从旧到新
for idx, x in enumerate(items):
src/danmu/turso.py
@@ -55,14 +55,14 @@ async def query_turso(match_time: str, user: str, keyword: str, caption: str, su
if match_time:
conditions.append(f"T.time >= '{begin}' AND T.time <= '{end}'" if keyword else f"time >= '{begin}' AND time <= '{end}'")
if user:
- conditions.append(f"T.user = '{user}'" if keyword else f"user = '{user}'")
+ conditions.append(f"T.fullname = '{user}'" if keyword else f"fullname = '{user}'")
if keyword:
- sql = f"SELECT T.time,T.fullname,T.content,T.superchat,T.user,T.uid FROM 弹幕 AS T JOIN fts_弹幕 AS FTS ON T.rowid = FTS.rowid WHERE FTS.segmented MATCH '{texts_to_match}'"
+ sql = f"SELECT T.time,T.fullname,T.content,T.superchat,T.uid FROM 弹幕 AS T JOIN fts_弹幕 AS FTS ON T.rowid = FTS.rowid WHERE FTS.segmented MATCH '{texts_to_match}'"
if conditions:
sql += " AND " + " AND ".join(conditions)
else:
cond = " AND ".join(conditions)
- sql = f"SELECT time,fullname,content,superchat,user,uid FROM 弹幕 WHERE {cond}"
+ sql = f"SELECT time,fullname,content,superchat,uid FROM 弹幕 WHERE {cond}"
logger.info(sql)
resp = await turso_exec([{"type": "execute", "stmt": {"sql": sql}}], silent=True, retry=2, **TURSO_KWARGS)
parsed = await parse_from_turso(turso_parse_resp(resp), user, keyword, super_chats, qtype)
@@ -98,7 +98,7 @@ async def parse_from_turso(data: list[dict], user: str, keyword: str, super_chat
if keyword:
items = [x for x in items if keyword in x.get("content", "")]
if user and qtype == "弹幕":
- items = [x for x in items if x.get("user", "") == user]
+ items = [x for x in items if x.get("fullname", "") == user]
items = sorted(items, key=lambda x: x["time"]) # 数据从旧到新
for idx, x in enumerate(items):
# only show the day once