Commit `22bd417`

benny-dou <60535774+benny-dou@users.noreply.github.com>

2025-08-17 06:39:36

fix(danmu): fix white space in username

main

1 parent 8e86115

Changed files (3)

src

danmu

entrypoint.py

r2.py

turso.py

@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 import re
 from collections import defaultdict
-from datetime import timedelta
 from decimal import Decimal
 from pathlib import Path
 
@@ -39,9 +38,10 @@ HELP = f"""📖**查询直播合订本**
 - `{PREFIX.DANMU} 2025-01-01 你好`: 查询2025-01-01日包含“你好”的弹幕
 - `{PREFIX.DANMU} 2025 @张三 你好`: 查询2025年用户【张三】包含“你好”的弹幕
 
-注意:
+⚠️注意:
 - 日期为开播日期, 不是弹幕发送日期 (过了凌晨也算前一天)
-- 如果用户名中有空格, 请去除空格。例如: 想指定用户为John Doe请使用 `@JohnDoe`
+- 如果用户名中有空格, 请用**引号**包住用户名 (单双引号、中英引号皆可)。
+- 例如: 想指定用户为John Doe请使用 `@"John Doe"`
 {BLOCKQUOTE_EXPANDABLE_END_DELIM}
 `{PREFIX.FAYAN}` 用法类似, 但查询的是**【{DANMU.STREAMER}】**直播语录。
 额外需注意的是 `{PREFIX.FAYAN}` 命令会忽略指定的 `@用户名`
@@ -88,7 +88,7 @@ async def query_danmu(client: Client, message: Message, **kwargs):
         resp = {}
         paths = []
         count = 0
-        engine_dates = await get_engine_with_dates(match_time, user, keyword, qtype)
+        engine_dates = await get_engine_with_dates(match_time, qtype)
         for engine, dates in sorted(engine_dates.items(), reverse=True):
             if engine == "r2":
                 resp = await query_r2(dates, user, keyword, caption, super_chats, qtype, **kwargs)
@@ -142,6 +142,7 @@ def parse_queries(texts: str, qtype: str) -> tuple[str, str, str, str]:
     Returns:
         match_time, user, keyword, error
     """
+    # ruff: noqa: RUF001
     match_time = ""
     user = ""
     keyword = ""
@@ -158,14 +159,40 @@ def parse_queries(texts: str, qtype: str) -> tuple[str, str, str, str]:
 
     # remove prefix + date
     texts = re.sub(rf"^{PREFIX.DANMU}\s+{match_time}", "", texts).lstrip()
-
     # @张三 你好
     # @张三
-    if matched := re.match(r"^@(\w+)(\s+)?", texts):
-        user = matched.group(1)
+    # @"Zhang San"
+    if texts.startswith("@"):  # user is specified
+        texts = texts.removeprefix("@")
+        if texts.startswith('"'):
+            if matched := re.match(r'^"(.*?)"', texts):
+                user = matched.group(1)
+                texts = re.sub(r'^"(.*)"', "", texts)
+            else:
+                return "", "", "", "查询格式有误, 用户名右侧缺失英文双引号"
+        elif texts.startswith("“"):
+            if matched := re.match(r"^“(.*?)”", texts):
+                user = matched.group(1)
+                texts = re.sub(r"^“(.*?)”", "", texts)
+            else:
+                return "", "", "", "查询格式有误, 用户名右侧缺失中文双引号"
+        elif texts.startswith("'"):
+            if matched := re.match(r"^'(.*?)'", texts):
+                user = matched.group(1)
+                texts = re.sub(r"^'(.*?)'", "", texts)
+            else:
+                return "", "", "", "查询格式有误, 用户名右侧缺失英文单引号"
+        elif texts.startswith("‘"):
+            if matched := re.match(r"^‘(.*?)’", texts):
+                user = matched.group(1)
+                texts = re.sub(r"^‘(.*?)’", "", texts)
+            else:
+                return "", "", "", "查询格式有误, 用户名右侧缺失中文单引号"
+        elif matched := re.match(r"^@(\w+)(\s+)?", texts):
+            user = matched.group(1)
+            texts = re.sub(rf"^@{user}", "", texts)
 
-    # remove user
-    keyword = re.sub(rf"^@{user}", "", texts).lstrip()
+    keyword = texts.lstrip()
 
     if qtype == "发言":
         user = ""
@@ -175,19 +202,15 @@ def parse_queries(texts: str, qtype: str) -> tuple[str, str, str, str]:
     return match_time, user, keyword, ""
 
 
-async def get_engine_with_dates(match_time: str, user: str = "", keyword: str = "", qtype: str = "弹幕") -> dict[str, list[str]]:
+async def get_engine_with_dates(match_time: str, qtype: str = "弹幕") -> dict[str, list[str]]:
     """获取查询引擎和对应查询日期.
 
     对于弹幕记录根据日期进行判断:
         不指定日期: 从server获取
-        YYYY-MM-DD: 不是今天或者昨天, 则从R2获取
-        YYYY-MM: 不是本月, 则从R2获取
-        YYYY: 如果获取全年的全部弹幕记录 (不指定user和keyword), 文件可能会非常大, 小内存VPS经常爆内存
-              如果指定了user和keyword, 则从server获取
-              如果YYYY为本年, 从server获取本月, 其余月份从R2获取
-              其余情况一律从R2获取
-              为避免此情况, 当查询为全年时, 改为按月份从R2获取
-              然后合并每2个月的记录为一个文件, 发送6个文件.
+        YYYY-MM-DD: 从R2获取
+        YYYY-MM: 从R2获取
+        YYYY: 由于全年的弹幕记录文件非常大, 小内存VPS经常爆内存
+              所以R2中没有保存全年的记录, 所以从server获取
 
     对于发言记录一律从R2查询
     Returns:
@@ -205,29 +228,16 @@ async def get_engine_with_dates(match_time: str, user: str = "", keyword: str =
     # 以下为匹配弹幕查询
     if not match_time:
         return {"server": allowed_years}
-    today = now.strftime("%Y-%m-%d")
-    year = now.year
-    month = f"{now.month:02d}"
-    yesterday = (now - timedelta(days=1)).strftime("%Y-%m-%d")
+
     # YYYY-MM-DD
     if len(match_time) == 10:
-        if match_time not in [today, yesterday]:
-            return {"r2": [match_time]}
-        return {"server": [match_time]}
+        return {"r2": [match_time]}
 
     # YYYY-MM
     if len(match_time) == 7:
-        if match_time != f"{year}-{month}":
-            return {"r2": [match_time]}
-        return {"server": [f"{year}-{month}"]}  # 本月
+        return {"r2": [match_time]}
 
     # YYYY
-    if user or keyword:
+    if len(match_time) == 4:
         return {"server": [match_time]}
-    if match_time == str(year):  # 今年
-        # 从server获取本月, 其余月份从R2获取
-        r2_months = [f"{year}-{mon:02d}" for mon in range(1, now.month)]
-        return {"server": [f"{year}-{month}"], "r2": r2_months}  # type: ignore
-    # 往年
-    r2_months = [f"{match_time}-{mon:02d}" for mon in range(1, 13)]
-    return {"r2": r2_months}
+    return {"server": allowed_years}

@@ -68,7 +68,7 @@ async def parse_from_r2(data: dict[str, list[dict]], user: str, keyword: str, su
         if keyword:
             items = [x for x in items if keyword in x.get("m", "")]
         if user and qtype == "弹幕":
-            items = [x for x in items if x.get("u", "") == user]
+            items = [x for x in items if x.get("u", "") == user.replace(" ", "")]
         sort_key = "s" if qtype == "弹幕" else "t"
         items = sorted(items, key=lambda x: x[sort_key])  # 数据从旧到新
         for idx, x in enumerate(items):

@@ -55,14 +55,14 @@ async def query_turso(match_time: str, user: str, keyword: str, caption: str, su
         if match_time:
             conditions.append(f"T.time >= '{begin}' AND T.time <= '{end}'" if keyword else f"time >= '{begin}' AND time <= '{end}'")
         if user:
-            conditions.append(f"T.user = '{user}'" if keyword else f"user = '{user}'")
+            conditions.append(f"T.fullname = '{user}'" if keyword else f"fullname = '{user}'")
         if keyword:
-            sql = f"SELECT T.time,T.fullname,T.content,T.superchat,T.user,T.uid FROM 弹幕 AS T JOIN fts_弹幕 AS FTS ON T.rowid = FTS.rowid WHERE FTS.segmented MATCH '{texts_to_match}'"
+            sql = f"SELECT T.time,T.fullname,T.content,T.superchat,T.uid FROM 弹幕 AS T JOIN fts_弹幕 AS FTS ON T.rowid = FTS.rowid WHERE FTS.segmented MATCH '{texts_to_match}'"
             if conditions:
                 sql += " AND " + " AND ".join(conditions)
         else:
             cond = " AND ".join(conditions)
-            sql = f"SELECT time,fullname,content,superchat,user,uid FROM 弹幕 WHERE {cond}"
+            sql = f"SELECT time,fullname,content,superchat,uid FROM 弹幕 WHERE {cond}"
         logger.info(sql)
         resp = await turso_exec([{"type": "execute", "stmt": {"sql": sql}}], silent=True, retry=2, **TURSO_KWARGS)
         parsed = await parse_from_turso(turso_parse_resp(resp), user, keyword, super_chats, qtype)
@@ -98,7 +98,7 @@ async def parse_from_turso(data: list[dict], user: str, keyword: str, super_chat
         if keyword:
             items = [x for x in items if keyword in x.get("content", "")]
         if user and qtype == "弹幕":
-            items = [x for x in items if x.get("user", "") == user]
+            items = [x for x in items if x.get("fullname", "") == user]
         items = sorted(items, key=lambda x: x["time"])  # 数据从旧到新
         for idx, x in enumerate(items):
             # only show the day once

Commit 22bd417

Commit `22bd417`