Commit 3d12fa3

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-11-02 14:38:33
fix(danmu): remove `user` field in turso danmu records
1 parent c4566f2
Changed files (2)
src
src/danmu/sync.py
@@ -21,11 +21,11 @@ LIVEINFO_COLUMNS = "date TEXT, title TEXT, url TEXT, 发言已完成 INTEGER DEF
 # livechats相关
 COLUMNS = {
     "发言": "time TEXT, content TEXT, segmented TEXT",
-    "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT,user TEXT, uid TEXT, segmented TEXT",
+    "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT, uid TEXT, segmented TEXT",
 }
 INDEX_NAMES = {
     "发言": ["time"],
-    "弹幕": ["time", "user", "uid", "superchat"],
+    "弹幕": ["time", "fullname", "uid", "superchat"],
 }
 
 
@@ -129,7 +129,6 @@ async def save_livechats_to_turso(live_info: dict, data: list[dict], qtype: str)
             dt = datetime.fromtimestamp(x["timestamp"] / 1000000, tz=ZoneInfo(TZ))
             item["time"] = dt.strftime("%Y-%m-%d %H:%M:%S")
             item["fullname"] = x["authorName"]  # User Name
-            item["user"] = x["authorName"].replace(" ", "")  # UserName
             if x.get("authorId"):
                 item["uid"] = x["authorId"]
             if x.get("scAmount"):
@@ -138,10 +137,10 @@ async def save_livechats_to_turso(live_info: dict, data: list[dict], qtype: str)
             if x.get("message"):
                 item["content"] = x["message"]
                 item["segmented"] = " ".join(cutter.cutword(x["message"]))
-            if f"{item['time']}{item['user']}{item.get('content', '')}" in added:
+            if f"{item['time']}{item['fullname']}{item.get('content', '')}" in added:
                 continue
             normed_data.append(item)
-            added.add(f"{item['time']}{item['user']}{item.get('content', '')}")
+            added.add(f"{item['time']}{item['fullname']}{item.get('content', '')}")
 
     # 过滤掉获取已保存在turso的记录
     data = await filter_records_in_turso(normed_data, date, qtype)
src/danmu/turso.py
@@ -1,7 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from collections import defaultdict
+from datetime import datetime, timedelta
 from decimal import Decimal
+from zoneinfo import ZoneInfo
 
 import anyio
 from loguru import logger
@@ -79,17 +81,24 @@ async def parse_from_turso(data: list[dict], user: str, keyword: str, super_chat
     """解析从Turso获取的记录.
 
     日期从新到旧, 数据从旧到新
+    注意: 记录按开播日期分组, 而不是按发送日期分组. 如果弹幕的发送时间在凌晨 (本实现以 08:00 之前为准), 其日期已经是开播日的第二天
+    因此对这类弹幕需要将发送日期减去一天, 用前一天的日期作为真实的开播日期
 
     COLUMNS = {
     "发言": "time TEXT, content TEXT, segmented TEXT",
-    "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT,user TEXT, uid TEXT, segmented TEXT",
+    "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT, uid TEXT, segmented TEXT",
     }
     """
     # ruff: noqa: PLW2901
     # group by dates
     grouped_data = defaultdict(list)  # {date: list[dict]}
     for x in data:
-        grouped_data[x["time"][:10]].append(x)
+        time = datetime.strptime(x["time"], "%Y-%m-%d %H:%M:%S").replace(tzinfo=ZoneInfo(TZ))
+        if time.hour < 8:  # 如果发言时间在凌晨, 则认为是第二天的数据, 需要用前一天的日期去获取真实的开播日期  # noqa: SIM108
+            real_date = (time - timedelta(days=1)).strftime("%Y-%m-%d")
+        else:
+            real_date = time.strftime("%Y-%m-%d")
+        grouped_data[real_date].append(x)
     texts = ""
     count = 0
     for date, items in sorted(grouped_data.items(), reverse=True):  # 日期从新到旧
@@ -102,9 +111,9 @@ async def parse_from_turso(data: list[dict], user: str, keyword: str, super_chat
         added = set()
         deduplicated = []
         for x in items:
-            if f"{x['time']}{x['content']}{x.get('user')}" not in added:
+            if f"{x['time']}{x['content']}{x.get('fullname')}" not in added:
                 deduplicated.append(x)
-                added.add(f"{x['time']}{x['content']}{x.get('user')}")
+                added.add(f"{x['time']}{x['content']}{x.get('fullname')}")
         for idx, x in enumerate(sorted(deduplicated, key=lambda x: x["time"])):  # 数据从旧到新
             # only show the day once
             day = f"\n开播日期: {await live_date(date)}\n" if idx == 0 else ""