Commit 3d12fa3
src/danmu/sync.py
@@ -21,11 +21,11 @@ LIVEINFO_COLUMNS = "date TEXT, title TEXT, url TEXT, 发言已完成 INTEGER DEF
# livechats相关
COLUMNS = {
"发言": "time TEXT, content TEXT, segmented TEXT",
- "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT,user TEXT, uid TEXT, segmented TEXT",
+ "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT, uid TEXT, segmented TEXT",
}
INDEX_NAMES = {
"发言": ["time"],
- "弹幕": ["time", "user", "uid", "superchat"],
+ "弹幕": ["time", "fullname", "uid", "superchat"],
}
@@ -129,7 +129,6 @@ async def save_livechats_to_turso(live_info: dict, data: list[dict], qtype: str)
dt = datetime.fromtimestamp(x["timestamp"] / 1000000, tz=ZoneInfo(TZ))
item["time"] = dt.strftime("%Y-%m-%d %H:%M:%S")
item["fullname"] = x["authorName"] # User Name
- item["user"] = x["authorName"].replace(" ", "") # UserName
if x.get("authorId"):
item["uid"] = x["authorId"]
if x.get("scAmount"):
@@ -138,10 +137,10 @@ async def save_livechats_to_turso(live_info: dict, data: list[dict], qtype: str)
if x.get("message"):
item["content"] = x["message"]
item["segmented"] = " ".join(cutter.cutword(x["message"]))
- if f"{item['time']}{item['user']}{item.get('content', '')}" in added:
+ if f"{item['time']}{item['fullname']}{item.get('content', '')}" in added:
continue
normed_data.append(item)
- added.add(f"{item['time']}{item['user']}{item.get('content', '')}")
+ added.add(f"{item['time']}{item['fullname']}{item.get('content', '')}")
# 过滤掉获取已保存在turso的记录
data = await filter_records_in_turso(normed_data, date, qtype)
src/danmu/turso.py
@@ -1,7 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import defaultdict
+from datetime import datetime, timedelta
from decimal import Decimal
+from zoneinfo import ZoneInfo
import anyio
from loguru import logger
@@ -79,17 +81,24 @@ async def parse_from_turso(data: list[dict], user: str, keyword: str, super_chat
"""解析从Turso获取的记录.
日期从新到旧, 数据从旧到新
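+ Note: a message sent after midnight carries the next calendar day's date in its timestamp.
+ Records timestamped before 08:00 are therefore grouped under the previous day, and that date is used to look up the actual stream date.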
COLUMNS = {
"发言": "time TEXT, content TEXT, segmented TEXT",
- "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT,user TEXT, uid TEXT, segmented TEXT",
+ "弹幕": "time TEXT, fullname TEXT, content TEXT, superchat TEXT, uid TEXT, segmented TEXT",
}
"""
# ruff: noqa: PLW2901
# group by dates
grouped_data = defaultdict(list) # {date: list[dict]}
for x in data:
- grouped_data[x["time"][:10]].append(x)
+ time = datetime.strptime(x["time"], "%Y-%m-%d %H:%M:%S").replace(tzinfo=ZoneInfo(TZ))
+ if time.hour < 8: # 如果发言时间在凌晨, 则认为是第二天的数据, 需要用前一天的日期去获取真实的开播日期 # noqa: SIM108
+ real_date = (time - timedelta(days=1)).strftime("%Y-%m-%d")
+ else:
+ real_date = time.strftime("%Y-%m-%d")
+ grouped_data[real_date].append(x)
texts = ""
count = 0
for date, items in sorted(grouped_data.items(), reverse=True): # 日期从新到旧
@@ -102,9 +111,9 @@ async def parse_from_turso(data: list[dict], user: str, keyword: str, super_chat
added = set()
deduplicated = []
for x in items:
- if f"{x['time']}{x['content']}{x.get('user')}" not in added:
+ if f"{x['time']}{x['content']}{x.get('fullname')}" not in added:
deduplicated.append(x)
- added.add(f"{x['time']}{x['content']}{x.get('user')}")
+ added.add(f"{x['time']}{x['content']}{x.get('fullname')}")
for idx, x in enumerate(sorted(deduplicated, key=lambda x: x["time"])): # 数据从旧到新
# only show the day once
day = f"\n开播日期: {await live_date(date)}\n" if idx == 0 else ""