Commit 3a2ef45
src/history/turso.py
@@ -26,7 +26,7 @@ async def sync_history_to_turso(client: Client, message: Message) -> None:
if not HISTORY.TURSO_ENABLE:
return
info = parse_msg(message, silent=True)
- if not check_save_history(info["ctype"], info["cid"]) or not fine_grained_check(info):
+ if not check_save_history(info["ctype"], info["cid"]) or not fine_grained_check(info) or message.service:
return
table_name = await get_table_name(client, info["cid"])
records = {
@@ -64,10 +64,10 @@ async def backup_chat_history_to_turso(client: Client, chat_id: str | int, hours
saved_mids = flatten(glom(resp, "results.0.response.result.rows.*.*.value", default=[]))
saved_mids = {int(x) for x in saved_mids}
logger.info(f"Found {len(saved_mids)} messages in Turso. Rows read: {glom(resp, 'results.0.response.result.rows_read', default=1)}")
- concurrency = 200
+ concurrency = 1000
statements = []
async for message in client.get_chat_history(chat.id): # type: ignore
- if not isinstance(message, Message) or message.empty:
+ if not isinstance(message, Message) or message.empty or message.service:
continue
info = parse_msg(message, silent=True)
if info["mid"] in saved_mids:
src/history/utils.py
@@ -59,7 +59,7 @@ def fine_grained_check(info: dict) -> bool:
这种细粒度的检查, 仅支持通过环境变量设置.
目前支持:
- HISTORY_{cid}_MUST_MTYPE: 必须为指定的消息类型
+ HISTORY_{cid}_MUST_MTYPE: 必须为指定的消息类型, 可以为多个类型, 用逗号分隔
HISTORY_{cid}_MUST_HAVE_TEXT: 必须有文字的消息
HISTORY_{cid}_SKIP_URL: 跳过包含链接的消息
HISTORY_{cid}_SKIP_KEYWORDS: 跳过包含关键词的消息 (其中关键词为逗号分隔的字符串)
@@ -71,6 +71,8 @@ def fine_grained_check(info: dict) -> bool:
return False
if true(os.getenv(f"HISTORY_{cid}_MUST_HAVE_TEXT")) and not info["text"]:
return False
+ if true(os.getenv(f"HISTORY_{cid}_MUST_HAVE_URL")) and not (find_url(info["text"]) or info.get("entity_urls")):
+ return False
if true(os.getenv(f"HISTORY_{cid}_SKIP_URL")) and (find_url(info["text"]) or info.get("entity_urls")):
return False
if os.getenv(f"HISTORY_{cid}_SKIP_KEYWORDS"):