Commit 1617807

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-06-17 04:03:56
chore(summary): add support for retrieving chat history from Turso database
1 parent a4070dd
Changed files (2)
src
src/llm/summary.py
@@ -117,8 +117,7 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
     offset_id = 0
     if message.reply_to_message:
         offset_id = message.reply_to_message.id + 1  # include the reply message
-        reply_info = parse_msg(message.reply_to_message)
-        end_time = reply_info["datetime"]
+        end_time = message.date.replace(tzinfo=ZoneInfo(TZ)) if isinstance(message.date, datetime) else nowdt(TZ)
 
     # 3️⃣ /summary #YYYYMMDDHHMMSS @user
     # 4️⃣ /summary #YYYYMMDDHHMMSS-YYYYMMDDHHMMSS @user
@@ -249,7 +248,7 @@ def get_txt_format(info_list: list[dict]) -> str:
         txt_format += f"[{info['datetime']:%m-%d %H:%M:%S}]{info['full_name']}:\n"
         if info["mtype"] != "text":  # not plaintext message
             # media group
-            if info["media_group_id"] > 0:
+            if info["media_group_id"]:
                 media_types = [f"[{x['mtype']}]" for x in info_list if x["media_group_id"] == info["media_group_id"]]
                 txt_format += " ".join(media_types)
                 txt_mediagroup_ids.add(info["media_group_id"])
src/messages/chat_history.py
@@ -4,17 +4,100 @@
 from datetime import datetime
 from zoneinfo import ZoneInfo
 
+from loguru import logger
 from pyrogram.client import Client
 
 from config import MAX_MESSAGE_RETRIEVED, TZ
+from database.turso import turso_exec, turso_parse_resp
+from history.turso import get_turso_chatinfo
+from history.utils import TURSO_KWARGS, get_chat
 from messages.parser import parse_msg
+from utils import to_int
 
 
 async def get_history_info_list(
     client: Client,
     chat_id: int | str,
     offset_id: int = 0,
-    num: int = 0,
+    limit: int = 0,
+    begin_time: datetime | None = None,
+    end_time: datetime | None = None,
+    users: str | list[str] | None = None,
+) -> list[dict]:
+    """Get the given number of chat history messages, from old to new, in parsed JSON format.
+
+    If users are specified, the given number of messages from those users will be returned.
+    """
+    if begin_time is None:
+        begin_time = datetime.fromtimestamp(0, tz=ZoneInfo(TZ))
+    if end_time is None:
+        end_time = datetime.now(tz=ZoneInfo(TZ))
+
+    history = await get_history_info_list_via_telegram(client, chat_id, offset_id=offset_id, limit=limit, begin_time=begin_time, end_time=end_time, users=users)
+    if not history:
+        history = await get_history_info_list_via_turso(chat_id, limit=limit, begin_time=begin_time, end_time=end_time, users=users)
+    return history
+
+
+async def get_history_info_list_via_turso(
+    chat_id: int | str,
+    begin_time: datetime,
+    end_time: datetime,
+    users: str | list[str] | None = None,
+    limit: int = 0,
+) -> list[dict]:
+    """Get the given number of chat history messages, from old to new, in parsed JSON format.
+
+    If users are specified, the given number of messages from those users will be returned.
+    """
+    chat_info = await get_turso_chatinfo(chat_id)
+    if not chat_info:
+        return []
+    begin = begin_time.strftime("%Y-%m-%d %H:%M:%S")
+    end = end_time.strftime("%Y-%m-%d %H:%M:%S")
+    sql = f"""SELECT * FROM '{chat_info["tablename"]}' WHERE (time > '{begin}' AND time < '{end}')"""
+    if users:
+        users = [users] if isinstance(users, str) else users
+        handle_cond = " OR ".join(f"handle = '{user}'" for user in users)
+        name_cond = " OR ".join(f"user = '{user}'" for user in users)
+        uid_cond = " OR ".join(f"uid = '{user}'" for user in users)
+        combined_cond = f"{handle_cond} OR {name_cond} OR {uid_cond}"
+        sql += f" AND ({combined_cond})"
+    sql += " ORDER BY mid ASC"
+    if limit:
+        sql += f" LIMIT {limit}"
+    logger.debug(sql)
+    resp = await turso_exec([{"type": "execute", "stmt": {"sql": sql}}], silent=True, **TURSO_KWARGS)
+    rows = turso_parse_resp(resp)
+
+    """Necessary fields for `parse_history_list` function:
+
+    file_name, is_bot, text, datetime, mtype, mid, full_name, message_url, reply_to_message_id, media_group_id
+    """
+    message_url_prefix = f"https://t.me/{chat_info['chandle']}" if chat_info["chandle"] else f"https://t.me/c/{chat_info['cid']}"
+
+    return [
+        {
+            "file_name": row["filename"],
+            "is_bot": row["handle"].endswith("bot"),
+            "text": row["content"],
+            "datetime": datetime.strptime(row["time"], "%Y-%m-%d %H:%M:%S").astimezone(ZoneInfo(TZ)),
+            "mtype": row["mtype"],
+            "mid": int(row["mid"]),
+            "full_name": row["fullname"],
+            "message_url": f"{message_url_prefix}/{row['mid']}",
+            "reply_to_message_id": to_int(row["reply"]),
+            "media_group_id": row["gid"],
+        }
+        for row in rows
+    ]
+
+
+async def get_history_info_list_via_telegram(
+    client: Client,
+    chat_id: int | str,
+    offset_id: int = 0,
+    limit: int = 0,
     begin_time: datetime | None = None,
     end_time: datetime | None = None,
     users: str | list[str] | None = None,
@@ -29,17 +112,19 @@ async def get_history_info_list(
         end_time = datetime.now(tz=ZoneInfo(TZ))
     history = []
     retrieved = 0
-    if users is None:
+    if not users:
         users = []
-    if isinstance(users, str):
-        users = [users]
+    users = [users] if isinstance(users, str) else users
     users = [x.replace(" ", "").lower() for x in users]
-    async for msg in client.get_chat_history(chat_id=chat_id, offset_id=offset_id):  # type: ignore
+    chat = await get_chat(client, chat_id)
+    if chat.id == 0:
+        return []
+    async for msg in client.get_chat_history(chat_id=chat.username or chat.id, offset_id=offset_id):  # type: ignore
         # iterate messages from new to old
         retrieved += 1
         if retrieved > MAX_MESSAGE_RETRIEVED:
             break
-        if len(history) >= num:
+        if len(history) >= limit:
             break
         if msg.empty:
             break