Commit ca6bf84
Changed files (2)
src
llm
subtitles
src/llm/summary.py
@@ -6,26 +6,32 @@ import re
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
+from glom import Coalesce, glom
from loguru import logger
from pyrogram.client import Client
from pyrogram.types import Chat, Message
from pyrogram.types.messages_and_media.message import Str
-from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ
+from config import GPT, MAX_MESSAGE_SUMMARY, PREFIX, TID, TZ, cache
from llm.gpt import gpt_response
from llm.utils import BOT_TIPS, count_tokens
from messages.chat_history import get_history_info_list
from messages.parser import parse_msg
from messages.progress import modify_progress
from messages.sender import send2tg
-from messages.utils import equal_prefix, startswith_prefix, to_int
+from messages.utils import equal_prefix, remove_prefix, startswith_prefix, to_int
+from networking import match_social_media_link
+from subtitles.subtitle import get_subtitle
from utils import nowdt, rand_number, strings_list
HELP = f"""🤖**AI总结历史消息** (最多{MAX_MESSAGE_SUMMARY}条)
-⚠️使用`{PREFIX.COMBINATION}`命令只生成聊天记录文件, 不进行AI总结
-使用说明:
-# 后跟消息数量或时间范围
-@ 后跟用户名 (可多次使用@)
+⚠️使用`{PREFIX.AI_SUMMARY}`命令生成聊天记录文件 + 聊天记录AI总结
+⚠️使用`{PREFIX.COMBINATION}`命令只生成聊天记录文件, 不对聊天记录AI总结
+⚠️额外功能: 使用`{PREFIX.AI_SUMMARY} + 油管或B站链接`对视频内容进行AI总结
+
+{PREFIX.AI_SUMMARY}使用说明:
+- # 后跟消息数量或时间范围
+- @ 后跟用户名 (可多次使用@)
**1️⃣指定条目数**
- `{PREFIX.AI_SUMMARY} #N`: 总结最近的N条历史消息
@@ -101,9 +107,26 @@ async def ai_summary(client: Client, message: Message, summary_prefix: str | Non
summary_prefix (str | None): Prefix string of the response message.
"""
# send docs if message == "/summary"
- if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]):
+ if equal_prefix(message.text, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]) and not message.reply_to_message:
await send2tg(client, message, texts=HELP, **kwargs)
return
+ if not startswith_prefix(message.content, prefix=[PREFIX.AI_SUMMARY, PREFIX.COMBINATION]):
+ return
+ # summarize YouTube & Bilibili videos (skipped when summarizing chat history)
+ if startswith_prefix(message.text, prefix=PREFIX.AI_SUMMARY) and not remove_prefix(message.text, prefix=PREFIX.AI_SUMMARY).strip().startswith("#"):
+ # look for YouTube & Bilibili links in the message's content (falling back to the reply_to_message's content) and in the reply_to_message's entity_urls
+ links_to_check = [glom(message, Coalesce("content", "reply_to_message.content"), default="")]
+ if message.reply_to_message:
+ reply_info = parse_msg(message.reply_to_message, use_cache=False, silent=True)
+ links_to_check.extend(reply_info["entity_urls"])
+ for link in links_to_check:
+ matched = await match_social_media_link(link)
+ if matched["platform"] in ["youtube", "bilibili"]:
+ cache.delete(f"parse_msg-{message.chat.id}-{message.id}")
+ msg = Message(id=glom(message, Coalesce("reply_to_message.id", "id")), chat=message.chat, text=Str(f"{PREFIX.SUBTITLE} {matched['url']}"))
+ kwargs |= {"ai_summary": True, "send_subtitle_as": "none"}
+ await get_subtitle(client, msg, **kwargs)
+ return
info = parse_msg(message, silent=True)
need_summay = startswith_prefix(info["text"], prefix=[PREFIX.AI_SUMMARY])
src/subtitles/subtitle.py
@@ -3,6 +3,7 @@
import contextlib
import re
from io import BytesIO
+from typing import Literal
from glom import Coalesce, glom
from loguru import logger
@@ -39,7 +40,15 @@ HELP = f"""📃**提取字幕**
""" # noqa: RUF001
-async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool = True, ai_summary: bool = True, **kwargs):
+async def get_subtitle(
+ client: Client,
+ message: Message,
+ *,
+ to_telegraph: bool = True,
+ ai_summary: bool = True,
+ send_subtitle_as: Literal["file", "str", "none"] = "file",
+ **kwargs,
+):
"""Get YouTube Subtitle."""
target_chat = kwargs["target_chat"] if kwargs.get("target_chat") else message.chat.id
# send docs if message == "/subtitle", without reply
@@ -55,8 +64,15 @@ async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool =
cache.set(f"subtitle-{message.chat.id}-{media_group_id}", "1", ttl=120)
matched = await match_social_media_link(url)
platform = matched["platform"]
+ if platform not in ["bilibili", "youtube"]:
+ await send2tg(client, message, texts="仅支持Bilibili和YouTube视频链接", **kwargs)
+ return
vid = glom(matched, Coalesce("vid", "bvid"), default=url)
vinfo = await get_youtube_vinfo(vid) if platform == "youtube" else await get_bilibili_vinfo(vid)
+ if error := vinfo.get("error_msg"):
+ await send2tg(client, message, texts=error, **kwargs)
+ return
+ url = glom(vinfo, Coalesce("url", "link"), default=url)
description = glom(vinfo, Coalesce("description", "desc"), default="")
caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['pubdate']}\n📝[{vinfo['title']}]({url})"
msg = f"🔍**正在获取字幕:**\n{caption}"[:TEXT_LENGTH]
@@ -96,7 +112,7 @@ async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool =
return
res |= {"subtitles": res["texts"], "num_chars": count_subtitles(res["texts"]), "reading_minutes": count_subtitles(res["texts"]) / READING_SPEED}
- # Send subtitle file
+ # Send subtitle
subtitles = glom(res, Coalesce("full", "subtitles", "summary"), default="")
if not subtitles:
await modify_progress(del_status=True, **kwargs)
@@ -104,12 +120,17 @@ async def get_subtitle(client: Client, message: Message, *, to_telegraph: bool =
logger.success(subtitles)
caption = f"{vinfo['emoji']}[{vinfo['author']}]({vinfo['channel']})\n🕒{vinfo['pubdate']}\n"
caption += f"📝[{vinfo['title']}]({url})\n#️⃣字符数: {res['num_chars']}\n⏳阅读时长: {readable_time(60 * res['reading_minutes'])}"
- if to_telegraph:
- html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
- if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
- caption += f"\n⚡️[即时预览]({telegraph_url})"
- with BytesIO(subtitles.encode("utf-8")) as f:
- subtitle_msg = await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
+ if send_subtitle_as == "file":
+ if to_telegraph:
+ html = "\n".join([f"<p>{s}</p>" for s in subtitles.split("\n")])
+ if telegraph_url := await publish_telegraph(title=vinfo["title"], html=html, author=vinfo["author"], url=url):
+ caption += f"\n⚡️[即时预览]({telegraph_url})"
+ with BytesIO(subtitles.encode("utf-8")) as f:
+ subtitle_msg = await client.send_document(to_int(target_chat), f, file_name=f"{vinfo['title']}.txt", caption=caption)
+ elif send_subtitle_as == "str":
+ subtitle_msg = (await send2tg(client, message, texts=f"{caption}\n{subtitles}", **kwargs))[0]
+ else:
+ subtitle_msg = message
if ai_summary and isinstance(subtitle_msg, Message):
# use real subtitle (without AI summary by Bilibili)