Commit afdb353
Changed files (3)
src
subtitles
src/llm/contexts.py
@@ -11,7 +11,7 @@ from pyrogram.client import Client
from pyrogram.types import Message
from config import GPT
-from llm.utils import BOT_TIPS, clean_response, convert_md
+from llm.utils import BOT_TIPS, clean_context, convert_md
from messages.parser import parse_msg
if TYPE_CHECKING:
@@ -94,7 +94,7 @@ async def single_gpt_context(client: Client, message: Message) -> dict:
}
)
# user message has entity urls, use full html
- clean_texts = clean_response(info["html"]) if role == "user" and info["entity_urls"] else clean_response(info["text"])
+ clean_texts = clean_context(info["html"]) if role == "user" and info["entity_urls"] else clean_context(info["text"])
if not clean_texts:
continue
texts = f"[username]: {info['full_name']}\n[message]:\n{clean_texts}" if role == "user" else clean_texts
@@ -146,7 +146,7 @@ async def single_gemini_context(client: Client, message: Message) -> dict:
Path(fpath).unlink(missing_ok=True)
parts.append(Part.from_text(text=f"[fileowner]: {info['full_name']}\n[filename]: {info['file_name']}\n[file content]:\n{text.strip()}"))
# user message has entity urls, use full html
- clean_texts = clean_response(info["html"]) if role == "user" and info["entity_urls"] else clean_response(info["text"])
+ clean_texts = clean_context(info["html"]) if role == "user" and info["entity_urls"] else clean_context(info["text"])
if not clean_texts:
continue
texts = f"[username]: {info['full_name']}\n[message]:\n{clean_texts}" if role == "user" else clean_texts
src/llm/utils.py
@@ -193,7 +193,7 @@ def clean_reasoning(text: str) -> str:
return text.removeprefix(BLOCKQUOTE_EXPANDABLE_END_DELIM).lstrip()
-def clean_response(text: str) -> str:
+def clean_context(text: str) -> str:
"""Remove bot prefix and reasoning content."""
text = re.sub(r"^👤@.*?\/\/", "", text) # remove markdown send_from_user
text = re.sub(r"^👤\<a.*?tg://user\?id=\d+.*?@.*?</a>//", "", text) # remove html send_from_user
src/subtitles/subtitle.py
@@ -23,7 +23,7 @@ from preview.utils import fetch_youtube_video_info, get_bilibili_video_info
from preview.ytdlp import preview_ytdlp
from publish import publish_telegraph
from subtitles.base import fetch_subtitle, match_url
-from utils import count_subtitles, to_int
+from utils import count_subtitles, rand_number, to_int
HELP = f"""📃**提取字幕**
使用说明:
@@ -131,16 +131,18 @@ async def get_subtitle(
if ai_summary and isinstance(subtitle_msg, Message):
# use real subtitle (without AI summary by Bilibili)
subtitles = re.sub(r"(.*?)Bilibili原视频提供的字幕:", "", subtitles, flags=re.DOTALL).strip()
- prompt = f"以下是{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目详情, 请解读本期节目内容。"
+ prompt = f"节目标题: {vinfo['title']}\n发布日期: {vinfo['date']:%Y-%m-%d %H:%M:%S}\n节目简介: {description}\n"
+ prompt += f"\n这是{matched['platform'].title()}视频作者【{vinfo['author']}】的一期节目详情, 请解读本期节目内容。"
prompt += "\n注意: 不要复述节目标题、作者、日期、简介等基本信息, 不要进行任何寒暄与问候, 直接输出节目内容解读。"
- prompt += f"\n节目标题: {vinfo['title']}\n发布日期: {vinfo['date']:%Y-%m-%d %H:%M:%S}\n节目简介: {description}\n节目文字稿:\n{subtitles}"
# Construct a message to call GPT
ai_msg = Message(
id=subtitle_msg.id,
chat=subtitle_msg.chat,
text=Str(f"{PREFIX.GPT} {prompt}"),
from_user=message.from_user,
+ reply_to_message=Message(id=rand_number(), chat=subtitle_msg.chat, from_user=message.from_user, text=Str(subtitles)),
)
+ kwargs["include_thoughts"] = False
await gpt_response(client, ai_msg, **kwargs)
with contextlib.suppress(Exception):
[await modify_progress(msg, del_status=True) for msg in res.get("sent_messages", [])]