main
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3from glom import glom
4from pyrogram.client import Client
5from pyrogram.types import Message
6
7from ai.texts.contexts import get_openai_completion_contexts
8from ai.texts.openai_chat import openai_chat_completions
9from ai.utils import EMOJI_TEXT_BOT, literal_eval
10from config import GOOGLE_SEARCH_GL, PROXY, TOKEN, TZ
11from messages.progress import modify_progress
12from networking import hx_req
13from utils import nowdt
14
15# ruff: noqa: RUF001
# JSON-schema description of the arguments accepted by the `web_search` tool:
# a single required string named `query`.
_WEB_SEARCH_PARAMETERS = {
    "type": "object",
    "required": ["query"],
    "properties": {
        "query": {"description": "The search query to look up", "type": "string"},
    },
}

# Tool definitions attached to the chat-completion request. Only one
# function tool, `web_search`, is declared.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search the web for current information on a topic",
            "parameters": _WEB_SEARCH_PARAMETERS,
        },
    }
]
32
33
def remove_tool(tools: list[dict], tool_name: str) -> list[dict]:
    """Return a copy of ``tools`` without the function tool named ``tool_name``.

    Each entry is matched on its nested ``entry["function"]["name"]`` value.
    An entry that carries no such name (no ``function`` key, a non-dict
    ``function`` value, or a ``function`` dict without ``name``) can never
    match, so it is kept rather than raising a lookup error.

    Args:
        tools (list[dict]): Tool definitions to filter.
        tool_name (str): Name of the tool to drop.

    Returns:
        list[dict]: A new list holding the remaining entries in their
            original order; the input list itself is not modified.
    """
    remaining: list[dict] = []
    for tool in tools:
        function_spec = tool.get("function")
        # Only a dict-shaped function spec can declare a name.
        declared_name = function_spec.get("name") if isinstance(function_spec, dict) else None
        if declared_name != tool_name:
            remaining.append(tool)
    return remaining
40
41
async def web_search(query: str) -> list[dict]:
    """Look up ``query`` through the Google Custom Search endpoint.

    Args:
        query (str): The search query to look up.

    Returns:
        list[dict]: One dict per item of the response's ``items`` list, each
            reduced to whichever of the ``title``, ``link``, ``snippet`` and
            ``mime`` keys it contains. Empty when the API key or search
            engine id is not configured, or when the response has no items.
    """
    # Both the API key and the search engine id are required to issue a request.
    if not TOKEN.GOOGLE_SEARCH_API_KEY or not TOKEN.GOOGLE_SEARCH_CX:
        return []

    request_params = {
        "key": TOKEN.GOOGLE_SEARCH_API_KEY,
        "cx": TOKEN.GOOGLE_SEARCH_CX,
        "q": query,
        "num": 10,
        "safe": "off",
        "gl": GOOGLE_SEARCH_GL,
    }
    # NOTE(review): `check_keys` / `max_retry` are interpreted by the project's
    # `hx_req` helper; presumably "validate that `items` exists, do not retry".
    response = await hx_req(
        "https://www.googleapis.com/customsearch/v1",
        proxy=PROXY.GOOGLE,
        params=request_params,
        check_keys=["items"],
        max_retry=0,
    )
    # A missing or falsy `items` value is normalised to an empty list.
    items = glom(response, "items", default=[]) or []

    wanted_keys = ["title", "link", "snippet", "mime"]
    trimmed: list[dict] = []
    for item in items:
        trimmed.append({key: value for key, value in item.items() if key in wanted_keys})
    return trimmed
68
69
async def get_tool_call_results(client: Client, message: Message, **kwargs) -> dict:
    """Run the tool-calling round for a message and collect the tool output.

    The conversation contexts are prefixed with a tool-aware system prompt and
    sent to `openai_chat_completions` with `TOOLS` attached. While the response
    names a tool, the tool is handled (only `web_search` is implemented), its
    results are appended to the leading system message, and the tool is
    removed from the offered list before the model is asked again.

    Args:
        client (Client): Forwarded to the context builder and the completion helper.
        message (Message): The message being answered; also used to send the
            progress reply.
        **kwargs: Forwarded to `openai_chat_completions` and `modify_progress`.
            Keys read directly here: `tool_call_system_prompt` (replaces the
            default system prompt), `show_progress` (default True), `silent`,
            `progress`, and `model_name` (required whenever a tool is reported,
            because it is interpolated into the progress texts).

    Returns:
        dict: One of
            - `{}` when no contexts could be built;
            - `{"success": True, "openai_system_prompt": str, "openai_tools": None,
              "progress": ...}` when text was appended after the system prompt,
              where `openai_system_prompt` is that appended text only;
            - otherwise `{"progress": Message}` if a progress message exists,
              plus `"success": True` when the last response reported success.
    """
    contexts = await get_openai_completion_contexts(client, message)
    if not contexts:
        return {}

    # The prompt body below is part of the runtime string and is deliberately
    # written at column 0; `.strip()` removes the surrounding whitespace.
    default_system_prompt = f"""You are a helpful assistant.
Current date: {nowdt(TZ):%Y-%m-%d}

# Tools

## web_search

Use the `web_search` tool to access up-to-date information from the web or when responding to the user requires information about their location. Some examples of when to use the `web_search` tool include:

- Local Information: Use the `web_search` tool to respond to questions that require information about the user's location, such as the weather, local businesses, or events.
- Freshness: If up-to-date information on a topic could potentially change or enhance the answer, call the `web_search` tool any time you would otherwise refuse to answer a question because your knowledge might be out of date.
- Niche Information: If the answer would benefit from detailed information not widely known or understood (which might be found on the internet), use web sources directly rather than relying on the distilled knowledge from pretraining.
- Accuracy: If the cost of a small mistake or outdated information is high (e.g., using an outdated version of a software library or not knowing the date of the next game for a sports team), then use the `web_search` tool.
""".strip()
    system_prompt = kwargs.get("tool_call_system_prompt", default_system_prompt)
    # The system message always sits at index 0 so search results can later be
    # appended to it and recovered from there.
    contexts.insert(0, {"role": "system", "content": system_prompt})
    show_progress = kwargs.pop("show_progress", True)  # remember the caller's preference before overriding it
    kwargs |= {
        "openai_tools": TOOLS,
        "openai_contexts": contexts,
        "openai_system_prompt": system_prompt,
        "show_progress": False,  # the completion helper must not emit its own progress message
    }
    resp = await openai_chat_completions(client, message, **kwargs)
    # Send a progress reply only when a tool was requested, progress is wanted,
    # the call is not silent, and the caller did not already pass a progress Message.
    if resp.get("tool_name") and show_progress and not kwargs.get("silent") and not isinstance(kwargs.get("progress"), Message):
        kwargs["progress"] = await message.reply(f"{EMOJI_TEXT_BOT}**{kwargs['model_name']}** 执行工具:\n{resp['tool_name']}", quote=True)
    # NOTE(review): the nesting of this loop was reconstructed from a paste with
    # collapsed indentation; confirm that the tool removal runs on every
    # iteration and not only inside the `web_search` branch.
    while resp.get("tool_name"):
        tool_name = resp["tool_name"].strip()
        # `tool_args` arrives as text and is parsed by the project's `literal_eval` helper.
        tool_args = literal_eval(resp.get("tool_args", "{}"))
        if tool_name == "web_search" and tool_args:
            # NOTE(review): assumes the parsed arguments contain exactly a `query` key.
            await modify_progress(text=f"{EMOJI_TEXT_BOT}**{kwargs['model_name']}** 开始搜索:\n{tool_args['query']}", force_update=True, **kwargs)
            results = await web_search(**tool_args)
            kwargs["openai_contexts"] = add_search_results(contexts, results)
        # Each tool is offered at most once: drop it before asking the model again.
        # NOTE(review): if the model names a tool that is not in the list, the list
        # is left unchanged and another completion is requested; confirm this
        # cannot repeat indefinitely.
        kwargs["openai_tools"] = remove_tool(kwargs["openai_tools"], tool_name)
        if not kwargs["openai_tools"]:
            break
        resp = await openai_chat_completions(client, message, **kwargs)
    # Whatever remains after removing the original system prompt from the first
    # context is the text appended by `add_search_results`.
    if texts := glom(kwargs, "openai_contexts.0.content", default="").strip().removeprefix(system_prompt).strip():
        return {
            "success": True,
            "openai_system_prompt": texts,  # tool results become the system prompt for the follow-up call
            "openai_tools": None,  # disable tools after the tool call
            "progress": kwargs.get("progress") or resp.get("progress"),
        }
    # No tool output was collected: report only the progress message (if one
    # exists) and the success flag of the last completion.
    status_msg = kwargs.get("progress") or resp.get("progress")
    result: dict = {"progress": status_msg} if isinstance(status_msg, Message) else {}
    if resp.get("success"):
        result |= {"success": True}
    return result
129
130
def add_search_results(contexts: list[dict], search_results: list[dict]) -> list[dict]:
    """Fold web search results into the leading system message of ``contexts``.

    Every result is rendered between ``[webpage N begin]`` / ``[webpage N end]``
    markers (N starting at 1) and embedded in an instruction prompt. The prompt
    is appended to the first context when that context has the ``system`` role;
    otherwise a new system context holding the prompt is inserted at the front.

    Args:
        contexts (list[dict]): Chat contexts; modified in place.
        search_results (list[dict]): Search results to embed.

    Returns:
        list[dict]: The same ``contexts`` list. It is returned untouched when
            either argument is empty.
    """
    if not (contexts and search_results):
        return contexts

    numbered_pages = (
        f"[webpage {number} begin] {page} [webpage {number} end]\n"
        for number, page in enumerate(search_results, start=1)
    )
    search_msg = "".join(numbered_pages)

    # modified from DeepSeek's official instructions: https://github.com/deepseek-ai/DeepSeek-R1/tree/ef99616
    prompt = f"""
# 以下内容是基于用户发送的消息的搜索结果:
{search_msg}
在我给你的搜索结果中,每个结果都是[webpage X begin]...[webpage X end]格式的,X代表每篇文章的数字索引。
在回答时,请注意以下几点:
- 今天是{nowdt(TZ):%Y-%m-%d}。
- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内,并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项;如非必要,不要主动告诉用户搜索结果未提供的内容。
- 对于创作类的问题(如写论文),你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。
- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
- 请在适当的情况下在句子末尾引用上下文。请按照引用编号 [[X]](url) 的格式在答案中对应部分引用上下文。
- 如果一句话源自多个上下文,请列出所有相关的引用编号,例如[[1]](url1) [[2]](url2),切记不要将引用集中在最后返回,而是在答案对应部分列出。
- 你的回答应该综合多个相关网页来回答,不能重复引用一个网页。
- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
"""
    leading = contexts[0]
    if leading["role"] != "system":
        # No system message yet: the prompt becomes the new first context.
        contexts.insert(0, {"role": "system", "content": prompt})
        return contexts

    # Extend the existing system message with the search prompt.
    leading["content"] += prompt
    return contexts
170 return contexts