Commit a3bfbcc

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-09-30 00:40:30
fix(tmdb): search both simplified and traditional Chinese
1 parent 6c82f68
Changed files (1)
src
others
src/others/tmdb.py
@@ -3,6 +3,7 @@
 from collections import defaultdict
 from typing import Literal
 
+import zhconv
 from glom import Coalesce, glom
 from pyrogram.client import Client
 from pyrogram.types import Message
@@ -13,7 +14,7 @@ from messages.sender import send2tg
 from messages.utils import blockquote, count_without_entities, equal_prefix, remove_prefix, set_reaction, smart_split, startswith_prefix
 from networking import download_file, download_first_success_urls, download_media, hx_req
 from publish import publish_telegraph
-from utils import seconds_to_hms
+from utils import seconds_to_hms, zhcn
 
 HELP = f"""
 🎬**查询影视信息**
@@ -70,12 +71,20 @@ async def search_keyword(query: str, tmdb_lang: Literal["en-US", "zh-CN"] = "zh-
 
     Returns: {"texts": str}
     """
-    params = {"query": query, "include_adult": str(include_adult).lower(), "language": tmdb_lang, "page": 1}
-    url = "https://api.themoviedb.org/3/search/multi"
-    resp = await hx_req(url, headers=HEADERS, params=params, proxy=PROXY.TMDB, check_kv={"page": 1}, check_keys=["results"])
-    if resp.get("hx_error"):
-        return {"texts": resp["hx_error"]}
-    results = [x for x in resp["results"] if x.get("media_type") in ["movie", "tv"]]  # only movie & TV
+    retrieved_ids = set()
+
+    async def search(q: str, lang: Literal["zh-cn", "zh-tw"]) -> list:
+        params = {"query": zhconv.convert(q, lang), "include_adult": str(include_adult).lower(), "language": tmdb_lang, "page": 1}
+        url = "https://api.themoviedb.org/3/search/multi"
+        resp = await hx_req(url, headers=HEADERS, params=params, proxy=PROXY.TMDB, check_kv={"page": 1}, check_keys=["results"])
+        if resp.get("hx_error"):
+            return []
+        retrieved = [x for x in resp["results"] if x.get("id") not in retrieved_ids and x.get("media_type") in ["movie", "tv"]]  # only movie & TV
+        retrieved_ids.update(x.get("id", 0) for x in retrieved)
+        return retrieved
+
+    results = await search(query, "zh-cn")
+    results.extend(await search(query, "zh-tw"))
     final_msg = ""
     for item in results:
         this_msg = ""
@@ -89,7 +98,7 @@ async def search_keyword(query: str, tmdb_lang: Literal["en-US", "zh-CN"] = "zh-
 
         if overview := item.get("overview"):
             if original_title and original_title != title:  # title: 中文名, original_title: 英文名
-                overview = f"《{original_title}》: {overview}"
+                overview = f"《{original_title}》: {zhcn(overview)}"
             this_msg += f"\n{blockquote(overview)}\n"
         if await count_without_entities(final_msg + this_msg) > TEXT_LENGTH:
             break
@@ -107,13 +116,22 @@ async def search_people(query: str, tmdb_lang: Literal["en-US", "zh-CN"] = "zh-C
 
     Returns: {"texts": str}
     """
-    params = {"query": query, "include_adult": str(include_adult).lower(), "language": tmdb_lang, "page": 1}
-    api = "https://api.themoviedb.org/3/search/person"
-    resp = await hx_req(api, headers=HEADERS, params=params, proxy=PROXY.TMDB, check_kv={"page": 1}, check_keys=["results"])
-    if resp.get("hx_error"):
-        return {"texts": resp["hx_error"]}
+    retrieved_ids = set()
+
+    async def search(q: str, lang: Literal["zh-cn", "zh-tw"]) -> list:
+        params = {"query": zhconv.convert(q, lang), "include_adult": str(include_adult).lower(), "language": tmdb_lang, "page": 1}
+        url = "https://api.themoviedb.org/3/search/person"
+        resp = await hx_req(url, headers=HEADERS, params=params, proxy=PROXY.TMDB, check_kv={"page": 1}, check_keys=["results"])
+        if resp.get("hx_error"):
+            return []
+        retrieved = [x for x in resp["results"] if x.get("id") not in retrieved_ids]  # skip people already returned by an earlier search
+        retrieved_ids.update(x.get("id", 0) for x in retrieved)
+        return retrieved
+
+    results = await search(query, "zh-cn")
+    results.extend(await search(query, "zh-tw"))
     final_msg = ""
-    for item in resp["results"]:
+    for item in results:
         this_msg = ""
         name = glom(item, Coalesce("name", "original_name"), default="")
         url = f"https://www.themoviedb.org/person/{item['id']}"
@@ -183,7 +201,7 @@ async def get_details(query: str, tmdb_lang: Literal["en-US", "zh-CN"] = "zh-CN"
     else:
         texts += f"链接: [TMDB](https://www.themoviedb.org/{media_type}/{resp['id']})\n"
     if overview := resp.get("overview"):
-        texts += f"简介: {overview}\n"
+        texts += f"简介: {zhcn(overview)}\n"
 
     # choose poster language
     media = []
@@ -289,7 +307,7 @@ async def get_people_details(people_id: int, tmdb_lang: Literal["en-US", "zh-CN"
         media = await download_media(media)
     telegraph_url = await publish_telegraph(title=name, html=productions_for_html.strip("<br>"), author=name, url=f"https://www.themoviedb.org/person/{people_id}")
 
-    description = f"简介: {resp['biography']}" if resp.get("biography") else ""
+    description = f"简介: {zhcn(resp['biography'])}" if resp.get("biography") else ""
     description = description.replace("\\n", "\n")
     max_length = CAPTION_LENGTH if media else TEXT_LENGTH
     if await count_without_entities(f"{texts}\n{description}") > max_length - 10:  # long desc