Commit c94cc26
Changed files (1)
src
preview
src/preview/xiaohongshu.py
@@ -110,13 +110,21 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
await save_messages(messages=sent_messages, key=db_key)
-async def get_xhs_info(url: str, retry: int = 0) -> dict:
+async def get_xhs_info(url: str, ua: str = UA.CHROME, retry: int = 0) -> dict:
"""Get xiaohongshu post info.
XHS has banned VPS IPs, so we need to use a residential proxy.
+ XHS has two different return formats based on the User-Agent.
+ Some posts can only be accessed with a mobile User-Agent. (The reason is unknown.)
+ However, images fetched with a mobile User-Agent carry an XHS watermark.
+ So we prefer to use the desktop User-Agent.
"""
- headers = {"user-agent": UA.IPHONE, "referer": "https://www.xiaohongshu.com/"}
- if retry > 3:
+
+ def switch_ua(ua: str) -> str:
+ return UA.IPHONE if ua == UA.CHROME else UA.CHROME
+
+ headers = {"user-agent": ua, "referer": "https://www.xiaohongshu.com/"}
+ if retry > 4:
return {}
data = {}
try:
@@ -127,12 +135,12 @@ async def get_xhs_info(url: str, retry: int = 0) -> dict:
info = yaml.safe_load(script_info)
if not info:
retry += 1
- logger.warning(f"XHS empty response, maybe need to adjust the proxy. Retrying: {retry} / 3")
- return await get_xhs_info(url, retry=retry)
+ logger.warning(f"XHS empty response, maybe need to adjust the proxy. Retrying: {retry}")
+ return await get_xhs_info(url, ua=switch_ua(ua), retry=retry)
except Exception as e:
- logger.error(f"XHS parsing response failed: {e}, Retrying: {retry} / 3")
+ logger.error(f"XHS parsing response failed: {e}, Retrying: {retry}")
retry += 1
- return await get_xhs_info(url, retry=retry)
+ return await get_xhs_info(url, ua=switch_ua(ua), retry=retry)
# XHS has two different return formats
if notes := list(info.get("note", {}).get("noteDetailMap", {}).values()):
@@ -143,8 +151,8 @@ async def get_xhs_info(url: str, retry: int = 0) -> dict:
data["note"] = note
return data
retry += 1
- logger.error(f"Parsed info has no post, Retrying: {retry} / 3")
- return await get_xhs_info(url, retry=retry)
+ logger.error(f"Parsed info has no post, Retrying: {retry}")
+ return await get_xhs_info(url, ua=switch_ua(ua), retry=retry)
def get_xhs_comments(soup: BeautifulSoup | None) -> list[str]: