Commit fdb3bff
Changed files (3)
src/preview/xiaohongshu.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-import json
from datetime import datetime
from zoneinfo import ZoneInfo
+import yaml
from bs4 import BeautifulSoup
from loguru import logger
from pyrogram.client import Client
@@ -55,7 +55,7 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
await send_to_social_media_bridge(client, message, full_url, **kwargs)
else:
await modify_progress(text="❌小红书解析失败, 请稍候再尝试", **kwargs)
-
+ return
await modify_progress(text="✅解析成功, 正在处理...", **kwargs)
media: list[dict] = []
if note.get("type") == "video":
@@ -70,7 +70,7 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
media.append({"video": download_first_success_urls(video_urls, suffix=".mp4", **kwargs)})
else:
for img_info in note.get("imageList", []):
- img_url = img_info.get("urlDefault", "")
+ img_url = img_info.get("urlDefault") or img_info.get("url") or ""
if img_info.get("livePhoto"):
video_urls = []
for vcodec in ["h264", "h265", "av1", "h266"]:
@@ -89,7 +89,7 @@ async def preview_xhs(client: Client, message: Message, url: str = "", db_key: s
ts = note.get("time", 0) / 1000
texts = ""
- if author := note.get("user", {}).get("nickname", ""):
+ if author := note.get("user", {}).get("nickname") or note.get("user", {}).get("nickName"):
texts += f"🍠[{author}]({url})\n"
if ts := note.get("time"):
dt = datetime.fromtimestamp(float(ts) / 1000).astimezone(ZoneInfo(TZ))
@@ -115,7 +115,7 @@ async def get_xhs_info(url: str, retry: int = 0) -> dict:
XHS bans VPS IPs, so we need to use a residential proxy.
"""
- headers = {"user-agent": UA.TELEGRAM, "referer": "https://www.xiaohongshu.com/"}
+ headers = {"user-agent": UA.IPHONE, "referer": "https://www.xiaohongshu.com/"}
if retry > 3:
return {}
data = {}
@@ -124,7 +124,7 @@ async def get_xhs_info(url: str, retry: int = 0) -> dict:
soup = BeautifulSoup(resp.text, "html.parser")
data["soup"] = soup
script_info = next((str(x.text).removeprefix("window.__INITIAL_STATE__=") for x in soup.find_all("script") if str(x.text).startswith("window.__INITIAL_STATE__=")), "{}")
- info = json.loads(script_info.replace("undefined", '""')) # or use yaml.safe_load(script_info)
+ info = yaml.safe_load(script_info)
if not info:
retry += 1
logger.warning(f"XHS empty response, maybe need to adjust the proxy. Retrying: {retry} / 3")
@@ -134,8 +134,13 @@ async def get_xhs_info(url: str, retry: int = 0) -> dict:
retry += 1
return await get_xhs_info(url, retry=retry)
+ # XHS returns the note in one of two page-state layouts; try each in turn
if notes := list(info.get("note", {}).get("noteDetailMap", {}).values()):
data["note"] = notes[0].get("note", {})
+ if data["note"]:
+ return data
+ if note := info.get("noteData", {}).get("data", {}).get("noteData", {}):
+ data["note"] = note
return data
retry += 1
logger.error(f"Parsed info has no post, Retrying: {retry} / 3")
pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
"pysocks>=1.7.1",
"pytgcrypto>=1.2.9.2",
"python-ffmpeg>=2.0.12",
+ "pyyaml>=6.0.2",
"uvloop>=0.21.0",
"youtube-transcript-api>=0.6.3",
"yt-dlp>=2025.1.12rc",
uv.lock
@@ -212,6 +212,7 @@ dependencies = [
{ name = "pysocks", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pytgcrypto", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-ffmpeg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+ { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "uvloop", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "youtube-transcript-api", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "yt-dlp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -238,6 +239,7 @@ requires-dist = [
{ name = "pysocks", specifier = ">=1.7.1" },
{ name = "pytgcrypto", specifier = ">=1.2.9.2" },
{ name = "python-ffmpeg", specifier = ">=2.0.12" },
+ { name = "pyyaml", specifier = ">=6.0.2" },
{ name = "uvloop", specifier = ">=0.21.0" },
{ name = "youtube-transcript-api", specifier = ">=0.6.3" },
{ name = "yt-dlp", specifier = ">=2025.1.12rc0" },
@@ -908,6 +910,35 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7f/6d/02e817aec661defe148cb9eb0c4eca2444846305f625c2243fb9f92a9045/python_ffmpeg-2.0.12-py3-none-any.whl", hash = "sha256:d86697da8dfb39335183e336d31baf42fb217468adf5ac97fd743898240faae3", size = 14411 },
]
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 },
+ { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 },
+ { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 },
+ { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 },
+ { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 },
+ { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 },
+ { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 },
+ { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 },
+ { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 },
+ { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 },
+ { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 },
+ { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 },
+ { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 },
+ { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 },
+ { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 },
+ { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 },
+ { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 },
+ { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 },
+ { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 },
+ { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 },
+ { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 },
+]
+
[[package]]
name = "requests"
version = "2.32.3"