bennybot/src/preview/youtube.py at main

  1#!/usr/bin/env python
  2# -*- coding: utf-8 -*-
  3"""This file contains the code for extracting information from YouTube videos.
  4
  5But not for downloading YouTube videos.
  6For downloading YouTube videos, please see `src/preview/ytdlp.py`.
  7"""
  8
  9import re
 10from datetime import UTC, datetime, timedelta
 11from zoneinfo import ZoneInfo
 12
 13from glom import Coalesce, glom
 14from loguru import logger
 15
 16from config import PROXY, TOKEN, TZ, cache
 17from messages.utils import blockquote
 18from networking import hx_req
 19from utils import nowstr, readable_count, true
 20
 21
 22@cache.memoize(ttl=60)
 23async def get_youtube_comments(vid: str | None) -> list[str]:
 24    if not vid:
 25        return []
 26    api = "https://www.googleapis.com/youtube/v3/commentThreads"
 27    params = {"key": TOKEN.YOUTUBE_API_KEY, "maxResults": 100, "textFormat": "plainText", "part": "snippet", "videoId": vid}
 28    comments = []
 29    try:
 30        resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items"])
 31        if resp.get("hx_error"):
 32            logger.warning(f"YouTube Comments API failed: {resp['hx_error']}")
 33            return []
 34        data = resp["items"]
 35        for idx, x in enumerate(data):
 36            name = glom(x, "snippet.topLevelComment.snippet.authorDisplayName", default="匿名")
 37            name = name.removeprefix("@")
 38            if author_url := glom(x, "snippet.topLevelComment.snippet.authorChannelUrl", default=""):
 39                name = f"[{name}]({author_url})"
 40            if cmt := glom(x, "snippet.topLevelComment.snippet.textDisplay", default=""):
 41                if idx == 0:
 42                    comments.append(f"\n{blockquote('💬**点此展开评论区**:')}")
 43                cmt = f"💬**{name}**: {cmt}"
 44                comments.append(f"\n{blockquote(cmt)}")
 45    except Exception as e:
 46        logger.error(f"Failed to get YouTube comments: {e}")
 47        return []
 48    return comments
 49
 50
 51@cache.memoize(ttl=120)
 52async def get_youtube_vinfo(video_id: str) -> dict:
 53    """Fetch YouTube video info.
 54
 55    Returns:
 56        {
 57            "downloadable": (bool),
 58            "error_msg": (str),
 59            "title": (str),
 60            "description": (str),
 61            "author": (str),
 62            "channel": (str) channel url,
 63            "pubdate": (str)
 64            "duration": (int) in seconds,
 65            "has_subtitle": (bool),
 66            "is_live": (bool),
 67            "live_start": (datetime),
 68            "live_end": (datetime),
 69            "scheduled_start": (datetime),
 70            "view_count": (int),
 71            "like_count": (int),
 72            "favorite_count": (int),
 73            "comment_count": (int),
 74            "statistics": (str) "👁100K 👍100K ⭐️100K 💬100K",
 75            "emoji": (str) "🔴"
 76        }
 77    """
 78    if not video_id:
 79        return {"downloadable": False, "error_msg": "❌未提供VideoID"}
 80    info: dict = {"downloadable": False, "error_msg": "❌无法获取此视频信息"}
 81    try:
 82        logger.info(f"Fetch YouTube video info for {video_id=}, proxy={PROXY.GOOGLE}")
 83        api = "https://www.googleapis.com/youtube/v3/videos"
 84        params = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet,status,contentDetails,liveStreamingDetails,statistics", "id": video_id, "hl": "zh-CN"}
 85        resp = await hx_req(api, proxy=PROXY.GOOGLE, params=params, check_keys=["items.0.snippet"], max_retry=3)
 86        if resp.get("hx_error"):
 87            logger.warning(f"YouTube Videos API failed: {resp['hx_error']}")
 88            return {"downloadable": False, "error_msg": "❌无法获取此视频信息"}
 89        if not glom(resp, "items.0.snippet", default={}):
 90            logger.warning("YouTube Videos API failed: Video not found")
 91            return {"downloadable": False, "error_msg": "❌无法获取此视频信息"}
 92
 93        # basci info
 94        info["title"] = glom(resp, "items.0.snippet.title", default="Title")
 95        info["description"] = glom(resp, "items.0.snippet.description", default="")
 96        info["author"] = glom(resp, "items.0.snippet.channelTitle", default="YouTuber")
 97        channel = glom(resp, "items.0.snippet.channelId", default="")
 98        info["channel"] = f"https://www.youtube.com/channel/{channel}"
 99        if pubdate := glom(resp, "items.0.snippet.publishedAt", default=""):
100            dt = datetime.strptime(pubdate, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC).astimezone(ZoneInfo(TZ))
101            info["pubdate"] = f"{dt:%Y-%m-%d %H:%M:%S}"
102        else:
103            info["pubdate"] = nowstr(TZ)
104        info["has_subtitle"] = true(glom(resp, "items.0.contentDetails.caption", default=False))
105
106        # livestreaming
107        info |= {"is_live": False, "live_start": "", "live_end": "", "scheduled_start": ""}
108        if live_details := glom(resp, "items.0.liveStreamingDetails", default={}):
109            info["is_live"] = True
110            if live_start := live_details.get("actualStartTime"):
111                info["live_start"] = datetime.strptime(live_start, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC).astimezone(ZoneInfo(TZ))
112            if live_end := live_details.get("actualEndTime"):
113                info["live_end"] = datetime.strptime(live_end, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC).astimezone(ZoneInfo(TZ))
114            if scheduled_start := live_details.get("scheduledStartTime"):
115                info["scheduled_start"] = datetime.strptime(scheduled_start, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC).astimezone(ZoneInfo(TZ))
116
117        # statistics
118        info |= {
119            "view_count": int(glom(resp, "items.0.statistics.viewCount", default=0)),
120            "like_count": int(glom(resp, "items.0.statistics.likeCount", default=0)),
121            "favorite_count": int(glom(resp, "items.0.statistics.favoriteCount", default=0)),
122            "comment_count": int(glom(resp, "items.0.statistics.commentCount", default=0)),
123        }
124        statistics = ""
125        if view := info.get("view_count"):
126            statistics += f"👁{readable_count(view)}"
127        if like := info.get("like_count"):
128            statistics += f"👍{readable_count(like)}"
129        if favorite := info.get("favorite_count"):
130            statistics += f"⭐️{readable_count(favorite)}"
131        if comment := info.get("comment_count"):
132            statistics += f"💬{readable_count(comment)}"
133        info["statistics"] = statistics
134
135        # downloadable
136        info |= {"downloadable": True, "error_msg": ""}
137        privacy = glom(resp, "items.0.status.privacyStatus", default="private")  # public, private, unlisted
138        status = glom(resp, "items.0.status.uploadStatus", default="failed")  # deleted, failed, processed, uploaded, rejected
139        if privacy not in ["public", "unlisted"]:
140            info |= {"downloadable": False, "error_msg": "❌私享视频不可下载"}
141        if status != "processed":
142            info |= {"downloadable": False, "error_msg": f"❌转码视频未完成, 当前状态: {status}"}
143        if glom(resp, "items.0.snippet.liveBroadcastContent", default="") in ["live", "upcoming"]:
144            info |= {"downloadable": False, "error_msg": f"❌直播还未完成, 当前状态: {glom(resp, 'items.0.snippet.liveBroadcastContent')}"}
145        if info["is_live"] and not info["live_end"]:
146            info |= {"downloadable": False, "error_msg": f"❌直播还未完成, 当前状态: {glom(resp, 'items.0.snippet.liveBroadcastContent')}"}
147        blocked_regions = glom(resp, "items.0.contentDetails.regionRestriction.blocked", default=[]) or []
148        if "US" in blocked_regions:
149            info |= {"downloadable": False, "error_msg": f"❌视频在以下国家/地区被屏蔽: {', '.join(blocked_regions)}"}
150
151        # parse duration
152        """For a video that is at least one minute long and less than one hour long, the duration is in the format PT#M#S,
153        in which the letters PT indicate that the value specifies a period of time, and the letters M and S refer to length in minutes and seconds, respectively.
154        The # characters preceding the M and S letters are both integers that specify the number of minutes (or seconds) of the video.
155        For example, a value of PT15M33S indicates that the video is 15 minutes and 33 seconds long.
156
157        If the video is at least one hour long, the duration is in the format PT#H#M#S,
158        in which the # preceding the letter H specifies the length of the video in hours and all of the other details are the same as described above.
159        If the video is at least one day long, the letters P and T are separated, and the value's format is P#DT#H#M#S.
160
161        Please refer to the ISO 8601 specification for complete details. (https://en.wikipedia.org/wiki/ISO_8601#Durations)
162        """
163        duration = glom(resp, "items.0.contentDetails.duration", default="PT0M0S")
164        pattern = r"^P(?:(?P<days>\d+\.\d+|\d*?)D)?T?(?:(?P<hours>\d+\.\d+|\d*?)H)?(?:(?P<minutes>\d+\.\d+|\d*?)M)?(?:(?P<seconds>\d+\.\d+|\d*?)S)?$"
165        if matched := re.match(pattern, duration):
166            parts = {k: float(v) for k, v in matched.groupdict("0").items()}
167            info["duration"] = int(timedelta(**parts).total_seconds())
168        else:
169            info["duration"] = 0
170    except Exception as e:
171        logger.error(f"Failed to get video info: {e}")
172        return {"downloadable": False, "error_msg": "❌无法获取此视频信息"}
173    return info | {"emoji": "🔴"}
174
175
async def get_youtube_channel_thumb(channel_id: str) -> str:
    """Look up the thumbnail URL of a YouTube channel.

    Args:
        channel_id: ID of the channel. A falsy value yields "" without
            issuing a request.

    Returns:
        The URL of the "high" thumbnail, falling back to "medium" and then
        "default"; "" when the request reports an error or no thumbnail URL
        is present in the response.
    """
    thumb_url = ""
    if channel_id:
        query = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet", "id": channel_id, "hl": "zh-CN"}
        result = await hx_req(
            "https://www.googleapis.com/youtube/v3/channels",
            proxy=PROXY.GOOGLE,
            params=query,
            check_keys=["items.0.snippet"],
            max_retry=3,
        )
        failure = result.get("hx_error")
        if failure:
            logger.warning(f"YouTube Channels API failed: {failure}")
        else:
            # Prefer the largest available rendition.
            sizes = glom(result, "items.0.snippet.thumbnails", default={})
            thumb_url = glom(sizes, Coalesce("high.url", "medium.url", "default.url"), default="")
    return thumb_url
188
189
@cache.memoize(ttl=120)
async def get_youtube_channel_name_by_handle(handle: str) -> str:
    """Resolve a YouTube channel handle to the channel's title.

    Args:
        handle: Channel handle passed to the API as ``forHandle``. A falsy
            value yields "" without issuing a request.

    Returns:
        The channel title, or "" when the request reports an error or the
        response carries no title.
    """
    channel_name = ""
    if handle:
        query = {"key": TOKEN.YOUTUBE_API_KEY, "part": "snippet", "forHandle": handle}
        result = await hx_req(
            "https://www.googleapis.com/youtube/v3/channels",
            proxy=PROXY.GOOGLE,
            params=query,
            check_keys=["items.0.snippet"],
            max_retry=3,
        )
        failure = result.get("hx_error")
        if failure:
            logger.warning(f"YouTube Channels API failed: {failure}")
        else:
            channel_name = glom(result, "items.0.snippet.title", default="")
    return channel_name