Commit 7b63892

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-08-31 05:13:23
chore(social): extract video `duration` for bilibili and youtube
1 parent 594d287
Changed files (2)
src/preview/bilibili.py
@@ -142,6 +142,7 @@ async def get_bilibili_vinfo(url_or_vid: int | str) -> dict:
             "author": (str),
             "channel": (str) channel url,
             "pubdate": (str)
+            "duration": (int) in seconds,
             "upload_date": (str)
             "view_count": (int),
             "like_count": (int),
@@ -167,7 +168,7 @@ async def get_bilibili_vinfo(url_or_vid: int | str) -> dict:
         info["channel"] = f"https://space.bilibili.com/{glom(info, 'owner.mid', default='')}"
         info["pubdate"] = datetime.fromtimestamp(info["pubdate"], tz=ZoneInfo(TZ)).strftime("%Y-%m-%d %H:%M:%S")
         info["upload_date"] = datetime.fromtimestamp(info["ctime"], tz=ZoneInfo(TZ)).strftime("%Y-%m-%d %H:%M:%S")
-
+        info["duration"] = int(info.get("duration", 0))
         # statistics
         info |= {
             "view_count": int(glom(info, "stat.view", default=0)),
src/preview/youtube.py
@@ -6,7 +6,8 @@ But not for downloading YouTube videos.
 For downloading YouTube videos, please see `src/preview/ytdlp.py`.
 """
 
-from datetime import UTC, datetime
+import re
+from datetime import UTC, datetime, timedelta
 from zoneinfo import ZoneInfo
 
 from glom import glom
@@ -59,6 +60,7 @@ async def get_youtube_vinfo(video_id: str) -> dict:
             "author": (str),
             "channel": (str) channel url,
             "pubdate": (str)
+            "duration": (int) in seconds,
             "has_subtitle": (bool),
             "is_live": (bool),
             "live_start": (datetime),
@@ -141,7 +143,25 @@ async def get_youtube_vinfo(video_id: str) -> dict:
             info |= {"downloadable": False, "error_msg": f"❌直播还未完成, 当前状态: {glom(resp, 'items.0.snippet.liveBroadcastContent')}"}
         if info["is_live"] and not info["live_end"]:
             info |= {"downloadable": False, "error_msg": f"❌直播还未完成, 当前状态: {glom(resp, 'items.0.snippet.liveBroadcastContent')}"}
+        # parse duration
+        """For a video that is at least one minute long and less than one hour long, the duration is in the format PT#M#S,
+        in which the letters PT indicate that the value specifies a period of time, and the letters M and S refer to length in minutes and seconds, respectively.
+        The # characters preceding the M and S letters are both integers that specify the number of minutes (or seconds) of the video.
+        For example, a value of PT15M33S indicates that the video is 15 minutes and 33 seconds long.
 
+        If the video is at least one hour long, the duration is in the format PT#H#M#S,
+        in which the # preceding the letter H specifies the length of the video in hours and all of the other details are the same as described above.
+        If the video is at least one day long, the letters P and T are separated, and the value's format is P#DT#H#M#S.
+
+        Please refer to the ISO 8601 specification for complete details. (https://en.wikipedia.org/wiki/ISO_8601#Durations)
+        """
+        duration = glom(resp, "items.0.contentDetails.duration", default="PT0M0S")
+        pattern = r"^P(?:(?P<days>\d+\.\d+|\d*?)D)?T?(?:(?P<hours>\d+\.\d+|\d*?)H)?(?:(?P<minutes>\d+\.\d+|\d*?)M)?(?:(?P<seconds>\d+\.\d+|\d*?)S)?$"
+        if matched := re.match(pattern, duration):
+            parts = {k: float(v) for k, v in matched.groupdict("0").items()}
+            info["duration"] = int(timedelta(**parts).total_seconds())
+        else:
+            info["duration"] = 0
     except Exception as e:
         logger.error(f"Failed to get video info: {e}")
         return {"downloadable": False, "error_msg": "❌无法获取此视频信息"}