Commit 8c447be

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-02-09 11:01:08
feat(ytdlp): convert video to H264 format
1 parent 0dfd493
Changed files (3)
src/preview/ytdlp.py
@@ -17,14 +17,14 @@ from pyrogram.types import Message
 from yt_dlp import YoutubeDL
 from yt_dlp.utils import DownloadError, ExtractorError, YoutubeDLError
 
-from config import API, CAPTION_LENGTH, DB, DOWNLOAD_DIR, MAX_FILE_BYTES, PROVIDER, PROXY, TID, TOKEN, cache
+from config import API, CAPTION_LENGTH, DB, DOWNLOAD_DIR, MAX_FILE_BYTES, PROVIDER, PROXY, TID, TOKEN, YTDLP_RE_ENCODING_MAX_FILE_BYTES, cache
 from database import get_db
 from messages.database import copy_messages_from_db, save_messages
 from messages.preprocess import preprocess_media
 from messages.progress import modify_progress, telegram_uploading
 from messages.sender import send2tg
 from messages.utils import get_reply_to
-from multimedia import generate_cover
+from multimedia import convert_to_h264, generate_cover
 from networking import hx_req
 from others.emoji import emojify
 from utils import readable_size, readable_time, soup_to_text, to_int, true, ts_to_dt, unicode_to_ascii
@@ -175,6 +175,7 @@ async def preview_ytdlp(
         thumb = None
     # split large videos into multiple parts (less than 2GB)
     if video_path.is_file():
+        video_path = convert_to_h264(video_path, re_encoding=True, max_file_size=YTDLP_RE_ENCODING_MAX_FILE_BYTES, skip_h264=True)
         if video_path.stat().st_size > MAX_FILE_BYTES:
             await modify_progress(text="🎬视频大小超过Telegram限制(2000MB), 正在切分...", **kwargs)
         videos = preprocess_media([{"video": video_path, "thumb": thumb}])
src/config.py
@@ -22,6 +22,8 @@ MAX_MESSAGE_COMBINATION = int(os.getenv("MAX_MESSAGE_COMBINATION", "5000"))  # M
 MAX_MESSAGE_SUMMARY = int(os.getenv("MAX_MESSAGE_SUMMARY", "1000"))  # Maximum number of messages to summarize
 READING_SPEED = int(os.getenv("READING_SPEED", "300"))  # words per minute
 DAILY_MESSAGES = os.getenv("DAILY_MESSAGES", "{}")  # Useful for daily checkin for some services. Should be a json string: '{"chat-1": "msg-1", "chat-2": "msg-2"}'
+# Videos downloaded by yt-dlp are re-encoded to H264. This sets the maximum file size (in bytes) eligible for re-encoding; larger files skip re-encoding. 0 means no limit.
+YTDLP_RE_ENCODING_MAX_FILE_BYTES = int(os.getenv("YTDLP_RE_ENCODING_MAX_FILE_BYTES", "0"))
 
 
 class ENABLE:
src/multimedia.py
@@ -129,23 +129,56 @@ def split_large_video(path: str | Path | None, *, delete: bool = True) -> list[P
     return videos
 
 
-def convert_to_h264(path: str | Path | None, *, re_encoding: bool = False, delete: bool = True) -> Path:
+def convert_to_h264(
+    path: str | Path | None,
+    *,
+    re_encoding: bool = False,
+    max_file_size: int = 0,
+    skip_h264: bool = False,
+    audio_codec: str = "aac",
+    ext: str = "mp4",
+    delete: bool = True,
+) -> Path:
+    """Convert video to H264 format.
+
+    Args:
+        path (str | Path | None): video file path
+        re_encoding (bool, optional): re-encode video. Defaults to False.
+        max_file_size (int, optional): limit the max file size for re-encoding. Defaults to 0 (no limit).
+        skip_h264 (bool, optional): skip conversion if video is already H264. Defaults to False.
+        audio_codec (str, optional): audio codec used in re-encoding. Defaults to "aac".
+        ext (str, optional): output format. Defaults to "mp4".
+        delete (bool, optional): delete original file. Defaults to True.
+
+    Returns:
+        Path: output video path
+    """
     if path is None or not Path(path).expanduser().resolve().is_file():
         return Path("")
     path = Path(path).expanduser().resolve()
-    logger.debug(f"Checking H264 mp4: {path.name}")
+    logger.debug(f"Checking H264 codec: {path.name}")
     info = parse_media_info(path)
-    tmp_path = path.with_suffix(".tmp.mp4")
-    mp4_path = path.with_suffix(".h264.mp4")
+    tmp_path = path.with_suffix(f".tmp.{ext}")
+    mp4_path = path.with_suffix(f".h264.{ext}")
     success = True
+    if info["video_codec"] == "h264":
+        if skip_h264:
+            logger.debug(f"Video is already H264, skip conversion: {path.name}")
+            return path
+        logger.debug("Video is already H264, skip re-encoding")
+        re_encoding = False
+    if max_file_size > 0 and path.stat().st_size > max_file_size:
+        logger.warning(f"Video file size is too large: {path.stat().st_size}, skip re-encoding")
+        re_encoding = False
+
     try:
-        if not re_encoding and info["video_codec"] == "h264" and info["audio_codec"] == "aac":
-            logger.debug(f"Video is already H264, without re-encoding: {path.name} -> {tmp_path.name}")
-            ffmpeg = FFmpeg().option("y").input(path).output(tmp_path, codec="copy", movflags="+faststart", f="mp4")
+        if not re_encoding:
+            logger.debug(f"Convert video to H264 (copy): {path.name} -> {tmp_path.name}")
+            ffmpeg = FFmpeg().option("y").input(path).output(tmp_path, codec="copy", movflags="+faststart", f=ext)
             ffmpeg.execute()
         else:
-            logger.warning(f"Re-encoding video: {path.name} -> {tmp_path.name}")
-            ffmpeg = FFmpeg().option("y").input(path).output(tmp_path, acodec="aac", vcodec="libx264", f="mp4")
+            logger.warning(f"Convert video to H264 (re-encoding): {path.name} -> {tmp_path.name}")
+            ffmpeg = FFmpeg().option("y").input(path).output(tmp_path, acodec=audio_codec, vcodec="libx264", f=ext)
 
             @ffmpeg.on("progress")
             def on_progress(progress: Progress):