Commit 63f13c3

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-09-11 08:06:29
fix(video): convert CTTS invalid video files
1 parent 3e4c3ef
Changed files (3)
src/messages/preprocess.py
@@ -7,7 +7,7 @@ from pyrogram.types import InputMediaAudio, InputMediaDocument, InputMediaPhoto,
 
 from config import CAPTION_LENGTH
 from messages.utils import count_without_entities, smart_split
-from multimedia import fix_video_rotation, generate_cover, is_valid_video_or_audio, parse_media_info, split_large_video, split_long_img, validate_img
+from multimedia import fix_ctts_invalid, fix_video_rotation, generate_cover, is_valid_video_or_audio, parse_media_info, split_large_video, split_long_img, validate_img
 
 
 async def preprocess_media(media: list[dict]) -> list[dict]:
@@ -71,6 +71,7 @@ async def preprocess_media(media: list[dict]) -> list[dict]:
         thumb = data.get("thumb")  # thumb is provided
         if video_path := data.get("video"):
             video_path = await fix_video_rotation(video_path)
+            video_path = await fix_ctts_invalid(video_path)
             if not await is_valid_video_or_audio(video_path):
                 logger.warning(f"Video is invalid: {video_path}")
                 continue
src/ytdlp/main.py
@@ -304,7 +304,7 @@ async def send_media(
 
     # split large videos into multiple parts (less than 2GB)
     if true(ytdlp_send_video) and video_path.is_file():
-        video_path = await convert_to_h264(video_path, re_encoding=True, max_file_size=YTDLP_RE_ENCODING_MAX_FILE_BYTES, skip_h264=True)
+        video_path = await convert_to_h264(video_path, allow_re_encoding=True, max_file_size=YTDLP_RE_ENCODING_MAX_FILE_BYTES, skip_h264=True)
         if video_path.stat().st_size > MAX_FILE_BYTES:
             await modify_progress(text=f"🎬视频大小超过Telegram限制({MAX_FILE_BYTES / 1024 / 1024:.0f}MB), 正在切分...", **kwargs)
         videos = await preprocess_media([{"video": video_path, "thumb": thumb}])
src/multimedia.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import asyncio
 import contextlib
 import json
 import math
@@ -21,15 +22,17 @@ async def parse_media_info(path: str | Path | None) -> dict:
         return {}
     path = Path(path).expanduser().resolve()
     logger.trace(f"Parsing media info: {path.name} [{readable_size(path=path)}]")
-    ffprobe = FFmpegAsync(executable="ffprobe").input(path.as_posix(), print_format="json", show_streams=None)
+    # ffprobe = FFmpegAsync(executable="ffprobe").input(path.as_posix(), print_format="json", show_streams=None)
     info = {}
     try:
-        metadata = json.loads(await ffprobe.execute())
+        # metadata = json.loads(await ffprobe.execute())
+        cmd = ["ffprobe", "-show_streams", "-print_format", "json", path.as_posix()]
+        process = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
+        stdout_data, stderr_data = await process.communicate()
+        metadata = json.loads(stdout_data)
         streams = metadata.get("streams", [])
         audio_stream = next((x for x in streams if x.get("codec_name") and x.get("codec_type", "") == "audio"), {})
         video_stream = next((x for x in streams if x.get("codec_name") and x.get("codec_type", "") == "video"), {})
-        audio_codec = audio_stream.get("codec_name", "")
-        video_codec = video_stream.get("codec_name", "")
         durations = [x.get("duration", 0) for x in streams]  # all channels duration (some file embed the duration in subtitle stream)
         duration = max(map(float, durations))
         width = video_stream.get("width", "0")
@@ -41,10 +44,11 @@ async def parse_media_info(path: str | Path | None) -> dict:
             "duration": math.ceil(float(duration)),
             "width": round(float(width)),
             "height": round(float(height)),
-            "audio_codec": audio_codec,
-            "video_codec": video_codec,
+            "audio_codec": audio_stream.get("codec_name", ""),
+            "video_codec": video_stream.get("codec_name", ""),
             "rotation": round(side_data[0].get("rotation", 0)) if side_data else 0,
             "filesize": readable_size(path=path),
+            "ctts_invalid": "ctts invalid" in stderr_data.decode(errors="ignore").lower(),
         }
     except Exception as e:
         logger.error(f"Failed to parse media file info: {e}")
@@ -134,7 +138,8 @@ async def split_large_video(path: str | Path | None, *, delete: bool = True) ->
 async def convert_to_h264(
     path: str | Path | None,
     *,
-    re_encoding: bool = False,
+    allow_re_encoding: bool = False,
+    force_re_encoding: bool = False,
     max_file_size: int = 0,
     skip_h264: bool = False,
     audio_codec: str = "aac",
@@ -145,7 +150,8 @@ async def convert_to_h264(
 
     Args:
         path (str | Path | None): video file path
-        re_encoding (bool, optional): re-encode video. Defaults to False.
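+        allow_re_encoding (bool, optional): allow re-encoding the video; switched off internally when the video is already H264 or is larger than max_file_size. Defaults to False.
+        force_re_encoding (bool, optional): skip the codec-copy path even when allow_re_encoding is off. Defaults to False.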
         max_file_size (int, optional): limit the max file size for re-encoding. Defaults to 0 (no limit).
         skip_h264 (bool, optional): skip conversion if video is already H264. Defaults to False.
         audio_codec (str, optional): audio codec used in re-encoding. Defaults to "aac".
@@ -168,13 +174,13 @@ async def convert_to_h264(
             logger.debug(f"Video is already H264, skip conversion: {path.name}")
             return path
         logger.debug("Video is already H264, skip re-encoding")
-        re_encoding = False
+        allow_re_encoding = False
     if max_file_size > 0 and path.stat().st_size > max_file_size:
         logger.warning(f"Video file size is too large: {path.stat().st_size}, skip re-encoding")
-        re_encoding = False
+        allow_re_encoding = False
 
     try:
-        if not re_encoding:
+        if not allow_re_encoding and not force_re_encoding:
             logger.debug(f"Convert video to H264 (copy): {path.name} -> {tmp_path.name}")
             ffmpeg = FFmpegAsync().option("y").input(path).output(tmp_path, codec="copy", movflags="+faststart", f=ext)
             await ffmpeg.execute()
@@ -470,10 +476,25 @@ async def fix_video_rotation(path: str | Path | None) -> Path:
         return path
     if probe_info.get("rotation") in [-90, 90]:
         logger.warning(f"Fixing video rotation from {probe_info['height']}x{probe_info['width']}")
-        path = await convert_to_h264(path, re_encoding=True)
+        path = await convert_to_h264(path, allow_re_encoding=True)
     return path
 
 
+async def fix_ctts_invalid(path: str | Path | None) -> Path:
+    """Convert CTTS invalid video to H264.
+
+    CTTS invalid videos can't be played on the Telegram iOS client. A video without that flag is returned unchanged; a missing file yields Path("").
+    """
+    if path is None or not Path(path).expanduser().resolve().is_file():
+        return Path("")
+    path = Path(path).expanduser().resolve()
+    probe_info = await parse_media_info(path)
+    if not probe_info or not probe_info.get("ctts_invalid", False):
+        return path
+    logger.warning(f"Converting CTTS invalid video: {path.name}")
+    return await convert_to_h264(path, force_re_encoding=True)
+
+
 if __name__ == "__main__":
     # print(convert_to_h264("~/tests/test.mov"))
     # is_valid_video_or_audio("~/tests/test.jpg")