chore(PyAV): cleaning up PyAV utils and encoding parameters checks to stick to the minimun required tooling.

This commit is contained in:
CarolinePascal
2026-04-30 14:23:48 +02:00
parent cc9a2e5c99
commit 51b9038458
3 changed files with 150 additions and 173 deletions
+69 -129
View File
@@ -33,21 +33,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
FFMPEG_NUMERIC_OPTION_TYPES = ("INT", "INT64", "UINT64", "FLOAT", "DOUBLE")
# Abstract tuning fields of VideoEncoderConfig
TUNING_FIELDS: tuple[str, ...] = ("g", "crf", "preset", "fast_decode")
# Codec-specific FFmpeg private option whose value is controlled by the
# abstract ``crf`` tuning field.
CRF_OPTION_BY_CODEC: dict[str, str] = {
"libsvtav1": "crf",
"h264": "crf",
"hevc": "crf",
"h264_nvenc": "qp",
"hevc_nvenc": "qp",
"h264_vaapi": "qp",
"h264_qsv": "global_quality",
}
FFMPEG_INTEGER_OPTION_TYPES = ("INT", "INT64", "UINT64")
@functools.cache
@@ -95,85 +81,67 @@ def detect_available_encoders_pyav(encoders: list[str] | str) -> list[str]:
return available
def _is_field_supported(field_name: str, vcodec: str, options: dict[str, av.option.Option]) -> bool:
"""Whether tuning option *field_name* is meaningful for *vcodec*."""
# GOP is a stream-level option (AVStream.gop_size) not stored in private options.
# Every video codec accepts it.
if field_name == "g":
return True
if field_name == "crf":
# Semantic "crf" maps to the codec's private option (see
# CRF_OPTION_BY_CODEC), or to stream-level q:v for VideoToolbox.
opt_name = CRF_OPTION_BY_CODEC.get(vcodec)
return (opt_name is not None and opt_name in options) or vcodec in {
"h264_videotoolbox",
"hevc_videotoolbox",
}
if field_name == "fast_decode":
# libsvtav1: svtav1-params:fast-decode=N — h264/hevc: tune=fastdecode.
return "svtav1-params" in options or "tune" in options
# preset and any future private-option-backed field: direct membership test.
return field_name in options
def _check_numeric_range(label: str, num: float, opt: av.option.Option, vcodec: str) -> None:
"""Raise if *num* lies outside *opt*'s numeric range (no-op if range is degenerate)."""
lo, hi = float(opt.min), float(opt.max)
if lo < hi and not (lo <= num <= hi):
raise ValueError(f"{label}={num} is out of range for codec {vcodec!r}; must be in [{lo}, {hi}]")
def _validate_option_value(vcodec: str, field_name: str, value: Any, opt: av.option.Option) -> None:
"""Range-check numeric *value* and choice-check string *value* against *opt*.
Type mismatches fall through to FFmpeg's own validation at encode time.
"""
def _check_option_value(vcodec: str, label: str, value: Any, opt: av.option.Option) -> None:
"""Range-check numeric *value* and choice-check string *value* against *opt*."""
type_name = opt.type.name
if type_name in FFMPEG_NUMERIC_OPTION_TYPES:
if isinstance(value, bool) or not isinstance(value, (int, float)):
return
_check_numeric_range(field_name, float(value), opt, vcodec)
elif type_name == "STRING":
if not isinstance(value, str):
return
choices = [c.name for c in (opt.choices or [])]
if choices and value not in choices:
if isinstance(value, bool):
raise ValueError(
f"{field_name}={value!r} is not a supported choice for codec "
f"{label}={value!r} is not numeric; codec {vcodec!r} expects a number for this option."
)
elif isinstance(value, str):
try:
num_val = float(value)
except ValueError as e:
raise ValueError(
f"{label}={value!r} is not numeric; codec {vcodec!r} expects a number for this option."
) from e
elif isinstance(value, (float, int)):
num_val = value
else:
raise ValueError(
f"{label}={value!r} is not numeric; codec {vcodec!r} expects a number for this option."
)
# Check integer type compatibility
if type_name in FFMPEG_INTEGER_OPTION_TYPES and not num_val.is_integer():
raise ValueError(
f"{label}={num_val!r} must be an integer for codec {vcodec!r} "
f"(FFmpeg option {opt.name!r} is {type_name}); float values are not allowed."
)
# Check numeric range compatibility
lo, hi = float(opt.min), float(opt.max)
if lo < hi and not (lo <= num_val <= hi):
raise ValueError(f"{label}={num_val} is out of range for codec {vcodec!r}; must be in [{lo}, {hi}]")
elif type_name == "STRING":
if isinstance(value, bool):
raise ValueError(
f"{label}={value!r} is not a valid string value for codec {vcodec!r}."
)
if isinstance(value, str):
str_val = value
elif isinstance(value, (int, float)):
str_val = str(value)
else:
raise ValueError(
f"{label}={value!r} has unsupported type for STRING option on codec {vcodec!r}"
)
# Check string choice compatibility
choices = [c.name for c in (opt.choices or [])]
if choices and str_val not in choices:
raise ValueError(
f"{label}={str_val!r} is not a supported choice for codec "
f"{vcodec!r}; valid choices: {choices}"
)
else:
return
def _validate_extra_option(vcodec: str, key: str, value: Any, opt: av.option.Option) -> None:
"""Validate an ``extra_options`` entry: enforce numeric range/type only.
Non-numeric options are passed through (FFmpeg accepts many ad-hoc strings).
"""
if opt.type.name not in FFMPEG_NUMERIC_OPTION_TYPES:
return
label = f"extra_options[{key!r}]"
not_numeric = ValueError(
f"{label}={value!r} is not numeric; codec {vcodec!r} expects a number for this option."
)
if isinstance(value, bool):
raise not_numeric
if isinstance(value, (int, float)):
num = float(value)
elif isinstance(value, str):
try:
num = float(value)
except ValueError as e:
raise not_numeric from e
else:
raise not_numeric
_check_numeric_range(label, num, opt, vcodec)
def _check_pixel_format(vcodec: str, pix_fmt: str, formats: tuple[str, ...]) -> None:
def _check_pixel_format(vcodec: str, pix_fmt: str) -> None:
formats = _get_codec_video_formats(vcodec)
if formats and pix_fmt not in formats:
raise ValueError(
f"pix_fmt={pix_fmt!r} is not supported by codec {vcodec!r}; "
@@ -181,57 +149,30 @@ def _check_pixel_format(vcodec: str, pix_fmt: str, formats: tuple[str, ...]) ->
)
def _check_tuning_fields(
config: VideoEncoderConfig, vcodec: str, options: dict[str, av.option.Option]
def _check_codec_options(
vcodec: str, codec_options: dict[str, Any], config: VideoEncoderConfig
) -> None:
supported_fields = [f for f in TUNING_FIELDS if _is_field_supported(f, vcodec, options)]
for field_name in TUNING_FIELDS:
value = getattr(config, field_name)
if not value:
continue
if field_name not in supported_fields:
raise ValueError(
f"{field_name}={value!r} is not supported by codec {vcodec!r}; "
f"supported fields for this codec: {supported_fields}"
)
# ``g`` is stream-level (AVCodecContext.gop_size), not in ``options``.
# Enforce a positive integer value.
if field_name == "g":
"""Validate merged encoder options (typed) against the codec's published AVOptions."""
supported_options = _get_codec_options_by_name(vcodec)
for key, value in codec_options.items():
# GOP size is not a codec-specific option, it has to be validated separately.
if key == "g":
if isinstance(value, bool) or not isinstance(value, int) or value < 1:
raise ValueError(f"g={value!r} must be a positive integer for codec {vcodec!r}")
continue
# Value shape is only cross-checkable when the field maps directly
# to a private option: ``preset`` is literally ``"preset"``;
# ``crf`` maps per-codec. ``fast_decode`` (composite) falls through
# to FFmpeg at encode time.
if field_name == "preset":
opt = options.get("preset")
elif field_name == "crf":
opt = options.get(CRF_OPTION_BY_CODEC.get(vcodec, ""))
else:
if key not in supported_options:
continue
if opt is not None:
_validate_option_value(vcodec, field_name, value, opt)
def _check_extra_options(
config: VideoEncoderConfig, vcodec: str, options: dict[str, av.option.Option]
) -> None:
# Torchcodec-style: only validate keys the codec exposes as AVOptions,
# and only enforce numeric range / numeric-type. Everything else is
# passed through (muxer options, ``x264-params``-style strings, etc.).
for key, value in config.extra_options.items():
opt = options.get(key)
if opt is None:
continue
_validate_extra_option(vcodec, key, value, opt)
opt = supported_options[key]
label = f"extra_options[{key!r}]" if key in config.extra_options else key
_check_option_value(vcodec, label, value, opt)
def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
"""Verify *config* is compatible with the bundled FFmpeg build.
Checks pixel format, tuning-field availability, value range/choices for
fields that map to a private option, and numeric ``extra_options``.
Checks pixel format, abstract tuning-field compatibility, and each merged
encoder option from :meth:`~lerobot.datasets.video_utils.VideoEncoderConfig.get_codec_options`
against PyAV (including numeric ``extra_options`` present in that dict).
No-op when ``config.vcodec`` isn't in the local FFmpeg build.
Raises:
@@ -245,6 +186,5 @@ def check_video_encoder_config_pyav(config: VideoEncoderConfig) -> None:
vcodec,
)
return
_check_pixel_format(vcodec, config.pix_fmt, _get_codec_video_formats(vcodec))
_check_tuning_fields(config, vcodec, options)
_check_extra_options(config, vcodec, options)
_check_pixel_format(config.vcodec, config.pix_fmt)
_check_codec_options(config.vcodec, config.get_codec_options(), config)
+26 -17
View File
@@ -74,9 +74,11 @@ class VideoEncoderConfig:
codec (``crf`` for software, ``qp`` for NVENC/VAAPI,
``q:v`` for VideoToolbox, ``global_quality`` for QSV).
preset: Speed/quality preset. Accepted type is per-codec.
fast_decode: Fast-decode tuning. For libsvtav1 this is a level
(0-4); for h264/hevc any non-zero value enables ``tune=fastdecode``.
video_backend: Python library driving FFmpeg for encoding. Only ``"pyav"`` is currently supported.
fast_decode: Fast-decode tuning. For ``libsvtav1`` this is a level (0-2)
embedded in ``svtav1-params``. For ``h264`` and ``hevc`` non-zero values
set ``tune=fastdecode``. Ignored for other codecs.
video_backend: Python library driving FFmpeg for encoding. Only ``"pyav"``
is currently supported.
extra_options: Free-form dictionary of additional FFmpeg options
(e.g. ``{"tune": "film", "profile:v": "high", "bf": 2}``).
"""
@@ -138,42 +140,46 @@ class VideoEncoderConfig:
return
raise ValueError(f"Unsupported video codec: {self.vcodec} with video backend {self.video_backend}")
def get_codec_options(self, encoder_threads: int | None = None) -> dict[str, str]:
def get_codec_options(self, encoder_threads: int | None = None, as_strings: bool = False) -> dict[str, str]:
"""Translate the tuning fields to codec-specific FFmpeg options.
``VideoEncoderConfig.extra_options`` are merged last but never override a structured field.
Args:
encoder_threads: Number of encoder threads set globally for all VideoEncoderConfigs. For libsvtav1, this is mapped to ``lp`` via ``svtav1-params``. For h264/hevc, this is mapped to ``threads``. Hardware encoders ignore this.
encoder_threads: Number of encoder threads set globally for all VideoEncoderConfigs.
For libsvtav1, this is mapped to ``lp`` via ``svtav1-params``.
For h264/hevc, this is mapped to ``threads``.
Hardware encoders ignore this parameter.
as_strings: If ``True``, casts values to strings.
"""
opts: dict[str, str] = {}
opts: dict[str, Any] = {}
def set_if(key: str, value: Any) -> None:
if value is not None:
opts[key] = str(value)
opts[key] = value if not as_strings else str(value)
# GOP size is not a codec-specific option, so it is always set.
set_if("g", self.g)
if self.vcodec == "libsvtav1":
set_if("g", self.g)
set_if("crf", self.crf)
set_if("preset", self.preset)
svtav1_parts: list[str] = []
if self.fast_decode:
svtav1_parts.append(f"fast-decode={self.fast_decode}")
if self.fast_decode is not None:
svtav1_parts.append(f"fast-decode={max(0, min(2, self.fast_decode))}")
if encoder_threads is not None:
svtav1_parts.append(f"lp={encoder_threads}")
if svtav1_parts:
opts["svtav1-params"] = ":".join(svtav1_parts)
elif self.vcodec in ("h264", "hevc"):
set_if("g", self.g)
set_if("crf", self.crf)
set_if("preset", self.preset)
if self.fast_decode:
opts["tune"] = "fastdecode"
set_if("threads", encoder_threads)
elif self.vcodec in ("h264_videotoolbox", "hevc_videotoolbox"):
set_if("g", self.g)
if self.crf is not None:
opts["q:v"] = str(max(1, min(100, 100 - self.crf * 2)))
opts["q:v"] = max(1, min(100, 100 - self.crf * 2))
elif self.vcodec in ("h264_nvenc", "hevc_nvenc"):
opts["rc"] = "constqp"
set_if("qp", self.crf)
@@ -184,12 +190,13 @@ class VideoEncoderConfig:
set_if("global_quality", self.crf)
set_if("preset", self.preset)
else:
set_if("g", self.g)
set_if("crf", self.crf)
set_if("preset", self.preset)
# Extra options are merged last but never override structured fields (values are kept as given).
for k, v in self.extra_options.items():
opts.setdefault(k, str(v))
if k not in opts:
set_if(k, v)
return opts
@@ -513,7 +520,7 @@ def encode_video_frames(
with Image.open(input_list[0]) as dummy_image:
width, height = dummy_image.size
video_options = camera_encoder_config.get_codec_options(encoder_threads)
video_options = camera_encoder_config.get_codec_options(encoder_threads, as_strings=True)
# Set logging level
if log_level is not None:
@@ -834,7 +841,9 @@ class StreamingVideoEncoder:
video_path = temp_video_dir / f"{video_key.replace('/', '_')}_streaming.mp4"
vcodec = self._camera_encoder_config.vcodec
codec_options = self._camera_encoder_config.get_codec_options(self._encoder_threads)
codec_options = self._camera_encoder_config.get_codec_options(
self._encoder_threads, as_strings=True
)
encoder_thread = _CameraEncoderThread(
video_path=video_path,
fps=self.fps,
+55 -27
View File
@@ -61,29 +61,29 @@ class TestCodecOptions:
def test_libsvtav1_defaults(self):
cfg = VideoEncoderConfig()
opts = cfg.get_codec_options()
assert opts["g"] == "2"
assert opts["crf"] == "30"
assert opts["preset"] == "12"
assert opts["g"] == 2
assert opts["crf"] == 30
assert opts["preset"] == 12
@require_libsvtav1
def test_libsvtav1_custom_preset(self):
cfg = VideoEncoderConfig(preset=8)
assert cfg.get_codec_options()["preset"] == "8"
assert cfg.get_codec_options()["preset"] == 8
@require_h264
def test_h264_options(self):
cfg = VideoEncoderConfig(vcodec="h264", g=10, crf=23, preset=None)
opts = cfg.get_codec_options()
assert opts["g"] == "10"
assert opts["crf"] == "23"
assert opts["g"] == 10
assert opts["crf"] == 23
assert "preset" not in opts
@require_videotoolbox
def test_videotoolbox_options(self):
cfg = VideoEncoderConfig(vcodec="h264_videotoolbox", g=2, crf=30, preset=None)
opts = cfg.get_codec_options()
assert opts["g"] == "2"
assert opts["q:v"] == "40"
assert opts["g"] == 2
assert opts["q:v"] == 40
assert "crf" not in opts
@_require_encoder("h264_nvenc")
@@ -91,19 +91,19 @@ class TestCodecOptions:
cfg = VideoEncoderConfig(vcodec="h264_nvenc", g=2, crf=25, preset=None)
opts = cfg.get_codec_options()
assert opts["rc"] == "constqp"
assert opts["qp"] == "25"
assert opts["qp"] == 25
assert "crf" not in opts
assert "g" not in opts
@_require_encoder("h264_vaapi")
def test_vaapi_options(self):
cfg = VideoEncoderConfig(vcodec="h264_vaapi", crf=28, preset=None)
assert cfg.get_codec_options()["qp"] == "28"
assert cfg.get_codec_options()["qp"] == 28
@_require_encoder("h264_qsv")
def test_qsv_options(self):
cfg = VideoEncoderConfig(vcodec="h264_qsv", crf=25, preset=None)
assert cfg.get_codec_options()["global_quality"] == "25"
assert cfg.get_codec_options()["global_quality"] == 25
@require_h264
def test_no_g_no_crf(self):
@@ -121,7 +121,7 @@ class TestCodecOptions:
@require_h264
def test_encoder_threads_h264(self):
cfg = VideoEncoderConfig(vcodec="h264", preset=None)
assert cfg.get_codec_options(encoder_threads=2)["threads"] == "2"
assert cfg.get_codec_options(encoder_threads=2)["threads"] == 2
@require_libsvtav1
def test_fast_decode_libsvtav1(self):
@@ -129,6 +129,14 @@ class TestCodecOptions:
opts = cfg.get_codec_options()
assert "fast-decode=1" in opts.get("svtav1-params", "")
@require_libsvtav1
def test_libsvtav1_fast_decode_clamped_to_svt_range(self):
"""Out-of-range fast_decode is clamped to [0, 2] in svtav1-params (SVT-AV1 FastDecode)."""
cfg = VideoEncoderConfig(fast_decode=100)
assert "fast-decode=2" in cfg.get_codec_options().get("svtav1-params", "")
cfg_neg = VideoEncoderConfig(fast_decode=-5)
assert "fast-decode=0" in cfg_neg.get_codec_options().get("svtav1-params", "")
@require_h264
def test_fast_decode_h264(self):
cfg = VideoEncoderConfig(vcodec="h264", fast_decode=1, preset=None)
@@ -156,10 +164,10 @@ class TestCodecOptions:
assert cfg.get_codec_options()["preset"] == "slow"
@require_videotoolbox
def test_preset_on_videotoolbox_raises(self):
def test_preset_on_videotoolbox_not_set(self):
"""videotoolbox has no preset option at all."""
with pytest.raises(ValueError, match="preset"):
VideoEncoderConfig(vcodec="h264_videotoolbox", preset="slow")
cfg = VideoEncoderConfig(vcodec="h264_videotoolbox", preset="slow")
assert "preset" not in cfg.get_codec_options()
@require_libsvtav1
def test_libsvtav1_preset_out_of_range_raises(self):
@@ -175,11 +183,36 @@ class TestCodecOptions:
with pytest.raises(ValueError, match="crf.*out of range"):
VideoEncoderConfig(vcodec="libsvtav1", crf=64)
@require_libsvtav1
def test_libsvtav1_crf_rejects_python_float(self):
"""libsvtav1 exposes ``crf`` as an INT AVOption; Python float must not pass validation."""
with pytest.raises(ValueError, match="float values are not allowed"):
VideoEncoderConfig(vcodec="libsvtav1", crf=2.5)
@require_libsvtav1
def test_libsvtav1_extra_crf_rejects_fractional_string(self):
"""INT options reject fractional values even when supplied only via ``extra_options``."""
with pytest.raises(ValueError, match="float values are not allowed"):
VideoEncoderConfig(
vcodec="libsvtav1",
crf=None,
extra_options={"crf": "2.5"},
)
@require_libsvtav1
def test_libsvtav1_extra_crf_rejects_float(self):
with pytest.raises(ValueError, match="float values are not allowed"):
VideoEncoderConfig(
vcodec="libsvtav1",
crf=None,
extra_options={"crf": 2.5},
)
@require_h264
def test_h264_crf_accepts_float_and_int(self):
"""x264 exposes crf as a FLOAT option, so both int and float are accepted."""
assert VideoEncoderConfig(vcodec="h264", crf=23).get_codec_options()["crf"] == "23"
assert VideoEncoderConfig(vcodec="h264", crf=23.5).get_codec_options()["crf"] == "23.5"
assert VideoEncoderConfig(vcodec="h264", crf=23).get_codec_options()["crf"] == 23
assert VideoEncoderConfig(vcodec="h264", crf=23.5).get_codec_options()["crf"] == 23.5
@require_libsvtav1
def test_validate_is_rerunnable(self):
@@ -189,12 +222,6 @@ class TestCodecOptions:
with pytest.raises(ValueError, match="out of range"):
cfg.validate()
@require_videotoolbox
def test_fast_decode_on_videotoolbox_raises(self):
"""videotoolbox has no `tune` option; fast_decode must not be silently dropped."""
with pytest.raises(ValueError, match="fast_decode"):
VideoEncoderConfig(vcodec="h264_videotoolbox", preset=None, fast_decode=1)
class TestExtraOptions:
@require_libsvtav1
@@ -249,17 +276,18 @@ class TestExtraOptions:
@require_libsvtav1
def test_merged_into_codec_options_and_stringified(self):
"""extra_options are merged into get_codec_options() as strings."""
"""Typed merge by default; ``as_strings=True`` matches FFmpeg option dict."""
cfg = VideoEncoderConfig(extra_options={"qp": 20})
opts = cfg.get_codec_options()
assert opts["qp"] == "20"
assert isinstance(opts["qp"], str)
assert opts["qp"] == 20
assert isinstance(opts["qp"], int)
assert cfg.get_codec_options(as_strings=True)["qp"] == "20"
@require_libsvtav1
def test_structured_fields_win_on_collision(self):
"""A colliding extra_options key is discarded; the structured field wins."""
cfg = VideoEncoderConfig(crf=30, extra_options={"crf": 18})
assert cfg.get_codec_options()["crf"] == "30"
assert cfg.get_codec_options()["crf"] == 30
class TestEncoderDetection: