diff --git a/tests/datasets/test_streaming_video_encoder.py b/tests/datasets/test_streaming_video_encoder.py index 8b7a1540f..e49c477db 100644 --- a/tests/datasets/test_streaming_video_encoder.py +++ b/tests/datasets/test_streaming_video_encoder.py @@ -14,11 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for streaming video encoding and hardware-accelerated encoding.""" +"""Tests for streaming video encoding.""" import queue import threading -from unittest.mock import patch import numpy as np import pytest @@ -27,112 +26,20 @@ pytest.importorskip("av", reason="av is required (install lerobot[dataset])") import av # noqa: E402 +from lerobot.datasets.pyav_utils import get_codec from lerobot.datasets.video_utils import ( - VALID_VIDEO_CODECS, StreamingVideoEncoder, + VideoEncoderConfig, _CameraEncoderThread, - _get_codec_options, - detect_available_hw_encoders, - resolve_vcodec, ) from lerobot.utils.constants import OBS_IMAGES -# ─── _get_codec_options tests ─── - - -class TestGetCodecOptions: - def test_libsvtav1_defaults(self): - opts = _get_codec_options("libsvtav1") - assert opts["g"] == "2" - assert opts["crf"] == "30" - assert opts["preset"] == "12" - - def test_libsvtav1_custom_preset(self): - opts = _get_codec_options("libsvtav1", preset=8) - assert opts["preset"] == "8" - - def test_h264_options(self): - opts = _get_codec_options("h264", g=10, crf=23) - assert opts["g"] == "10" - assert opts["crf"] == "23" - assert "preset" not in opts - - def test_videotoolbox_options(self): - opts = _get_codec_options("h264_videotoolbox", g=2, crf=30) - assert opts["g"] == "2" - # CRF 30 maps to quality = max(1, min(100, 100 - 30*2)) = 40 - assert opts["q:v"] == "40" - assert "crf" not in opts - - def test_nvenc_options(self): - opts = _get_codec_options("h264_nvenc", g=2, crf=25) - assert opts["rc"] == "constqp" - assert opts["qp"] == "25" - assert "crf" not in opts - # NVENC doesn't support g - 
assert "g" not in opts - - def test_vaapi_options(self): - opts = _get_codec_options("h264_vaapi", crf=28) - assert opts["qp"] == "28" - - def test_qsv_options(self): - opts = _get_codec_options("h264_qsv", crf=25) - assert opts["global_quality"] == "25" - - def test_no_g_no_crf(self): - opts = _get_codec_options("h264", g=None, crf=None) - assert "g" not in opts - assert "crf" not in opts - - -# ─── HW encoder detection tests ─── - - -class TestHWEncoderDetection: - def test_detect_available_hw_encoders_returns_list(self): - result = detect_available_hw_encoders() - assert isinstance(result, list) - - def test_detect_available_hw_encoders_only_valid(self): - from lerobot.datasets.video_utils import HW_ENCODERS - - result = detect_available_hw_encoders() - for encoder in result: - assert encoder in HW_ENCODERS - - def test_resolve_vcodec_passthrough(self): - assert resolve_vcodec("libsvtav1") == "libsvtav1" - assert resolve_vcodec("h264") == "h264" - - def test_resolve_vcodec_auto_fallback(self): - """When no HW encoders are available, auto should fall back to libsvtav1.""" - with patch("lerobot.datasets.video_utils.detect_available_hw_encoders", return_value=[]): - assert resolve_vcodec("auto") == "libsvtav1" - - def test_resolve_vcodec_auto_picks_hw(self): - """When a HW encoder is available, auto should pick it.""" - with patch( - "lerobot.datasets.video_utils.detect_available_hw_encoders", - return_value=["h264_videotoolbox"], - ): - assert resolve_vcodec("auto") == "h264_videotoolbox" - - def test_resolve_vcodec_auto_returns_valid(self): - """Test that resolve_vcodec('auto') returns a known valid codec.""" - result = resolve_vcodec("auto") - assert result in VALID_VIDEO_CODECS - - def test_hw_encoder_names_accepted_in_validation(self): - """Test that HW encoder names pass validation in VALID_VIDEO_CODECS.""" - assert "auto" in VALID_VIDEO_CODECS - assert "h264_videotoolbox" in VALID_VIDEO_CODECS - assert "h264_nvenc" in VALID_VIDEO_CODECS - - def 
test_resolve_vcodec_invalid_raises(self): - """Test that resolve_vcodec raises ValueError for invalid codecs.""" - with pytest.raises(ValueError, match="Invalid vcodec"): - resolve_vcodec("not_a_real_codec") +# Cross-codec validation tests only fire when the target codec is present +# in the local FFmpeg build; on other platforms validate() is a no-op. +_has_videotoolbox = get_codec("h264_videotoolbox") is not None +_videotoolbox_only = pytest.mark.skipif( + not _has_videotoolbox, reason="h264_videotoolbox not in local FFmpeg build" +) # ─── _CameraEncoderThread tests ─── @@ -150,14 +57,13 @@ class TestCameraEncoderThread: result_queue: queue.Queue = queue.Queue(maxsize=1) stop_event = threading.Event() + enc_cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) encoder_thread = _CameraEncoderThread( video_path=video_path, fps=fps, - vcodec="libsvtav1", - pix_fmt="yuv420p", - g=2, - crf=30, - preset=13, + vcodec=enc_cfg.vcodec, + pix_fmt=enc_cfg.pix_fmt, + codec_options=enc_cfg.get_codec_options(), frame_queue=frame_queue, result_queue=result_queue, stop_event=stop_event, @@ -202,14 +108,13 @@ class TestCameraEncoderThread: result_queue: queue.Queue = queue.Queue(maxsize=1) stop_event = threading.Event() + enc_cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) encoder_thread = _CameraEncoderThread( video_path=video_path, fps=fps, - vcodec="libsvtav1", - pix_fmt="yuv420p", - g=2, - crf=30, - preset=13, + vcodec=enc_cfg.vcodec, + pix_fmt=enc_cfg.pix_fmt, + codec_options=enc_cfg.get_codec_options(), frame_queue=frame_queue, result_queue=result_queue, stop_event=stop_event, @@ -237,14 +142,13 @@ class TestCameraEncoderThread: result_queue: queue.Queue = queue.Queue(maxsize=1) stop_event = threading.Event() + enc_cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) encoder_thread = _CameraEncoderThread( video_path=video_path, fps=fps, - vcodec="libsvtav1", - 
pix_fmt="yuv420p", - g=2, - crf=30, - preset=13, + vcodec=enc_cfg.vcodec, + pix_fmt=enc_cfg.pix_fmt, + codec_options=enc_cfg.get_codec_options(), frame_queue=frame_queue, result_queue=result_queue, stop_event=stop_event, @@ -266,11 +170,20 @@ class TestCameraEncoderThread: class TestStreamingVideoEncoder: + def _make_encoder_config(self, **kwargs): + """Helper to build a VideoEncoderConfig.""" + return VideoEncoderConfig(**kwargs) + def test_single_camera_episode(self, tmp_path): """Test encoding a single camera episode.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) - video_keys = [f"{OBS_IMAGES}.laptop"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13 + ), + ) + encoder.start_episode(video_keys, tmp_path) num_frames = 20 @@ -295,9 +208,13 @@ class TestStreamingVideoEncoder: def test_multi_camera_episode(self, tmp_path): """Test encoding multiple cameras simultaneously.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) - video_keys = [f"{OBS_IMAGES}.laptop", f"{OBS_IMAGES}.phone"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 + ), + ) encoder.start_episode(video_keys, tmp_path) num_frames = 15 @@ -319,8 +236,13 @@ class TestStreamingVideoEncoder: def test_sequential_episodes(self, tmp_path): """Test that multiple sequential episodes work correctly.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) video_keys = [f"{OBS_IMAGES}.cam"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 + ), + ) for ep in range(3): encoder.start_episode(video_keys, tmp_path) @@ -342,8 +264,13 @@ class 
TestStreamingVideoEncoder: def test_cancel_episode(self, tmp_path): """Test that canceling an episode cleans up properly.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) video_keys = [f"{OBS_IMAGES}.cam"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 + ), + ) encoder.start_episode(video_keys, tmp_path) @@ -365,28 +292,33 @@ class TestStreamingVideoEncoder: def test_feed_without_start_raises(self, tmp_path): """Test that feeding frames without starting an episode raises.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") + encoder = StreamingVideoEncoder(fps=30) with pytest.raises(RuntimeError, match="No active episode"): encoder.feed_frame("cam", np.zeros((64, 96, 3), dtype=np.uint8)) encoder.close() def test_finish_without_start_raises(self, tmp_path): """Test that finishing without starting raises.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") + encoder = StreamingVideoEncoder(fps=30) with pytest.raises(RuntimeError, match="No active episode"): encoder.finish_episode() encoder.close() def test_close_is_idempotent(self, tmp_path): """Test that close() can be called multiple times safely.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") + encoder = StreamingVideoEncoder(fps=30) encoder.close() encoder.close() # Should not raise def test_video_duration_matches_frame_count(self, tmp_path): """Test that encoded video duration matches num_frames / fps.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) video_keys = [f"{OBS_IMAGES}.cam"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13 + ), + ) encoder.start_episode(video_keys, tmp_path) num_frames = 
90 # 3 seconds at 30fps @@ -417,9 +349,13 @@ class TestStreamingVideoEncoder: def test_multi_camera_start_episode_called_once(self, tmp_path): """Test that with multiple cameras, no frames are lost due to double start_episode.""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) - video_keys = [f"{OBS_IMAGES}.cam1", f"{OBS_IMAGES}.cam2"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 + ), + ) encoder.start_episode(video_keys, tmp_path) num_frames = 30 @@ -446,17 +382,24 @@ class TestStreamingVideoEncoder: def test_encoder_threads_passed_to_thread(self, tmp_path): """Test that encoder_threads is stored and passed through to encoder threads.""" - encoder = StreamingVideoEncoder( - fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, encoder_threads=2 - ) - assert encoder.encoder_threads == 2 - video_keys = [f"{OBS_IMAGES}.cam"] + cfg = VideoEncoderConfig( + vcodec="libsvtav1", + pix_fmt="yuv420p", + g=2, + crf=30, + ) + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=cfg, + encoder_threads=2, + ) + assert encoder._encoder_threads == 2 encoder.start_episode(video_keys, tmp_path) - # Verify the thread received the encoder_threads value + # Verify codec options include thread tuning for libsvtav1 (lp=…) thread = encoder._threads[f"{OBS_IMAGES}.cam"] - assert thread.encoder_threads == 2 + assert "svtav1-params" in thread.codec_options or "threads" in thread.codec_options # Feed some frames and finish to ensure it works end-to-end num_frames = 10 @@ -478,16 +421,20 @@ class TestStreamingVideoEncoder: def test_encoder_threads_none_by_default(self, tmp_path): """Test that encoder_threads defaults to None (codec auto-detect).""" - encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") - assert encoder.encoder_threads is None + encoder = StreamingVideoEncoder(fps=30) + assert 
encoder._encoder_threads is None encoder.close() def test_graceful_frame_dropping(self, tmp_path): """Test that full queue drops frames instead of crashing.""" - encoder = StreamingVideoEncoder( - fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13, queue_maxsize=1 - ) video_keys = [f"{OBS_IMAGES}.cam"] + encoder = StreamingVideoEncoder( + fps=30, + camera_encoder_config=self._make_encoder_config( + vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13 + ), + queue_maxsize=1, + ) encoder.start_episode(video_keys, tmp_path) # Feed many frames quickly - with queue_maxsize=1, some will be dropped diff --git a/tests/datasets/test_video_encoding.py b/tests/datasets/test_video_encoding.py new file mode 100644 index 000000000..7826ab42b --- /dev/null +++ b/tests/datasets/test_video_encoding.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python + +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for ``lerobot.datasets.video_utils`` encoding functions and ``VideoEncoderConfig`` config class.""" + +import json +from pathlib import Path + +import numpy as np +import pytest + +pytest.importorskip("av", reason="av is required (install lerobot[dataset])") + +import av # noqa: E402 + +from lerobot.datasets.image_writer import write_image +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.pyav_utils import detect_available_encoders_pyav, get_codec +from lerobot.datasets.utils import INFO_PATH +from lerobot.datasets.video_utils import ( + VALID_VIDEO_CODECS, + VideoEncoderConfig, + concatenate_video_files, + encode_video_frames, + get_video_info, +) + +# Per-codec skip markers — validation tests only fire when the codec is available +def _require_encoder(vcodec: str) -> pytest.MarkDecorator: + """Skip the test if ``vcodec`` is not available in the local FFmpeg build.""" + return pytest.mark.skipif( + get_codec(vcodec) is None, reason=f"{vcodec!r} not in local FFmpeg build" + ) + + +require_libsvtav1 = _require_encoder("libsvtav1") +require_h264 = _require_encoder("h264") +require_videotoolbox = _require_encoder("h264_videotoolbox") +require_nvenc = _require_encoder("h264_nvenc") +require_vaapi = _require_encoder("h264_vaapi") +require_qsv = _require_encoder("h264_qsv") + + +# ─── VideoEncoderConfig / codec options ────────────────────────────── + + +class TestCodecOptions: + @require_libsvtav1 + def test_libsvtav1_defaults(self): + cfg = VideoEncoderConfig() + opts = cfg.get_codec_options() + assert opts["g"] == "2" + assert opts["crf"] == "30" + assert opts["preset"] == "12" + + @require_libsvtav1 + def test_libsvtav1_custom_preset(self): + cfg = VideoEncoderConfig(preset=8) + assert cfg.get_codec_options()["preset"] == "8" + + @require_h264 + def test_h264_options(self): + cfg = VideoEncoderConfig(vcodec="h264", g=10, crf=23, preset=None) + opts = cfg.get_codec_options() + assert opts["g"] == "10" + assert 
opts["crf"] == "23" + assert "preset" not in opts + + @require_videotoolbox + def test_videotoolbox_options(self): + cfg = VideoEncoderConfig(vcodec="h264_videotoolbox", g=2, crf=30, preset=None) + opts = cfg.get_codec_options() + assert opts["g"] == "2" + assert opts["q:v"] == "40" + assert "crf" not in opts + + @require_nvenc + def test_nvenc_options(self): + cfg = VideoEncoderConfig(vcodec="h264_nvenc", g=2, crf=25, preset=None) + opts = cfg.get_codec_options() + assert opts["rc"] == "constqp" + assert opts["qp"] == "25" + assert "crf" not in opts + assert "g" not in opts + + @require_vaapi + def test_vaapi_options(self): + cfg = VideoEncoderConfig(vcodec="h264_vaapi", crf=28, preset=None) + assert cfg.get_codec_options()["qp"] == "28" + + @require_qsv + def test_qsv_options(self): + cfg = VideoEncoderConfig(vcodec="h264_qsv", crf=25, preset=None) + assert cfg.get_codec_options()["global_quality"] == "25" + + @require_h264 + def test_no_g_no_crf(self): + cfg = VideoEncoderConfig(vcodec="h264", g=None, crf=None, preset=None) + opts = cfg.get_codec_options() + assert "g" not in opts + assert "crf" not in opts + + @require_libsvtav1 + def test_encoder_threads_libsvtav1(self): + cfg = VideoEncoderConfig(fast_decode=0) + opts = cfg.get_codec_options(encoder_threads=4) + assert "lp=4" in opts.get("svtav1-params", "") + + @require_h264 + def test_encoder_threads_h264(self): + cfg = VideoEncoderConfig(vcodec="h264", preset=None) + assert cfg.get_codec_options(encoder_threads=2)["threads"] == "2" + + @require_libsvtav1 + def test_fast_decode_libsvtav1(self): + cfg = VideoEncoderConfig(fast_decode=1) + opts = cfg.get_codec_options() + assert "fast-decode=1" in opts.get("svtav1-params", "") + + @require_h264 + def test_fast_decode_h264(self): + cfg = VideoEncoderConfig(vcodec="h264", fast_decode=1, preset=None) + assert cfg.get_codec_options()["tune"] == "fastdecode" + + @require_libsvtav1 + def 
test_pix_fmt_unsupported_raises(self): + """Passing an unsupported pix_fmt is a hard error.""" + with pytest.raises(ValueError, match="pix_fmt"): + VideoEncoderConfig(pix_fmt="yuv444p") # libsvtav1 only supports yuv420p variants + + @require_libsvtav1 + @require_h264 + def test_preset_default_behaviour(self): + """Empty constructor picks preset=12 (libsvtav1 path); other codecs stay None.""" + assert VideoEncoderConfig().preset == 12 + assert VideoEncoderConfig(vcodec="libsvtav1").preset == 12 + assert VideoEncoderConfig(vcodec="h264").preset is None + assert VideoEncoderConfig(vcodec="h264", preset=None).preset is None + + @require_h264 + def test_preset_string_on_h264(self): + """h264 accepts string presets and forwards them to FFmpeg.""" + cfg = VideoEncoderConfig(vcodec="h264", preset="slow") + assert cfg.get_codec_options()["preset"] == "slow" + + @require_videotoolbox + def test_preset_on_videotoolbox_raises(self): + """videotoolbox has no preset option at all.""" + with pytest.raises(ValueError, match="preset"): + VideoEncoderConfig(vcodec="h264_videotoolbox", preset="slow") + + @require_libsvtav1 + def test_libsvtav1_preset_out_of_range_raises(self): + """libsvtav1 preset must sit in [-2, 13] as exposed by PyAV.""" + with pytest.raises(ValueError, match="out of range"): + VideoEncoderConfig(vcodec="libsvtav1", preset=100) + with pytest.raises(ValueError, match="out of range"): + VideoEncoderConfig(vcodec="libsvtav1", preset=-3) + + @require_libsvtav1 + def test_libsvtav1_crf_out_of_range_raises(self): + """libsvtav1 crf must sit in [0, 63].""" + with pytest.raises(ValueError, match="crf.*out of range"): + VideoEncoderConfig(vcodec="libsvtav1", crf=64) + + @require_h264 + def test_h264_crf_accepts_float_and_int(self): + """x264 exposes crf as a FLOAT option, so both int and float are accepted.""" + assert VideoEncoderConfig(vcodec="h264", crf=23).get_codec_options()["crf"] == "23" + assert VideoEncoderConfig(vcodec="h264", 
crf=23.5).get_codec_options()["crf"] == "23.5" + + @require_libsvtav1 + def test_validate_is_rerunnable(self): + """After mutating a field, validate() re-checks and surfaces new issues.""" + cfg = VideoEncoderConfig(vcodec="libsvtav1") + cfg.preset = 100 # now out of range + with pytest.raises(ValueError, match="out of range"): + cfg.validate() + + @require_videotoolbox + def test_fast_decode_on_videotoolbox_raises(self): + """videotoolbox has no `tune` option; fast_decode must not be silently dropped.""" + with pytest.raises(ValueError, match="fast_decode"): + VideoEncoderConfig(vcodec="h264_videotoolbox", preset=None, fast_decode=1) + + +class TestExtraOptions: + @require_libsvtav1 + def test_default_is_empty_dict(self): + cfg = VideoEncoderConfig() + assert cfg.extra_options == {} + + @require_libsvtav1 + def test_unknown_key_passes_through(self): + """Keys not published as AVOptions are forwarded to FFmpeg.""" + cfg = VideoEncoderConfig(extra_options={"totally_made_up_option": "value"}) + assert cfg.extra_options == {"totally_made_up_option": "value"} + + @require_libsvtav1 + def test_numeric_value_in_range_ok(self): + """libsvtav1 exposes ``qp`` as INT in [0, 63].""" + cfg = VideoEncoderConfig(extra_options={"qp": 30}) + assert cfg.extra_options == {"qp": 30} + + @require_libsvtav1 + def test_numeric_out_of_range_raises(self): + with pytest.raises(ValueError, match=r"extra_options\['qp'\].*out of range"): + VideoEncoderConfig(extra_options={"qp": 999}) + + @require_libsvtav1 + def test_numeric_string_accepted_in_range(self): + """Numeric strings are accepted for numeric options (mirrors FFmpeg).""" + cfg = VideoEncoderConfig(extra_options={"qp": "18"}) + assert cfg.extra_options == {"qp": "18"} + + @require_libsvtav1 + def test_numeric_string_out_of_range_raises(self): + with pytest.raises(ValueError, match=r"extra_options\['qp'\].*out of range"): + VideoEncoderConfig(extra_options={"qp": "999"}) + + @require_libsvtav1 + def 
test_non_numeric_string_on_numeric_option_raises(self): + with pytest.raises(ValueError, match=r"extra_options\['qp'\].*not numeric"): + VideoEncoderConfig(extra_options={"qp": "medium"}) + + @require_libsvtav1 + def test_bool_on_numeric_option_raises(self): + """``bool`` is explicitly rejected for numeric options.""" + with pytest.raises(ValueError, match=r"extra_options\['qp'\].*not numeric"): + VideoEncoderConfig(extra_options={"qp": True}) + + @require_h264 + def test_string_option_passes_through_unchecked(self): + """String-typed AVOptions are NOT enum-checked (too many accept freeform).""" + cfg = VideoEncoderConfig(vcodec="h264", preset=None, extra_options={"tune": "some-future-tune"}) + assert cfg.extra_options == {"tune": "some-future-tune"} + + @require_libsvtav1 + def test_merged_into_codec_options_and_stringified(self): + """extra_options are merged into get_codec_options() as strings.""" + cfg = VideoEncoderConfig(extra_options={"qp": 20}) + opts = cfg.get_codec_options() + assert opts["qp"] == "20" + assert isinstance(opts["qp"], str) + + @require_libsvtav1 + def test_structured_fields_win_on_collision(self): + """A colliding extra_options key is discarded; the structured field wins.""" + cfg = VideoEncoderConfig(crf=30, extra_options={"crf": 18}) + assert cfg.get_codec_options()["crf"] == "30" + + +class TestEncoderDetection: + @require_h264 + def test_explicit_codec_kept_when_available(self): + cfg = VideoEncoderConfig(vcodec="h264") + assert cfg.vcodec == "h264" + + @require_videotoolbox + def test_auto_picks_videotoolbox_when_available(self): + """``h264_videotoolbox`` sits at the top of ``HW_ENCODERS`` so it wins when present.""" + cfg = VideoEncoderConfig(vcodec="auto") + assert cfg.vcodec == "h264_videotoolbox" + + def test_invalid_codec_raises(self): + with pytest.raises(ValueError, match="Invalid vcodec"): + VideoEncoderConfig(vcodec="not_a_real_codec") + + def test_hw_encoder_names_listed_as_valid(self): + assert "auto" in VALID_VIDEO_CODECS 
+ assert "h264_videotoolbox" in VALID_VIDEO_CODECS + assert "h264_nvenc" in VALID_VIDEO_CODECS + + +ARTIFACTS = Path(__file__).parent.parent / "fixtures" / "artifacts" / "videos" + +# Default video feature set used by persistence tests. +VIDEO_FEATURES = { + "observation.images.cam": { + "dtype": "video", + "shape": (64, 96, 3), + "names": ["height", "width", "channels"], + }, + "action": {"dtype": "float32", "shape": (2,), "names": ["a", "b"]}, +} +VIDEO_KEY = "observation.images.cam" + + +def _write_frames(imgs_dir: Path, num_frames: int = 4, height: int = 64, width: int = 96) -> None: + imgs_dir.mkdir(parents=True, exist_ok=True) + for i in range(num_frames): + arr = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8) + write_image(arr, imgs_dir / f"frame-{i:06d}.png") + + +def _encode_video(path: Path, num_frames: int = 4, fps: int = 30, cfg: VideoEncoderConfig | None = None) -> Path: + imgs_dir = path.parent / f"imgs_{path.stem}" + _write_frames(imgs_dir, num_frames=num_frames) + encode_video_frames(imgs_dir, path, fps=fps, camera_encoder_config=cfg, overwrite=True) + return path + + +def _read_feature_info(dataset: LeRobotDataset) -> dict: + info = json.loads((dataset.root / INFO_PATH).read_text()) + return info["features"][VIDEO_KEY]["info"] + + +def _add_frames(dataset: LeRobotDataset, num_frames: int) -> None: + shape = dataset.meta.features[VIDEO_KEY]["shape"] + for _ in range(num_frames): + dataset.add_frame({ + VIDEO_KEY: np.random.randint(0, 256, shape, dtype=np.uint8), + "action": np.zeros(2, dtype=np.float32), + "task": "test", + }) + + +class TestGetVideoInfo: + def test_returns_all_stream_fields(self): + info = get_video_info(ARTIFACTS / "clip_4frames.mp4") + + assert info["video.height"] == 64 + assert info["video.width"] == 96 + assert info["video.pix_fmt"] == "yuv420p" + assert info["video.fps"] == 30 + assert info["video.channels"] == 3 + assert info["video.is_depth_map"] is False + assert info["has_audio"] is False + assert "video.g" 
not in info + assert "video.crf" not in info + assert "video.preset" not in info + + @require_libsvtav1 + def test_merges_encoder_config_as_video_prefixed_entries(self): + cfg = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12) + + info = get_video_info(ARTIFACTS / "clip_4frames.mp4", camera_encoder_config=cfg) + + assert info["video.g"] == 2 + assert info["video.crf"] == 30 + assert info["video.preset"] == 12 + assert info["video.fast_decode"] == 0 + assert info["video.video_backend"] == "pyav" + assert info["video.extra_options"] == {} + + @require_libsvtav1 + def test_stream_derived_keys_take_precedence_over_config(self): + cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p") + + info = get_video_info(ARTIFACTS / "clip_4frames.mp4", camera_encoder_config=cfg) + + assert info["video.codec"] # populated from stream, not from config's vcodec + assert info["video.pix_fmt"] == "yuv420p" + + +class TestEncodeVideoFrames: + @require_libsvtav1 + def test_produces_readable_mp4(self, tmp_path): + video_path = _encode_video(tmp_path / "out.mp4") + + assert video_path.exists() + info = get_video_info(video_path) + assert info["video.height"] == 64 + assert info["video.width"] == 96 + + @require_libsvtav1 + def test_frame_count_and_duration_match_input(self, tmp_path): + num_frames = 10 + fps = 30 + video_path = _encode_video(tmp_path / "out.mp4", num_frames=num_frames, fps=fps) + + with av.open(str(video_path)) as container: + stream = container.streams.video[0] + actual_frames = sum(1 for _ in container.decode(stream)) + duration = ( + float(stream.duration * stream.time_base) + if stream.duration is not None + else float(container.duration / av.time_base) + ) + + assert actual_frames == num_frames + assert abs(duration - num_frames / fps) < 0.1 + + def test_overwrite_false_skips_existing_file(self, tmp_path): + imgs_dir = tmp_path / "imgs" + _write_frames(imgs_dir) + video_path = tmp_path / "out.mp4" + sentinel = b"pre-existing content" + 
video_path.write_bytes(sentinel) + + encode_video_frames(imgs_dir, video_path, fps=30, overwrite=False) + + assert video_path.read_bytes() == sentinel + + @require_libsvtav1 + def test_overwrite_true_replaces_existing_file(self, tmp_path): + imgs_dir = tmp_path / "imgs" + _write_frames(imgs_dir) + video_path = tmp_path / "out.mp4" + video_path.write_bytes(b"stale content") + + encode_video_frames(imgs_dir, video_path, fps=30, overwrite=True) + + info = get_video_info(video_path) + assert info["video.height"] == 64 + + @require_libsvtav1 + def test_custom_encoder_config_fields_stored_in_info(self, tmp_path): + """All stream-derived and encoder config fields are present after encoding.""" + cfg = VideoEncoderConfig(vcodec="libsvtav1", g=4, crf=25, preset=10) + video_path = _encode_video(tmp_path / "out.mp4", num_frames=4, fps=30, cfg=cfg) + + info = get_video_info(video_path, camera_encoder_config=cfg) + + # Stream-derived + assert info["video.height"] == 64 + assert info["video.width"] == 96 + assert info["video.channels"] == 3 + assert info["video.codec"] == "av1" + assert info["video.pix_fmt"] == "yuv420p" + assert info["video.fps"] == 30 + assert info["video.is_depth_map"] is False + assert info["has_audio"] is False + # Encoder config + assert info["video.g"] == 4 + assert info["video.crf"] == 25 + assert info["video.preset"] == 10 + assert info["video.fast_decode"] == 0 + assert info["video.video_backend"] == "pyav" + assert info["video.extra_options"] == {} + + +class TestConcatenateVideoFiles: + def test_two_clips_frame_count(self, tmp_path): + """Output frame count equals the sum of the two input frame counts.""" + out = tmp_path / "out.mp4" + concatenate_video_files([ARTIFACTS / "clip_6frames.mp4", ARTIFACTS / "clip_4frames.mp4"], out) + + with av.open(str(out)) as container: + total = sum(1 for _ in container.decode(video=0)) + assert total == 10 + + def test_three_clips_frame_count(self, tmp_path): + out = tmp_path / "out.mp4" + clip = ARTIFACTS / 
"clip_5frames.mp4" + concatenate_video_files([clip, clip, clip], out) + + with av.open(str(out)) as container: + total = sum(1 for _ in container.decode(video=0)) + assert total == 15 + + @require_libsvtav1 + def test_geometry_preserved(self, tmp_path): + """Output resolution, fps, codec and pixel format must match the inputs.""" + out = tmp_path / "out.mp4" + concatenate_video_files([ARTIFACTS / "clip_4frames.mp4", ARTIFACTS / "clip_4frames.mp4"], out) + + info = get_video_info(out) + assert info["video.height"] == 64 + assert info["video.width"] == 96 + assert info["video.fps"] == 30 + assert info["video.codec"] == "av1" + assert info["video.pix_fmt"] == "yuv420p" + + def test_compatibility_check_raises_on_different_codec(self, tmp_path): + with pytest.raises(ValueError): + concatenate_video_files( + [ARTIFACTS / "clip_4frames.mp4", ARTIFACTS / "clip_h264.mp4"], + tmp_path / "out.mp4", + compatibilty_check=True, + ) + + def test_compatibility_check_raises_on_different_resolution(self, tmp_path): + with pytest.raises(ValueError): + concatenate_video_files( + [ARTIFACTS / "clip_4frames.mp4", ARTIFACTS / "clip_32x48.mp4"], + tmp_path / "out.mp4", + compatibilty_check=True, + ) + + +class TestEncoderConfigPersistence: + """Encoder config must be stored as ``video.`` entries in + ``info["features"][key]["info"]`` when the first episode is saved. 
+ """ + + @require_libsvtav1 + def test_first_episode_save_persists_encoder_config(self, tmp_path, empty_lerobot_dataset_factory): + cfg = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12) + dataset = empty_lerobot_dataset_factory( + root=tmp_path / "ds", features=VIDEO_FEATURES, use_videos=True, camera_encoder_config=cfg + ) + + _add_frames(dataset, num_frames=4) + dataset.save_episode() + dataset.finalize() + + info = _read_feature_info(dataset) + + assert info["video.height"] == 64 + assert info["video.width"] == 96 + assert info["video.fps"] == 30 + assert info["video.g"] == 2 + assert info["video.crf"] == 30 + assert info["video.preset"] == 12 + assert info["video.fast_decode"] == 0 + assert info["video.video_backend"] == "pyav" + assert info["video.extra_options"] == {} + + @require_libsvtav1 + def test_second_episode_does_not_overwrite_encoder_fields(self, tmp_path, empty_lerobot_dataset_factory): + cfg = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12) + dataset = empty_lerobot_dataset_factory( + root=tmp_path / "ds", features=VIDEO_FEATURES, use_videos=True, camera_encoder_config=cfg + ) + + _add_frames(dataset, num_frames=4) + dataset.save_episode() + first_info = dict(_read_feature_info(dataset)) + + _add_frames(dataset, num_frames=4) + dataset.save_episode() + dataset.finalize() + + assert _read_feature_info(dataset) == first_info diff --git a/tests/fixtures/artifacts/videos/clip_32x48.mp4 b/tests/fixtures/artifacts/videos/clip_32x48.mp4 new file mode 100644 index 000000000..086c399d3 --- /dev/null +++ b/tests/fixtures/artifacts/videos/clip_32x48.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2191cd86e9e32ecbe18e33ad68d49060e479723ab5a3212bbb26df3025ccb568 +size 5815 diff --git a/tests/fixtures/artifacts/videos/clip_4frames.mp4 b/tests/fixtures/artifacts/videos/clip_4frames.mp4 new file mode 100644 index 000000000..487c3c8ad --- /dev/null +++ b/tests/fixtures/artifacts/videos/clip_4frames.mp4 @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e0ebf563ba3ed9c24b691a0f0b29e0294a1fa9b51422e1ece296155f1465768 +size 16236 diff --git a/tests/fixtures/artifacts/videos/clip_5frames.mp4 b/tests/fixtures/artifacts/videos/clip_5frames.mp4 new file mode 100644 index 000000000..cbbe81c39 --- /dev/null +++ b/tests/fixtures/artifacts/videos/clip_5frames.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8475bfd5e6c4c780df46200e2b027e262b38436c57d01078bd943a5b87c65b8f +size 20726 diff --git a/tests/fixtures/artifacts/videos/clip_6frames.mp4 b/tests/fixtures/artifacts/videos/clip_6frames.mp4 new file mode 100644 index 000000000..50d9badca --- /dev/null +++ b/tests/fixtures/artifacts/videos/clip_6frames.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6434322d1c671a7d132367619f841a775317cb9ff973f3f4505831e3ed74076d +size 23808 diff --git a/tests/fixtures/artifacts/videos/clip_h264.mp4 b/tests/fixtures/artifacts/videos/clip_h264.mp4 new file mode 100644 index 000000000..90698dcf5 --- /dev/null +++ b/tests/fixtures/artifacts/videos/clip_h264.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efc84375e92a3499cef93100e04d8fb354670f3d9e0db2097b52575927284fc +size 12237