feat(encoding parameters): adding support for user provided video encoding parameters (#3455)

* chore(video backend): renaming codec into video_backend in get_safe_default_video_backend() * feat(pyav utils): adding support for PyAV encoding parameters validation * feat(VideoEncoderConfig): creating a VideoEncoderConfig to encapsulate encoding parameters * feat(VideoEncoderConfig): propagating the VideoEncoderConfig in the codebase * chore(docs): updating the docs * feat(metadata): adding encoding parameters in dataset metadata * fix(concatenation compatibility): adding compatibility check when concatenating video files * feat(VideoEncoderConfig init): making VideoEncoderConfig more robust and adaptable to multiple backends * feat(pyav checks): making pyav parameters checks more robust * chore(duplicate): removing duplicate get_codec_options definition * test(existing): adapting existing tests * test(new): adding new tests for encoding related features * chore(format): fixing formatting issues * chore(PyAV): cleaning up PyAV utils and encoding parameters checks to stick to the minimum required tooling. * chore(format): formatting code * chore(docstrings): updating docstrings * fix(camera_encoder_config): Removing camera_encoder_config from LeRobotDataset, as it's only required in LeRobotDatasetWriter. * feat(default values): applying a consistent naming convention for default RGB cameras video encoder parameters * fix(rollout): propagating VideoEncoderConfig to the latest recording modes * chore(format): formatting code, fixing error messages and variable names * fix(arguments order): reverting changes in arguments order in StreamingVideoEncoder * chore(relative imports): switching to relative local imports within lerobot.datasets * test(artifacts): cleaning up artifacts for the video encoding tests * chore(docs): updating docs * chore(format): formatting code * fix(imports): refactoring the file architecture to avoid circular imports. VideoEncoderConfig is now defined in lerobot.configs and lazily imports av at runtime. 
* fix(typos): fixing typos and small mistakes * test(factories): updating factories * feat(aggregate): updating dataset aggregation procedure. Encoding tuning parameters (crf, g,...) are ignored for validation and changed to None in the aggregated dataset if incompatible. * docs(typos): fixing typos * fix(deletion): reverting unwanted deletion * fix(typos): fixing multiple typos * feat(codec options): passing codec options to lerobot_edit_dataset episode deletion tool * typo(typo): typo * fix(typos): fixing remaining typos * chore(rename): renaming camera_encoder_config to camera_encoder * docs(clean): cleaning and formatting docs * docs(dataset): adding details about datasets * chore(format): formatting code * docs(warning): adding warning regarding encoding parameters modification * fix(re-encoding): removing inconsistent re-encoding option in lerobot_edit_dataset * typos(typos): typos * chore(format): resolving prettier issues * fix(h264_nvenc): fixing crf handling for h264_nvenc * docs(clean): removing too technical parts of the docs * fix(imports): fixing imports at the __init__ level * fix(imports): fixing not very pretty imports in video config file
2026-05-22 03:59:42 +00:00 · 2026-05-14 23:46:42 +02:00
parent 0a4a7c40ad
commit bd9619dfc3
56 changed files with 1765 additions and 527 deletions
@@ -25,6 +25,7 @@ from PIL import Image

 pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")

+from lerobot.configs import VideoEncoderConfig
 from lerobot.datasets.dataset_writer import _encode_video_worker
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.datasets.utils import DEFAULT_IMAGE_PATH
@@ -52,8 +53,8 @@ def _make_frame(features: dict, task: str = "Dummy task") -> dict:
 # ── Existing encode_video_worker tests ───────────────────────────────


-def test_encode_video_worker_forwards_vcodec(tmp_path):
-    """_encode_video_worker correctly forwards the vcodec parameter."""
+def test_encode_video_worker_forwards_camera_encoder(tmp_path):
+    """_encode_video_worker forwards camera_encoder to encode_video_frames."""
    video_key = "observation.images.laptop"
    fpath = DEFAULT_IMAGE_PATH.format(image_key=video_key, episode_index=0, frame_index=0)
    img_dir = tmp_path / Path(fpath).parent
@@ -68,13 +69,21 @@ def test_encode_video_worker_forwards_vcodec(tmp_path):
        Path(video_path).touch()

    with patch("lerobot.datasets.dataset_writer.encode_video_frames", side_effect=mock_encode):
-        _encode_video_worker(video_key, 0, tmp_path, fps=30, vcodec="h264")
+        _encode_video_worker(
+            video_key,
+            0,
+            tmp_path,
+            fps=30,
+            camera_encoder=VideoEncoderConfig(vcodec="h264", preset=None),
+            encoder_threads=4,
+        )

-    assert captured_kwargs["vcodec"] == "h264"
+    assert captured_kwargs["camera_encoder"].vcodec == "h264"
+    assert captured_kwargs["encoder_threads"] == 4


-def test_encode_video_worker_default_vcodec(tmp_path):
-    """_encode_video_worker uses libsvtav1 as the default codec."""
+def test_encode_video_worker_default_camera_encoder(tmp_path):
+    """_encode_video_worker passes None camera_encoder which encode_video_frames defaults."""
    video_key = "observation.images.laptop"
    fpath = DEFAULT_IMAGE_PATH.format(image_key=video_key, episode_index=0, frame_index=0)
    img_dir = tmp_path / Path(fpath).parent
@@ -91,7 +100,8 @@ def test_encode_video_worker_default_vcodec(tmp_path):
    with patch("lerobot.datasets.dataset_writer.encode_video_frames", side_effect=mock_encode):
        _encode_video_worker(video_key, 0, tmp_path, fps=30)

-    assert captured_kwargs["vcodec"] == "libsvtav1"
+    assert captured_kwargs["camera_encoder"] is None
+    assert captured_kwargs["encoder_threads"] is None


 # ── add_frame contracts ──────────────────────────────────────────────