feat(encoding parameters): adding support for user provided video encoding parameters (#3455)

* chore(video backend): renaming codec into video_backend in get_safe_default_video_backend() * feat(pyav utils): adding support for PyAV encoding parameters validation * feat(VideoEncoderConfig): creating a VideoEncoderConfig to encapsulate encoding parameters * feat(VideoEncoderConfig): propagating the VideoEncoderConfig in the codebase * chore(docs): updating the docs * feat(metadata): adding encoding parameters in dataset metadata * fix(concatenation compatibility): adding compatibility check when concatenating video files * feat(VideoEncoderConfig init): making VideoEncoderConfig more robust and adaptable to multiple backends * feat(pyav checks): making pyav parameters checks more robust * chore(duplicate): removing duplicate get_codec_options definition * test(existing): adapting existing tests * test(new): adding new tests for encoding related features * chore(format): fixing formatting issues * chore(PyAV): cleaning up PyAV utils and encoding parameters checks to stick to the minimum required tooling. * chore(format): formatting code * chore(docstrings): updating docstrings * fix(camera_encoder_config): Removing camera_encoder_config from LeRobotDataset, as it's only required in LeRobotDatasetWriter. * feat(default values): applying a consistent naming convention for default RGB cameras video encoder parameters * fix(rollout): propagating VideoEncoderConfig to the latest recording modes * chore(format): formatting code, fixing error messages and variable names * fix(arguments order): reverting changes in arguments order in StreamingVideoEncoder * chore(relative imports): switching to relative local imports within lerobot.datasets * test(artifacts): cleaning up artifacts for the video encoding tests * chore(docs): updating docs * chore(format): formatting code * fix(imports): refactoring the file architecture to avoid circular imports. VideoEncoderConfig is now defined in lerobot.configs and lazily imports av at runtime.
* fix(typos): fixing typos and small mistakes * test(factories): updating factories * feat(aggregate): updating dataset aggregation procedure. Encoding tuning parameters (crf, g,...) are ignored for validation and changed to None in the aggregated dataset if incompatible. * docs(typos): fixing typos * fix(deletion): reverting unwanted deletion * fix(typos): fixing multiple typos * feat(codec options): passing codec options to lerobot_edit_dataset episode deletion tool * typo(typo): typo * fix(typos): fixing remaining typos * chore(rename): renaming camera_encoder_config to camera_encoder * docs(clean): cleaning and formatting docs * docs(dataset): adding details about datasets * chore(format): formatting code * docs(warning): adding warning regarding encoding parameters modification * fix(re-encoding): removing inconsistent re-encoding option in lerobot_edit_dataset * typos(typos): typos * chore(format): resolving prettier issues * fix(h264_nvenc): fixing crf handling for h264_nvenc * docs(clean): removing too technical parts of the docs * fix(imports): fixing imports at the __init__ level * fix(imports): fixing not very pretty imports in video config file
2026-05-20 02:59:50 +00:00 · 2026-05-14 23:46:42 +02:00
parent 0a4a7c40ad
commit bd9619dfc3
56 changed files with 1765 additions and 527 deletions
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import json
+import logging
 from unittest.mock import patch

 import pytest
@@ -23,7 +25,9 @@ pytest.importorskip("datasets", reason="datasets is required (install lerobot[da
 import datasets  # noqa: E402
 import torch

+from lerobot.configs import VIDEO_ENCODER_INFO_KEYS
 from lerobot.datasets.aggregate import aggregate_datasets
+from lerobot.datasets.feature_utils import features_equal_for_merge
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from tests.fixtures.constants import DUMMY_REPO_ID

@@ -117,8 +121,9 @@ def assert_metadata_consistency(aggr_ds, ds_0, ds_1):
        "Robot type should be the same"
    )

-    # Test features are the same
-    assert aggr_ds.features == ds_0.features == ds_1.features, "Features should be the same"
+    # Schema matches; merged video ``info`` is reconciled separately from per-source ``info``.
+    assert features_equal_for_merge(aggr_ds.features, ds_0.features)
+    assert features_equal_for_merge(aggr_ds.features, ds_1.features)

    # Test tasks aggregation
    expected_tasks = set(ds_0.meta.tasks.index) | set(ds_1.meta.tasks.index)
@@ -284,6 +289,73 @@ def test_aggregate_datasets(tmp_path, lerobot_dataset_factory):
    assert_dataset_iteration_works(aggr_ds)


+@pytest.mark.parametrize("mutation", ["mismatched_value", "missing_key"])
+def test_aggregate_incomplete_video_encoder_info_warns_and_nuls_encoders(
+    tmp_path, lerobot_dataset_factory, caplog, mutation
+):
+    """Mismatched or missing encoder ``info`` is merged per-key with fallbacks and a warning."""
+    suffix = "enc_mismatch" if mutation == "mismatched_value" else "enc_missing"  # unique per case
+    ds_0 = lerobot_dataset_factory(
+        root=tmp_path / f"{suffix}_a",
+        repo_id=f"{DUMMY_REPO_ID}_{suffix}_a",
+        total_episodes=2,
+        total_frames=20,
+    )
+    ds_1 = lerobot_dataset_factory(
+        root=tmp_path / f"{suffix}_b",
+        repo_id=f"{DUMMY_REPO_ID}_{suffix}_b",
+        total_episodes=2,
+        total_frames=20,
+    )
+
+    info_path = ds_1.root / "meta" / "info.json"  # only ds_1's on-disk metadata is edited
+    data = json.loads(info_path.read_text())
+    for ft in data["features"].values():
+        if ft.get("dtype") != "video":  # only video features are mutated
+            continue
+        inf = ft.setdefault("info", {})
+        if mutation == "mismatched_value":
+            inf["video.crf"] = 99  # presumably differs from ds_0's value - TODO confirm
+            inf["video.extra_options"] = {"tune": "film"}
+        else:
+            inf.pop("video.crf", None)  # drop the key if present
+            inf.pop("video.extra_options", None)
+    info_path.write_text(json.dumps(data))
+
+    aggr_id = f"{DUMMY_REPO_ID}_{suffix}_aggr"
+    aggr_root = tmp_path / f"{suffix}_aggr"
+    with caplog.at_level(logging.WARNING):  # capture warnings logged during aggregation
+        aggregate_datasets(
+            repo_ids=[ds_0.repo_id, ds_1.repo_id],
+            roots=[ds_0.root, ds_1.root],
+            aggr_repo_id=aggr_id,
+            aggr_root=aggr_root,
+        )
+
+    assert "heterogeneous" in caplog.text.lower() or "incomplete" in caplog.text.lower()
+
+    with (
+        patch("lerobot.datasets.dataset_metadata.get_safe_version") as mock_get_safe_version,
+        patch("lerobot.datasets.dataset_metadata.snapshot_download") as mock_snapshot_download,
+    ):
+        mock_get_safe_version.return_value = "v3.0"
+        mock_snapshot_download.return_value = str(aggr_root)
+        aggr_ds = LeRobotDataset(aggr_id, root=aggr_root)  # loaded while both helpers are patched
+
+    for key, ft in aggr_ds.meta.info.features.items():
+        if ft.get("dtype") != "video":  # only video features are checked
+            continue
+        info = ft["info"]
+        reference = ds_0.meta.info.features[key]["info"]  # ds_0 was left unmodified
+        for info_key in VIDEO_ENCODER_INFO_KEYS:
+            if info_key == "video.crf":
+                assert info[info_key] is None  # mutated key: expected None after merge
+            elif info_key == "video.extra_options":
+                assert info[info_key] == {}  # mutated key: expected empty after merge
+            else:
+                assert info[info_key] == reference[info_key]  # keys this test did not touch
+
+
 def test_aggregate_with_low_threshold(tmp_path, lerobot_dataset_factory):
    """Test aggregation with small file size limits to force file rotation/sharding."""
    ds_0_num_episodes = ds_1_num_episodes = 10