fix(update video info): fix update_video_info logic to match the recording and editing use cases

2026-06-19 01:07:18 +00:00 · 2026-06-15 17:51:07 +02:00
parent 1f66e6f5e4
commit c0db93f4a0
2 changed files with 28 additions and 16 deletions
@@ -603,11 +603,19 @@ class LeRobotDatasetMetadata:
        video_encoder: VideoEncoderConfig | None = None,
        preserve_keys: Iterable[str] | None = None,
    ) -> None:
-        """Populate per-feature video info in ``info.json``.
+        """Populate or refresh per-feature video info in ``info.json``.

        Warning: this function writes info from first episode videos, implicitly assuming that all videos have
        been encoded the same way. Also, this means it assumes the first episode exists.

+        Two modes, selected by ``preserve_keys``:
+
+        - **Populate** (``None``, default): write info for video keys that lack it,
+          skip the rest. Used when first encoding a dataset.
+        - **Refresh** (any iterable): re-probe and merge the result over existing info,
+          keeping the listed keys. Used after re-encoding to preserve data-intrinsic
+          entries (``is_depth_map``, depth quantization params) while codec params change.
+
        Args:
            video_key: If provided, only update this video key. Otherwise update
                all video keys in the dataset.
@@ -615,21 +623,26 @@ class LeRobotDatasetMetadata:
                videos. When provided, its fields are recorded as
                ``video.<field>`` entries alongside the stream-derived
                ``video.*`` entries (see :func:`get_video_info`).
-            preserve_keys: Optional iterable of ``info`` keys whose existing
-                values must be kept as-is.
+            preserve_keys: ``None`` (default) for populate-once mode. An iterable
+                (possibly empty) switches to refresh mode, keeping these keys'
+                existing values while recomputing the rest.
        """
        if video_key is not None and video_key not in self.video_keys:
            raise ValueError(f"Video key {video_key} not found in dataset")

        video_keys = [video_key] if video_key is not None else self.video_keys
+        refresh = preserve_keys is not None
        preserve_set = set(preserve_keys or ())
        for key in video_keys:
            existing = self.features[key].get("info") or {}
-            # Skip only if real video info has already been written. The ``is_depth_map`` entry (created at feature creation) is not blocking.
-            if set(existing.keys()) - {"is_depth_map"}:
+            # ``is_depth_map`` is written at feature creation and does not count as real video info here.
+            already_populated = bool(set(existing.keys()) - {"is_depth_map"})
+            # Populate-once: never clobber info that has already been written unless a refresh is requested.
+            if already_populated and not refresh:
                continue
            video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
            new_info = get_video_info(video_path, video_encoder=video_encoder)
+            # Drop preserved keys from the probe result so existing values win on merge; a preserved key missing from ``existing`` stays unset.
            new_info = {k: v for k, v in new_info.items() if k not in preserve_set}
            self.info.features[key]["info"] = {**existing, **new_info}

@@ -77,7 +77,6 @@ from .utils import (
 )
 from .video_utils import (
    encode_video_frames,
-    get_video_info,
    reencode_video,
 )

@@ -1887,14 +1886,11 @@ def convert_image_to_video_dataset(
        new_meta.info.total_tasks = dataset.meta.total_tasks
        new_meta.info.splits = {"train": f"0:{len(episode_indices)}"}

-        # Update video info for all image keys (now videos)
-        # We need to manually set video info since update_video_info() checks video_keys first
+        # Update video info for all image keys (now videos). They are registered as
+        # video features above, so update_video_info populates their (still-empty) info.
        for img_key in img_keys:
            target_encoder = depth_encoder if img_key in dataset.meta.depth_keys else camera_encoder
-            video_path = new_meta.root / new_meta.video_path.format(
-                video_key=img_key, chunk_index=0, file_index=0
-            )
-            new_meta.info.features[img_key]["info"] = get_video_info(video_path, video_encoder=target_encoder)
+            new_meta.update_video_info(video_key=img_key, video_encoder=target_encoder)

        write_info(new_meta.info, new_meta.root)

@@ -1976,7 +1972,7 @@ def reencode_dataset(
        if target_encoder is None:
            logging.info(f"No encoder provided for {video_key} video. Skipping re-encoding.")
        elif current_encoder != target_encoder:
-            video_keys_paths_dict[video_key] = (meta.root / VIDEO_DIR / video_key).rglob("*.mp4")
+            video_keys_paths_dict[video_key] = list((meta.root / VIDEO_DIR / video_key).rglob("*.mp4"))
            video_keys_encoders_dict[video_key] = target_encoder
        else:
            logging.info(f"{video_key} videos are already encoded with {target_encoder}. Nothing to do.")
@@ -2004,11 +2000,14 @@ def reencode_dataset(
        for args in tqdm(worker_args, desc="Re-encoding videos"):
            _reencode_video_worker(args)

-    # Refresh video info in metadata for every video key. For depth videos, preserve
-    # ``is_depth_map`` and the depth quantization parameters.
+    # Refresh video info in metadata for every re-encoded key. Re-encoding only
+    # changes codec/container params, so for depth videos we preserve ``is_depth_map``
+    # and the depth quantization params (``video.depth_min`` / ``video.depth_max`` /
+    # ...), which describe the data rather than the codec and must survive a transcode.
+    # RGB videos pass an empty set: still a refresh, but nothing to preserve.
    depth_preserve_keys = {"is_depth_map", *(f"video.{n}" for n in DEPTH_ENCODER_INFO_FIELD_NAMES)}
    for video_key, encoder in video_keys_encoders_dict.items():
-        preserve_keys = depth_preserve_keys if video_key in meta.depth_keys else None
+        preserve_keys = depth_preserve_keys if video_key in meta.depth_keys else set()
        meta.update_video_info(video_key=video_key, video_encoder=encoder, preserve_keys=preserve_keys)

    write_info(meta.info, meta.root)