From 73d60854b4ff54df1fbee705054b758302ba966b Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Mon, 15 Jun 2026 17:51:07 +0200 Subject: [PATCH] fix(update video info): fixing update video info logic to match the recording and editing use cases --- src/lerobot/datasets/dataset_metadata.py | 23 ++++++++++++++++++----- src/lerobot/datasets/dataset_tools.py | 21 ++++++++++----------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index a637aef7f..e6f3af152 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -602,11 +602,19 @@ class LeRobotDatasetMetadata: video_encoder: VideoEncoderConfig | None = None, preserve_keys: Iterable[str] | None = None, ) -> None: - """Populate per-feature video info in ``info.json``. + """Populate or refresh per-feature video info in ``info.json``. Warning: this function writes info from first episode videos, implicitly assuming that all videos have been encoded the same way. Also, this means it assumes the first episode exists. + Two modes, selected by ``preserve_keys``: + + - **Populate** (``None``, default): write info for video keys that lack it, + skip the rest. Used when first encoding a dataset. + - **Refresh** (any iterable): re-probe and overwrite existing info, keeping + the listed keys. Used after re-encoding to preserve data-intrinsic entries + (``is_depth_map``, depth quantization params) while codec params change. + Args: video_key: If provided, only update this video key. Otherwise update all video keys in the dataset. @@ -614,21 +622,26 @@ class LeRobotDatasetMetadata: videos. When provided, its fields are recorded as ``video.`` entries alongside the stream-derived ``video.*`` entries (see :func:`get_video_info`). - preserve_keys: Optional iterable of ``info`` keys whose existing - values must be kept as-is. + preserve_keys: ``None`` (default) for populate-once mode. An iterable + (possibly empty) switches to refresh mode, keeping these keys' + existing values while recomputing the rest. """ if video_key is not None and video_key not in self.video_keys: raise ValueError(f"Video key {video_key} not found in dataset") video_keys = [video_key] if video_key is not None else self.video_keys + refresh = preserve_keys is not None preserve_set = set(preserve_keys or ()) for key in video_keys: existing = self.features[key].get("info") or {} - # Skip only if real video info has already been written. The ``is_depth_map`` entry (created at feature creation) is not blocking. - if set(existing.keys()) - {"is_depth_map"}: + # ``is_depth_map`` is written at feature creation and does not count as real video info here. + already_populated = bool(set(existing.keys()) - {"is_depth_map"}) + # Populate-once: never clobber info that has already been written unless a refresh is requested. + if already_populated and not refresh: continue video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0) new_info = get_video_info(video_path, video_encoder=video_encoder) + # Drop preserved keys so the existing values win on merge. new_info = {k: v for k, v in new_info.items() if k not in preserve_set} self.info.features[key]["info"] = {**existing, **new_info} diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index 4207d1449..b2c580dbc 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -76,7 +76,6 @@ from .utils import ( ) from .video_utils import ( encode_video_frames, - get_video_info, reencode_video, ) @@ -1884,14 +1883,11 @@ def convert_image_to_video_dataset( new_meta.info.total_tasks = dataset.meta.total_tasks new_meta.info.splits = {"train": f"0:{len(episode_indices)}"} - # Update video info for all image keys (now videos) - # We need to manually set video info since update_video_info() checks video_keys first + # Update video info for all image keys (now videos). They are registered as + # video features above, so update_video_info populates their (still-empty) info. for img_key in img_keys: target_encoder = depth_encoder if img_key in dataset.meta.depth_keys else camera_encoder - video_path = new_meta.root / new_meta.video_path.format( - video_key=img_key, chunk_index=0, file_index=0 - ) - new_meta.info.features[img_key]["info"] = get_video_info(video_path, video_encoder=target_encoder) + new_meta.update_video_info(video_key=img_key, video_encoder=target_encoder) write_info(new_meta.info, new_meta.root) @@ -1973,7 +1969,7 @@ def reencode_dataset( if target_encoder is None: logging.info(f"No encoder provided for {video_key} video. Skipping re-encoding.") elif current_encoder != target_encoder: - video_keys_paths_dict[video_key] = (meta.root / VIDEO_DIR / video_key).rglob("*.mp4") + video_keys_paths_dict[video_key] = list((meta.root / VIDEO_DIR / video_key).rglob("*.mp4")) video_keys_encoders_dict[video_key] = target_encoder else: logging.info(f"{video_key} videos are already encoded with {target_encoder}. Nothing to do.") @@ -2001,11 +1997,14 @@ def reencode_dataset( for args in tqdm(worker_args, desc="Re-encoding videos"): _reencode_video_worker(args) - # Refresh video info in metadata for every video key. For depth videos, preserve - # ``is_depth_map`` and the depth quantization parameters. + # Refresh video info in metadata for every re-encoded key. Re-encoding only + # changes codec/container params, so for depth videos we preserve ``is_depth_map`` + # and the depth quantization params (``video.depth_min`` / ``video.depth_max`` / + # ...), which describe the data rather than the codec and must survive a transcode. + # RGB videos pass an empty set: still a refresh, but nothing to preserve. depth_preserve_keys = {"is_depth_map", *(f"video.{n}" for n in DEPTH_ENCODER_INFO_FIELD_NAMES)} for video_key, encoder in video_keys_encoders_dict.items(): - preserve_keys = depth_preserve_keys if video_key in meta.depth_keys else None + preserve_keys = depth_preserve_keys if video_key in meta.depth_keys else set() meta.update_video_info(video_key=video_key, video_encoder=encoder, preserve_keys=preserve_keys) write_info(meta.info, meta.root)