fix(update video info): make the update_video_info logic match the recording and editing use cases

This commit is contained in:
CarolinePascal
2026-06-15 17:51:07 +02:00
parent 1f66e6f5e4
commit c0db93f4a0
2 changed files with 28 additions and 16 deletions
+18 -5
View File
@@ -603,11 +603,19 @@ class LeRobotDatasetMetadata:
video_encoder: VideoEncoderConfig | None = None,
preserve_keys: Iterable[str] | None = None,
) -> None:
"""Populate per-feature video info in ``info.json``.
"""Populate or refresh per-feature video info in ``info.json``.
Warning: this function writes info from first episode videos, implicitly assuming that all videos have
been encoded the same way. Also, this means it assumes the first episode exists.
Two modes, selected by ``preserve_keys``:
- **Populate** (``None``, default): write info for video keys that lack it,
skip the rest. Used when first encoding a dataset.
- **Refresh** (any iterable): re-probe and overwrite existing info, keeping
the listed keys. Used after re-encoding to preserve data-intrinsic entries
(``is_depth_map``, depth quantization params) while codec params change.
Args:
video_key: If provided, only update this video key. Otherwise update
all video keys in the dataset.
@@ -615,21 +623,26 @@ class LeRobotDatasetMetadata:
videos. When provided, its fields are recorded as
``video.<field>`` entries alongside the stream-derived
``video.*`` entries (see :func:`get_video_info`).
preserve_keys: Optional iterable of ``info`` keys whose existing
values must be kept as-is.
preserve_keys: ``None`` (default) for populate-once mode. An iterable
(possibly empty) switches to refresh mode, keeping these keys'
existing values while recomputing the rest.
"""
if video_key is not None and video_key not in self.video_keys:
raise ValueError(f"Video key {video_key} not found in dataset")
video_keys = [video_key] if video_key is not None else self.video_keys
refresh = preserve_keys is not None
preserve_set = set(preserve_keys or ())
for key in video_keys:
existing = self.features[key].get("info") or {}
# Skip only if real video info has already been written. The ``is_depth_map`` entry (created at feature creation) is not blocking.
if set(existing.keys()) - {"is_depth_map"}:
# ``is_depth_map`` is written at feature creation and does not count as real video info here.
already_populated = bool(set(existing.keys()) - {"is_depth_map"})
# Populate-once: never clobber info that has already been written unless a refresh is requested.
if already_populated and not refresh:
continue
video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
new_info = get_video_info(video_path, video_encoder=video_encoder)
# Drop preserved keys so the existing values win on merge.
new_info = {k: v for k, v in new_info.items() if k not in preserve_set}
self.info.features[key]["info"] = {**existing, **new_info}
+10 -11
View File
@@ -77,7 +77,6 @@ from .utils import (
)
from .video_utils import (
encode_video_frames,
get_video_info,
reencode_video,
)
@@ -1887,14 +1886,11 @@ def convert_image_to_video_dataset(
new_meta.info.total_tasks = dataset.meta.total_tasks
new_meta.info.splits = {"train": f"0:{len(episode_indices)}"}
# Update video info for all image keys (now videos)
# We need to manually set video info since update_video_info() checks video_keys first
# Update video info for all image keys (now videos). They are registered as
# video features above, so update_video_info populates their (still-empty) info.
for img_key in img_keys:
target_encoder = depth_encoder if img_key in dataset.meta.depth_keys else camera_encoder
video_path = new_meta.root / new_meta.video_path.format(
video_key=img_key, chunk_index=0, file_index=0
)
new_meta.info.features[img_key]["info"] = get_video_info(video_path, video_encoder=target_encoder)
new_meta.update_video_info(video_key=img_key, video_encoder=target_encoder)
write_info(new_meta.info, new_meta.root)
@@ -1976,7 +1972,7 @@ def reencode_dataset(
if target_encoder is None:
logging.info(f"No encoder provided for {video_key} video. Skipping re-encoding.")
elif current_encoder != target_encoder:
video_keys_paths_dict[video_key] = (meta.root / VIDEO_DIR / video_key).rglob("*.mp4")
video_keys_paths_dict[video_key] = list((meta.root / VIDEO_DIR / video_key).rglob("*.mp4"))
video_keys_encoders_dict[video_key] = target_encoder
else:
logging.info(f"{video_key} videos are already encoded with {target_encoder}. Nothing to do.")
@@ -2004,11 +2000,14 @@ def reencode_dataset(
for args in tqdm(worker_args, desc="Re-encoding videos"):
_reencode_video_worker(args)
# Refresh video info in metadata for every video key. For depth videos, preserve
# ``is_depth_map`` and the depth quantization parameters.
# Refresh video info in metadata for every re-encoded key. Re-encoding only
# changes codec/container params, so for depth videos we preserve ``is_depth_map``
# and the depth quantization params (``video.depth_min`` / ``video.depth_max`` /
# ...), which describe the data rather than the codec and must survive a transcode.
# RGB videos pass an empty set: still a refresh, but nothing to preserve.
depth_preserve_keys = {"is_depth_map", *(f"video.{n}" for n in DEPTH_ENCODER_INFO_FIELD_NAMES)}
for video_key, encoder in video_keys_encoders_dict.items():
preserve_keys = depth_preserve_keys if video_key in meta.depth_keys else None
preserve_keys = depth_preserve_keys if video_key in meta.depth_keys else set()
meta.update_video_info(video_key=video_key, video_encoder=encoder, preserve_keys=preserve_keys)
write_info(meta.info, meta.root)