From b02e79bb5e602aa550e4cc0eb3d339164bc3d4fc Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Tue, 16 Jun 2026 16:14:18 +0200 Subject: [PATCH] fix(features copy): deep-copy LeRobot dataset features so mutations do not leak through shallow copies --- src/lerobot/datasets/dataset_metadata.py | 3 ++- src/lerobot/datasets/dataset_tools.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index 39a1b6d2b..b496e4f65 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -15,6 +15,7 @@ # limitations under the License. import contextlib from collections.abc import Callable +from copy import deepcopy from pathlib import Path import numpy as np @@ -709,7 +710,7 @@ class LeRobotDatasetMetadata: obj.root.mkdir(parents=True, exist_ok=False) - features = {**features, **DEFAULT_FEATURES} + features = {**deepcopy(features), **DEFAULT_FEATURES} _validate_feature_names(features) obj.tasks = None diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index 91dc66af2..9aca859b4 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -27,6 +27,7 @@ import logging import shutil from collections.abc import Callable from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed +from copy import deepcopy from pathlib import Path import datasets @@ -1101,7 +1102,9 @@ def _copy_episodes_metadata_and_stats( if dst_meta.video_keys and src_dataset.meta.video_keys: for key in dst_meta.video_keys: if key in src_dataset.meta.features: - dst_meta.info.features[key]["info"] = src_dataset.meta.info.features[key].get("info", {}) + dst_meta.info.features[key]["info"] = deepcopy( + src_dataset.meta.info.features[key].get("info", {}) + ) write_info(dst_meta.info, dst_meta.root)