Compare commits

...

2 Commits

Author SHA1 Message Date
CarolinePascal 06858a1b40 fix(image transforms): cleaning up image_transforms implementation in LeRobotDataset 2026-06-16 19:32:44 +02:00
Caroline Pascal 287c823f13 fix(features copy): adding deepcopy on LeRobot dataset features to avoid shallow copy leaks (#3826)
* fix(features copy): adding deepcopy on LeRobot dataset features to avoid shallow copy leaks

* tests(test): adding new test
2026-06-16 17:58:59 +02:00
5 changed files with 39 additions and 10 deletions
+2 -1
View File
@@ -15,6 +15,7 @@
# limitations under the License.
import contextlib
from collections.abc import Callable
from copy import deepcopy
from pathlib import Path
import numpy as np
@@ -709,7 +710,7 @@ class LeRobotDatasetMetadata:
obj.root.mkdir(parents=True, exist_ok=False)
features = {**features, **DEFAULT_FEATURES}
features = {**deepcopy(features), **DEFAULT_FEATURES}
_validate_feature_names(features)
obj.tasks = None
+12
View File
@@ -74,6 +74,8 @@ class DatasetReader:
self.episodes = episodes
self._tolerance_s = tolerance_s
self._video_backend = video_backend
if image_transforms is not None and not callable(image_transforms):
raise TypeError("image_transforms must be callable or None.")
self._image_transforms = image_transforms
self._return_uint8 = return_uint8
@@ -86,6 +88,16 @@ class DatasetReader:
check_delta_timestamps(delta_timestamps, meta.fps, tolerance_s)
self.delta_indices = get_delta_indices(delta_timestamps, meta.fps)
def set_image_transforms(self, image_transforms: Callable | None) -> None:
"""Replace the transform applied to visual observations."""
if image_transforms is not None and not callable(image_transforms):
raise TypeError("image_transforms must be callable or None.")
self._image_transforms = image_transforms
def clear_image_transforms(self) -> None:
"""Remove the transform applied to visual observations."""
self._image_transforms = None
def try_load(self) -> bool:
"""Attempt to load from local cache. Returns True if data is sufficient."""
try:
+4 -1
View File
@@ -27,6 +27,7 @@ import logging
import shutil
from collections.abc import Callable
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from copy import deepcopy
from pathlib import Path
import datasets
@@ -1101,7 +1102,9 @@ def _copy_episodes_metadata_and_stats(
if dst_meta.video_keys and src_dataset.meta.video_keys:
for key in dst_meta.video_keys:
if key in src_dataset.meta.features:
dst_meta.info.features[key]["info"] = src_dataset.meta.info.features[key].get("info", {})
dst_meta.info.features[key]["info"] = deepcopy(
src_dataset.meta.info.features[key].get("info", {})
)
write_info(dst_meta.info, dst_meta.root)
+5 -7
View File
@@ -201,8 +201,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
super().__init__()
self.repo_id = repo_id
self._requested_root = Path(root) if root else None
self.reader = None
self.set_image_transforms(image_transforms)
self.delta_timestamps = delta_timestamps
self.tolerance_s = tolerance_s
self.revision = revision if revision else CODEBASE_VERSION
@@ -249,6 +247,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
image_transforms=image_transforms,
return_uint8=self._return_uint8,
)
self.image_transforms = image_transforms
# Load actual data
if force_cache_sync or not self.reader.try_load():
@@ -505,15 +504,14 @@ class LeRobotDataset(torch.utils.data.Dataset):
def set_image_transforms(self, image_transforms: Callable | None) -> None:
"""Replace the transform applied to visual observations."""
if image_transforms is not None and not callable(image_transforms):
raise TypeError("image_transforms must be callable or None.")
self._ensure_reader().set_image_transforms(image_transforms)
self.image_transforms = image_transforms
if self.reader is not None:
self.reader._image_transforms = image_transforms
def clear_image_transforms(self) -> None:
"""Remove the transform applied to visual observations."""
self.set_image_transforms(None)
if self.reader is not None:
self.reader.set_image_transforms(None)
self.image_transforms = None
# ── Hub methods (stay on facade) ──────────────────────────────────
+16 -1
View File
@@ -51,7 +51,7 @@ from lerobot.robots import make_robot_from_config
from lerobot.transforms import ImageTransforms, ImageTransformsConfig
from lerobot.utils.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, OBS_STR, REWARD
from lerobot.utils.feature_utils import hw_to_dataset_features
from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID
from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_MOTOR_FEATURES, DUMMY_REPO_ID
from tests.mocks.mock_robot import MockRobotConfig
from tests.utils import require_x86_64_kernel
@@ -133,6 +133,21 @@ def test_dataset_feature_with_forward_slash_raises_error():
)
def test_create_does_not_mutate_input_features(tmp_path, empty_lerobot_dataset_factory):
# ``create`` must deep-copy features so a dataset built from another's features stays independent.
dataset = empty_lerobot_dataset_factory(
root=tmp_path / "ds1", features=DUMMY_MOTOR_FEATURES, use_videos=False
)
dataset_copy = empty_lerobot_dataset_factory(
root=tmp_path / "ds2", features=dataset.meta.features, use_videos=False
)
original_shape = dataset.meta.info.features["state"]["shape"]
dataset_copy.meta.info.features["state"]["shape"] = (999,)
assert dataset.meta.info.features["state"]["shape"] == original_shape
def test_add_frame_missing_task(tmp_path, empty_lerobot_dataset_factory):
features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)