diff --git a/src/lerobot/datasets/pipeline_features.py b/src/lerobot/datasets/pipeline_features.py index cf02a52ac..659c04405 100644 --- a/src/lerobot/datasets/pipeline_features.py +++ b/src/lerobot/datasets/pipeline_features.py @@ -70,19 +70,21 @@ def aggregate_pipeline_dataset_features( initial_features: dict[PipelineFeatureType, dict[str, Any]], *, use_videos: bool = True, + exclude_images: bool = False, patterns: Sequence[str] | None = None, ) -> dict[str, dict]: """ Aggregates and filters pipeline features to create a dataset-ready features dictionary. This function transforms initial features using the pipeline, categorizes them as action or observations - (image or state), filters them based on `use_videos` and `patterns`, and finally + (image or state), filters them based on `exclude_images` and `patterns`, and finally formats them for use with a Hugging Face LeRobot Dataset. Args: pipeline: The DataProcessorPipeline to apply. initial_features: A dictionary of raw feature specs for actions and observations. - use_videos: If False, image features are excluded. + use_videos: Storage dtype for image features that are kept. If True, images are stored as "video"; if False, they are stored as "image". + exclude_images: If True, image features are dropped entirely from the output. patterns: A sequence of regex patterns to filter action and state features. Image features are not affected by this filter. @@ -120,7 +122,7 @@ def aggregate_pipeline_dataset_features( ) # 2. Apply filtering rules. 
- if is_image and not use_videos: + if is_image and exclude_images: continue if not is_image and not should_keep(key, compiled_patterns): continue diff --git a/tests/processor/test_pipeline.py b/tests/processor/test_pipeline.py index 57e948279..0e9746a63 100644 --- a/tests/processor/test_pipeline.py +++ b/tests/processor/test_pipeline.py @@ -2370,14 +2370,32 @@ def test_aggregate_images_when_use_videos_false(): out = aggregate_pipeline_dataset_features( pipeline=rp, initial_features={PipelineFeatureType.ACTION: {}, PipelineFeatureType.OBSERVATION: initial}, - use_videos=False, # expect "image" dtype + use_videos=False, # images kept, stored as "image" dtype patterns=None, ) key = f"{OBS_IMAGES}.back" key_front = f"{OBS_IMAGES}.front" - assert key not in out - assert key_front not in out + assert key in out + assert key_front in out + assert out[key]["dtype"] == "image" + assert out[key_front]["dtype"] == "image" + assert out[key]["shape"] == initial["back"] + + +def test_aggregate_images_excluded(): + rp = DataProcessorPipeline([AddObservationStateFeatures(add_front_image=True)]) + initial = {"back": (480, 640, 3)} + + out = aggregate_pipeline_dataset_features( + pipeline=rp, + initial_features={PipelineFeatureType.ACTION: {}, PipelineFeatureType.OBSERVATION: initial}, + exclude_images=True, + patterns=None, + ) + + assert f"{OBS_IMAGES}.back" not in out + assert f"{OBS_IMAGES}.front" not in out def test_aggregate_images_when_use_videos_true():