From 5231ba53dca301275f0ee7ed086f1b954b551359 Mon Sep 17 00:00:00 2001 From: CarolinePascal Date: Thu, 9 Apr 2026 17:28:28 +0200 Subject: [PATCH] fix(loading errors): improve dataset loading error handling and logging --- src/lerobot/datasets/dataset_reader.py | 2 +- src/lerobot/datasets/io_utils.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lerobot/datasets/dataset_reader.py b/src/lerobot/datasets/dataset_reader.py index 3720a5084..dd496142f 100644 --- a/src/lerobot/datasets/dataset_reader.py +++ b/src/lerobot/datasets/dataset_reader.py @@ -87,7 +87,7 @@ class DatasetReader: """Attempt to load from local cache. Returns True if data is sufficient.""" try: self.hf_dataset = self._load_hf_dataset() - except (FileNotFoundError, NotADirectoryError): + except (FileNotFoundError, NotADirectoryError, ValueError): self.hf_dataset = None return False if not self._check_cached_episodes_sufficient(): diff --git a/src/lerobot/datasets/io_utils.py b/src/lerobot/datasets/io_utils.py index cee6cfba8..2d1b5433d 100644 --- a/src/lerobot/datasets/io_utils.py +++ b/src/lerobot/datasets/io_utils.py @@ -78,7 +78,10 @@ def load_nested_dataset( with SuppressProgressBars(): # We use .from_parquet() memory-mapped loading for efficiency filters = pa_ds.field("episode_index").isin(episodes) if episodes is not None else None - return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features) + try: + return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features) + except ValueError: + raise ValueError(f"Failed to load parquet files in {pq_dir}, make sure the dataset is valid and is not missing any files.")