diff --git a/src/lerobot/datasets/dataset_reader.py b/src/lerobot/datasets/dataset_reader.py
index 3720a5084..dd496142f 100644
--- a/src/lerobot/datasets/dataset_reader.py
+++ b/src/lerobot/datasets/dataset_reader.py
@@ -87,7 +87,7 @@ class DatasetReader:
         """Attempt to load from local cache. Returns True if data is sufficient."""
         try:
             self.hf_dataset = self._load_hf_dataset()
-        except (FileNotFoundError, NotADirectoryError):
+        except (FileNotFoundError, NotADirectoryError, ValueError):
             self.hf_dataset = None
             return False
         if not self._check_cached_episodes_sufficient():
diff --git a/src/lerobot/datasets/io_utils.py b/src/lerobot/datasets/io_utils.py
index cee6cfba8..2d1b5433d 100644
--- a/src/lerobot/datasets/io_utils.py
+++ b/src/lerobot/datasets/io_utils.py
@@ -78,7 +78,10 @@ def load_nested_dataset(
     with SuppressProgressBars():
         # We use .from_parquet() memory-mapped loading for efficiency
         filters = pa_ds.field("episode_index").isin(episodes) if episodes is not None else None
-        return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features)
+        try:
+            return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features)
+        except ValueError as e:
+            raise ValueError(f"Failed to load parquet files in {pq_dir}, make sure the dataset is valid and is not missing any files.") from e