Compare commits

...

1 Commit

Author SHA1 Message Date
CarolinePascal 5231ba53dc fix(loading errors): improving dataset loading errors handling and logging 2026-04-09 17:43:42 +02:00
2 changed files with 5 additions and 2 deletions
+1 -1
@@ -87,7 +87,7 @@ class DatasetReader:
         """Attempt to load from local cache. Returns True if data is sufficient."""
         try:
             self.hf_dataset = self._load_hf_dataset()
-        except (FileNotFoundError, NotADirectoryError):
+        except (FileNotFoundError, NotADirectoryError, ValueError):
             self.hf_dataset = None
             return False
         if not self._check_cached_episodes_sufficient():
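
The widened exception tuple means a corrupt or partially written local cache, which surfaces as a ValueError from the datasets library, now falls back to re-fetching instead of crashing. Below is a minimal sketch of that fallback pattern, using a hypothetical CachedReader class and datasets.load_from_disk in place of the repository's own _load_hf_dataset helper; only the widened except tuple mirrors the actual change.

from datasets import load_from_disk


class CachedReader:
    """Hypothetical illustration of the cache-loading fallback pattern."""

    def __init__(self, cache_dir: str):
        self.cache_dir = cache_dir
        self.hf_dataset = None

    def _try_load_from_cache(self) -> bool:
        """Attempt to load from local cache. Returns True if data is sufficient."""
        try:
            self.hf_dataset = load_from_disk(self.cache_dir)
        except (FileNotFoundError, NotADirectoryError, ValueError):
            # ValueError covers corrupt or incomplete caches, not just missing directories.
            self.hf_dataset = None
            return False
        return True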
+4 -1
@@ -78,7 +78,10 @@ def load_nested_dataset(
     with SuppressProgressBars():
         # We use .from_parquet() memory-mapped loading for efficiency
         filters = pa_ds.field("episode_index").isin(episodes) if episodes is not None else None
-        return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features)
+        try:
+            return Dataset.from_parquet([str(path) for path in paths], filters=filters, features=features)
+        except ValueError:
+            raise ValueError(f"Failed to load parquet files in {pq_dir}, make sure the dataset is valid and is not missing any files.")


 def get_parquet_num_frames(parquet_path: str | Path) -> int:
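
With this change, a ValueError raised by pyarrow during the memory-mapped load (for example a truncated shard or a mismatched schema) now propagates with the offending directory named in the message. Below is a standalone sketch of the same wrap-and-reraise pattern; load_parquet_dir is an illustrative name rather than the repository's API, and exception chaining with "from e" is added here to preserve the original traceback.

from pathlib import Path

from datasets import Dataset


def load_parquet_dir(pq_dir: Path) -> Dataset:
    """Illustrative sketch of the error-wrapping pattern in the diff above."""
    paths = sorted(pq_dir.rglob("*.parquet"))
    try:
        # Memory-mapped load of all parquet shards found under pq_dir.
        return Dataset.from_parquet([str(path) for path in paths])
    except ValueError as e:
        # Corrupt shards or inconsistent schemas surface as ValueError; add context.
        raise ValueError(
            f"Failed to load parquet files in {pq_dir}, make sure the dataset is valid "
            "and is not missing any files."
        ) from e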