WIP aggregate

This commit is contained in:
Remi Cadene
2025-05-16 17:40:34 +00:00
committed by Michel Aractingi
parent 220997ff47
commit 13a1f68b8e
2 changed files with 285 additions and 182 deletions
@@ -6,7 +6,6 @@ from datatrove.executor.slurm import SlurmPipelineExecutor
from datatrove.pipeline.base import PipelineStep
from examples.port_datasets.droid_rlds.port_droid import DROID_SHARDS
from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
class PortDroidShards(PipelineStep):
@@ -30,6 +29,12 @@ class PortDroidShards(PipelineStep):
shard_repo_id = f"{self.repo_id}_world_{world_size}_rank_{rank}"
try:
validate_dataset(shard_repo_id)
return
except:
pass
port_droid(
self.raw_dir,
shard_repo_id,