mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-17 01:30:14 +00:00
add: final aggregation utils to obtain one dataset only
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# Slurm batch job: run the behavior1k aggregation script that merges the
# per-task datasets into one dataset.
#
# NOTE: the logs/ directory used by -o/-e below must already exist in the
# submission directory; Slurm does not create it.
#SBATCH -J b1k-aggregate
#SBATCH -p hopper-cpu
#SBATCH --qos=high
#SBATCH -c 2                          # More CPUs won't help much (I/O bound)
#SBATCH -t 20:00:00                   # Conservative: 5.2x pattern suggests ~13h, so 20h leaves headroom
#SBATCH --mem=8G                      # Peak will be ~5-6GB, 8G gives comfortable margin
#SBATCH -o logs/aggregate-50-%j.out
#SBATCH -e logs/aggregate-50-%j.err

# Abort on the first failing command, unset variable, or failed pipeline stage.
set -euo pipefail

# Match the thread count to the CPUs Slurm granted (1 when run outside Slurm).
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
# Flush Python output immediately so the log files can be followed live.
export PYTHONUNBUFFERED=1

# Shell init files and conda activation hooks are not written with `set -u`
# in mind and may reference unset variables. Relax nounset only while they
# run so the job does not abort before the real work starts.
set +u
source "$HOME/.bashrc"
conda activate lerobot
set -u

cd /admin/home/francesco_capuano/lerobot

python examples/behavior_1k/aggregate_tasks_datasets.py
|
||||
@@ -0,0 +1,30 @@
|
||||
from pathlib import Path
|
||||
|
||||
from lerobot.datasets.aggregate import aggregate_datasets
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
|
||||
def main():
    """Merge the 50 per-task behavior1k datasets into one and upload it.

    Builds the repo id and local root of every task dataset (tasks 0000 to
    0049), hands them to ``aggregate_datasets`` to produce a single dataset
    under the aggregated root, then loads that result as a
    ``LeRobotDataset`` and pushes it to the hub.
    """
    merged_repo_id = "fracapuano/behavior1k"
    merged_root = Path("/fsx/francesco_capuano/behavior1k/behavior1k")

    # Collect the per-task sources in lockstep so repo ids and roots stay aligned.
    source_repo_ids = []
    source_roots = []
    for i in range(50):
        source_repo_ids.append(f"fracapuano/behavior1k-task{i:04d}")
        source_roots.append(Path(f"/fsx/francesco_capuano/behavior1k/behavior1k-task{i:04d}"))

    aggregate_datasets(
        repo_ids=source_repo_ids,
        roots=source_roots,
        aggr_repo_id=merged_repo_id,
        aggr_root=merged_root,
    )

    # Load the freshly aggregated dataset from disk and publish it.
    merged_dataset = LeRobotDataset(repo_id=merged_repo_id, root=merged_root)
    merged_dataset.push_to_hub()
|
||||
|
||||
|
||||
# Run the aggregation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user