mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 20:19:43 +00:00
add: final aggregation utils to obtain one dataset only
This commit is contained in:
@@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
# SLURM batch job that runs examples/behavior_1k/aggregate_tasks_datasets.py
# inside the `lerobot` conda environment.
# NOTE: the logs/ directory used by -o/-e below must already exist in the
# submission directory; sbatch does not create missing output directories.
#SBATCH -J b1k-aggregate
#SBATCH -p hopper-cpu
#SBATCH --qos=high
#SBATCH -c 2                # More CPUs won't help much (I/O bound)
#SBATCH -t 20:00:00         # Conservative: 5.2x pattern suggests ~13h, so 20h leaves headroom
#SBATCH --mem=8G            # Peak will be ~5-6GB, 8G gives comfortable margin
#SBATCH -o logs/aggregate-50-%j.out
#SBATCH -e logs/aggregate-50-%j.err

# Abort on the first failing command, unset variable, or failed pipeline stage.
set -euo pipefail

# Match the thread count to the CPUs SLURM allocated (falls back to 1 outside SLURM).
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
# Flush Python output immediately so the log files can be followed while the job runs.
export PYTHONUNBUFFERED=1

# ~/.bashrc and conda's activation hooks may reference variables that are not
# set in a batch shell; with nounset active that would kill the job before the
# aggregation starts. Relax nounset only while the environment is set up.
set +u
source "$HOME/.bashrc"
conda activate lerobot
set -u

cd /admin/home/francesco_capuano/lerobot

python examples/behavior_1k/aggregate_tasks_datasets.py
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lerobot.datasets.aggregate import aggregate_datasets
|
||||||
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Merge the 50 per-task behavior1k datasets into one dataset and upload it.

    The source datasets are addressed both by hub repo id and by their local
    root directory; the merged dataset is written to a local root, reopened as
    a ``LeRobotDataset`` and pushed to the hub.
    """
    merged_repo_id = "fracapuano/behavior1k"
    merged_root = Path("/fsx/francesco_capuano/behavior1k/behavior1k")

    # Build the repo ids and local roots in lockstep so that entry k of one
    # list always refers to the same task as entry k of the other.
    source_repo_ids = []
    source_roots = []
    for task_idx in range(50):
        source_repo_ids.append(f"fracapuano/behavior1k-task{task_idx:04d}")
        source_roots.append(Path(f"/fsx/francesco_capuano/behavior1k/behavior1k-task{task_idx:04d}"))

    aggregate_datasets(
        repo_ids=source_repo_ids,
        roots=source_roots,
        aggr_repo_id=merged_repo_id,
        aggr_root=merged_root,
    )

    # Reopen the freshly written aggregate from disk and publish it.
    LeRobotDataset(repo_id=merged_repo_id, root=merged_root).push_to_hub()
|
||||||
|
|
||||||
|
|
||||||
|
# Only run the aggregation when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user