mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-11 14:49:43 +00:00
more changes
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
Goal:
|
||||
Create a high and a low policy; the high policy takes obs + text and returns text — it's a VLM
|
||||
The low policy is a VLA that takes the text returned by the high policy and outputs actions
|
||||
|
||||
|
||||
The high policy is run every second, or whenever the user sends a prompt
|
||||
|
||||
|
||||
Synthetic data generation:
|
||||
D demo -> teleop data with a global task annotation
|
||||
D label -> segment data into short skills (one to three seconds)
|
||||
D syn: p-gen will create the high-level prompt a user might have given to p-hi
|
||||
Given D label prompt p-gen to imagine appropriate action, take images, ALL PRIOR skill labels in the episode: ℓ̂₀, …, ℓ̂ₜ₋₁
|
||||
|
||||
“Given the scene + all previous steps + current needed skill ℓ̂₅,
|
||||
generate a user request that logically leads to ℓ̂₅.”
|
||||
|
||||
Train:
|
||||
Phi(lt| images, global label) cross entropy - next token predictions
|
||||
Plow(At| images, qt, lt)
|
||||
@@ -0,0 +1,141 @@
|
||||
#!/bin/bash
#SBATCH --time=24:00:00
#SBATCH --partition=hopper-cpu
#SBATCH --cpus-per-task=96
#SBATCH --output=/fsx/jade_choghari/logs/launcher_%j.out
#SBATCH --error=/fsx/jade_choghari/logs/launcher_%j.err

# Two-step launcher for converting an image LeRobot dataset to videos:
#   Step 1 (default):              bash convert_to_video_parallel.sh
#   Step 2 (after jobs complete):  bash convert_to_video_parallel.sh aggregate

# Load conda and activate the environment.
source /fsx/jade_choghari/miniforge3/etc/profile.d/conda.sh
conda activate lerobot
set -e  # Exit on error

# Input dataset
REPO_ID="lerobot"
ROOT="/fsx/jade_choghari/vlabench-primitive/"

# Output paths
OUTPUT_DIR="/fsx/jade_choghari/vlabench-primitive-encoded"
OUTPUT_REPO_ID="vlabench-primitive-encoded"
LOGS_DIR="/fsx/jade_choghari/logs/convert_video"

# Video encoding settings
VCODEC="libsvtav1"
PIX_FMT="yuv420p"
GOP_SIZE=2
CRF=30
FAST_DECODE=0

# Parallelization settings
NUM_WORKERS=24       # Number of parallel SLURM workers
NUM_IMAGE_WORKERS=4  # Threads per worker for image saving

# SLURM settings
PARTITION="hopper-cpu"  # Change to your CPU partition name
CPUS_PER_TASK=32        # CPUs per worker
MEM_PER_CPU="4G"        # Memory per CPU
TIME_LIMIT="24:00:00"   # Time limit per job

###############################################################################
# STEP 1: Parallel Video Conversion (skipped in "aggregate" mode)
###############################################################################
# BUGFIX: Step 1 previously ran unconditionally, so invoking the script with
# the "aggregate" argument wiped ${OUTPUT_DIR} — deleting the freshly
# converted shards — and re-submitted the conversion jobs before aggregating.
# It is now guarded so "aggregate" mode only runs Step 2.
if [ "$1" != "aggregate" ]; then
    rm -rf "${OUTPUT_DIR}"
    mkdir -p "${OUTPUT_DIR}"

    echo "=============================================="
    echo "STEP 1: Starting parallel video conversion"
    echo " Workers: ${NUM_WORKERS}"
    echo " Input: ${REPO_ID} (root: ${ROOT})"
    echo " Output: ${OUTPUT_DIR}"
    echo "=============================================="

    python /admin/home/jade_choghari/lerobot/examples/port_datasets/slurm_convert_to_video.py \
        --repo-id "${REPO_ID}" \
        --root "${ROOT}" \
        --output-dir "${OUTPUT_DIR}" \
        --output-repo-id "${OUTPUT_REPO_ID}" \
        --vcodec "${VCODEC}" \
        --pix-fmt "${PIX_FMT}" \
        --g ${GOP_SIZE} \
        --crf ${CRF} \
        --fast-decode ${FAST_DECODE} \
        --num-image-workers ${NUM_IMAGE_WORKERS} \
        --logs-dir "${LOGS_DIR}" \
        --job-name "convert_video" \
        --slurm 1 \
        --workers ${NUM_WORKERS} \
        --partition "${PARTITION}" \
        --cpus-per-task ${CPUS_PER_TASK} \
        --mem-per-cpu "${MEM_PER_CPU}" \
        --time-limit "${TIME_LIMIT}"

    echo ""
    echo "✓ Parallel conversion jobs submitted!"
    echo " Monitor with: squeue -u \$USER"
    echo " Check logs in: ${LOGS_DIR}/convert_video"
    echo ""
    echo "Wait for all jobs to complete before running Step 2."
    echo "You can check completion with: squeue -u \$USER | grep convert_video"
    echo ""
    echo "After all jobs complete, run Step 2 to aggregate shards:"
    echo " bash convert_to_video_parallel.sh aggregate"
fi

###############################################################################
# STEP 2: Aggregate Shards (run this after Step 1 completes)
###############################################################################

if [ "$1" == "aggregate" ]; then
    echo ""
    echo "=============================================="
    echo "STEP 2: Aggregating video shards"
    echo " Shards: ${NUM_WORKERS}"
    echo " Input: ${OUTPUT_DIR}/shard_XXXX"
    echo " Output: ${OUTPUT_DIR}_final"
    echo "=============================================="

    python slurm_aggregate_video_shards.py \
        --shards-dir "${OUTPUT_DIR}" \
        --output-dir "${OUTPUT_DIR}_final" \
        --output-repo-id "${OUTPUT_REPO_ID}" \
        --num-shards ${NUM_WORKERS} \
        --logs-dir "${LOGS_DIR}" \
        --job-name "aggregate_video" \
        --slurm 1 \
        --partition "${PARTITION}" \
        --cpus-per-task 16 \
        --mem-per-cpu "8G" \
        --time-limit "08:00:00"

    echo ""
    echo "✓ Aggregation job submitted!"
    echo " Monitor with: squeue -u \$USER | grep aggregate_video"
    echo " Check logs in: ${LOGS_DIR}/aggregate_video"
    echo ""
    echo "After completion, your final dataset will be in:"
    echo " ${OUTPUT_DIR}_final"
fi

###############################################################################
# Helpful information
###############################################################################

if [ "$1" != "aggregate" ]; then
    echo ""
    echo "=============================================="
    echo "WORKFLOW SUMMARY"
    echo "=============================================="
    echo ""
    echo "1. Step 1 is now running - it will:"
    echo " - Split episodes across ${NUM_WORKERS} workers"
    echo " - Each worker converts its episodes to video"
    echo " - Creates shard datasets in ${OUTPUT_DIR}/shard_XXXX"
    echo ""
    echo "2. After Step 1 completes, run Step 2:"
    echo " bash convert_to_video_parallel.sh aggregate"
    echo ""
    echo "3. Step 2 will merge all shards into a single dataset"
    echo ""
    echo "=============================================="
fi
||||
|
||||
@@ -0,0 +1,266 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Aggregate video dataset shards into a single dataset.
|
||||
|
||||
After parallel conversion using slurm_convert_to_video.py, this script merges
|
||||
all the shard datasets into one final dataset.
|
||||
|
||||
Example usage:
|
||||
python slurm_aggregate_video_shards.py \
|
||||
--shards-dir /fsx/jade_choghari/libero_video \
|
||||
--output-dir /fsx/jade_choghari/libero_video_final \
|
||||
--output-repo-id lerobot_video \
|
||||
--num-workers 100 \
|
||||
--partition cpu_partition \
|
||||
--cpus-per-task 16
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from datatrove.executor import LocalPipelineExecutor
|
||||
from datatrove.executor.slurm import SlurmPipelineExecutor
|
||||
from datatrove.pipeline.base import PipelineStep
|
||||
|
||||
|
||||
class AggregateVideoShards(PipelineStep):
    """Pipeline step that merges per-worker shard datasets into one dataset.

    Shards are expected under ``<shards_dir>/shard_0000 .. shard_<n-1>``,
    each a standalone LeRobot dataset produced by the parallel conversion
    step. Only rank 0 does any work; other ranks return immediately.
    """

    def __init__(
        self,
        shards_dir: str | Path,
        output_dir: str | Path,
        output_repo_id: str,
        num_shards: int,
    ):
        super().__init__()
        self.shards_dir = Path(shards_dir)
        self.output_dir = Path(output_dir)
        self.output_repo_id = output_repo_id
        self.num_shards = num_shards

    def run(self, data=None, rank: int = 0, world_size: int = 1):
        """Aggregate all shards into a single dataset."""
        from lerobot.datasets.dataset_tools import merge_datasets
        from lerobot.datasets.lerobot_dataset import LeRobotDataset
        from lerobot.utils.utils import init_logging

        init_logging()

        # Aggregation is inherently serial: only rank 0 participates.
        if rank != 0:
            logging.info(f"Worker {rank} skipping - only worker 0 performs aggregation")
            return

        logging.info(f"Starting aggregation of {self.num_shards} shards")

        # Load every shard that exists and is readable; missing/broken shards
        # are logged and skipped rather than aborting the whole merge.
        loaded = []
        for idx in range(self.num_shards):
            shard_dir = self.shards_dir / f"shard_{idx:04d}"
            if not shard_dir.exists():
                logging.warning(f"Shard directory not found: {shard_dir}")
                continue

            # Shard repo ids follow the naming used by the conversion step.
            repo_id = f"{self.output_repo_id}_shard_{idx:04d}"
            try:
                ds = LeRobotDataset(repo_id, root=shard_dir)
            except Exception as exc:
                logging.error(f"Failed to load shard {idx}: {exc}")
                continue
            loaded.append(ds)
            logging.info(
                f"Loaded shard {idx}: {ds.meta.total_episodes} episodes, "
                f"{ds.meta.total_frames} frames"
            )

        if not loaded:
            raise ValueError(f"No valid shards found in {self.shards_dir}")

        logging.info(f"Successfully loaded {len(loaded)} shards, starting merge")

        # Merge all shards into the final output dataset.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        merged = merge_datasets(
            loaded,
            output_repo_id=self.output_repo_id,
            output_dir=self.output_dir,
        )

        logging.info("✓ Aggregation complete!")
        logging.info(f"Merged dataset saved to: {self.output_dir}")
        logging.info(f"Total episodes: {merged.meta.total_episodes}")
        logging.info(f"Total frames: {merged.meta.total_frames}")
||||
|
||||
def make_aggregate_executor(
    shards_dir,
    output_dir,
    output_repo_id,
    num_shards,
    job_name,
    logs_dir,
    partition,
    cpus_per_task,
    mem_per_cpu,
    time_limit,
    slurm=True,
):
    """Create the executor that runs the shard-aggregation pipeline.

    Returns a ``SlurmPipelineExecutor`` when *slurm* is truthy, otherwise a
    ``LocalPipelineExecutor``. Either way the pipeline consists of a single
    ``AggregateVideoShards`` step run by exactly one worker — merging is a
    serial operation.
    """
    step = AggregateVideoShards(
        shards_dir=shards_dir,
        output_dir=output_dir,
        output_repo_id=output_repo_id,
        num_shards=num_shards,
    )
    common = {
        "pipeline": [step],
        "logging_dir": str(logs_dir / job_name),
    }

    if not slurm:
        return LocalPipelineExecutor(tasks=1, workers=1, **common)

    return SlurmPipelineExecutor(
        job_name=job_name,
        tasks=1,
        workers=1,
        time=time_limit,
        partition=partition,
        cpus_per_task=cpus_per_task,
        sbatch_args={"mem-per-cpu": mem_per_cpu},
        **common,
    )
|
||||
|
||||
def main():
    """Parse CLI arguments and launch the shard-aggregation pipeline."""
    parser = argparse.ArgumentParser(
        description="Aggregate video dataset shards into a single dataset",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Required paths and sizing.
    parser.add_argument("--shards-dir", type=Path, required=True, help="Directory containing shard_XXXX subdirectories")
    parser.add_argument("--output-dir", type=Path, required=True, help="Output directory for the aggregated dataset")
    parser.add_argument("--output-repo-id", type=str, required=True, help="Repository ID for the aggregated dataset")
    parser.add_argument("--num-shards", type=int, required=True, help="Number of shards to aggregate (should match --workers from conversion)")
    parser.add_argument("--logs-dir", type=Path, required=True, help="Path to logs directory for datatrove")
    # SLURM-related knobs.
    parser.add_argument("--job-name", type=str, default="aggregate_video_shards", help="Job name for SLURM")
    parser.add_argument("--slurm", type=int, default=1, help="Launch over SLURM (1) or locally (0)")
    parser.add_argument("--partition", type=str, required=True, help="SLURM partition (use CPU partition)")
    parser.add_argument("--cpus-per-task", type=int, default=16, help="Number of CPUs per task (aggregation can use more CPUs)")
    parser.add_argument("--mem-per-cpu", type=str, default="8G", help="Memory per CPU")
    parser.add_argument("--time-limit", type=str, default="08:00:00", help="Time limit for SLURM job")

    args = parser.parse_args()

    executor = make_aggregate_executor(
        shards_dir=args.shards_dir,
        output_dir=args.output_dir,
        output_repo_id=args.output_repo_id,
        num_shards=args.num_shards,
        job_name=args.job_name,
        logs_dir=args.logs_dir,
        partition=args.partition,
        cpus_per_task=args.cpus_per_task,
        mem_per_cpu=args.mem_per_cpu,
        time_limit=args.time_limit,
        slurm=args.slurm == 1,  # integer flag: 1 -> SLURM, anything else -> local
    )

    logging.info("Starting shard aggregation")
    executor.run()
    logging.info("Aggregation job submitted/completed")


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
@@ -0,0 +1,539 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Parallelize video conversion using SLURM and Datatrove.
|
||||
|
||||
This script converts an image-based LeRobot dataset to video format by distributing
|
||||
episodes across multiple workers for parallel processing.
|
||||
|
||||
Example usage:
|
||||
python slurm_convert_to_video.py \
|
||||
--repo-id lerobot \
|
||||
--root /fsx/jade_choghari/libero/ \
|
||||
--output-dir /fsx/jade_choghari/libero_video \
|
||||
--output-repo-id lerobot_video \
|
||||
--workers 100 \
|
||||
--partition cpu_partition \
|
||||
--cpus-per-task 8 \
|
||||
--logs-dir ./logs
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from datatrove.executor import LocalPipelineExecutor
|
||||
from datatrove.executor.slurm import SlurmPipelineExecutor
|
||||
from datatrove.pipeline.base import PipelineStep
|
||||
|
||||
|
||||
class ConvertEpisodesToVideo(PipelineStep):
    """Pipeline step that converts a slice of an image LeRobot dataset to videos.

    Each worker (SLURM task) processes a contiguous range of episodes and
    writes them out as an independent shard dataset under
    ``<output_dir>/shard_<rank:04d>``. Shards are merged afterwards by
    ``slurm_aggregate_video_shards.py``.
    """

    def __init__(
        self,
        repo_id: str,
        root: str | None,
        output_dir: str,
        output_repo_id: str | None,
        vcodec: str = "libsvtav1",
        pix_fmt: str = "yuv420p",
        g: int = 2,
        crf: int = 30,
        fast_decode: int = 0,
        num_image_workers: int = 4,
    ):
        super().__init__()
        self.repo_id = repo_id
        self.root = root
        self.output_dir = Path(output_dir)
        # Default the output repo id to "<repo_id>_video" when not provided.
        self.output_repo_id = output_repo_id or f"{repo_id}_video"
        self.vcodec = vcodec
        self.pix_fmt = pix_fmt
        self.g = g
        self.crf = crf
        self.fast_decode = fast_decode
        self.num_image_workers = num_image_workers

    def run(self, data=None, rank: int = 0, world_size: int = 1):
        """Process this worker's shard of episodes."""
        import shutil

        from datasets.utils.tqdm import disable_progress_bars

        from lerobot.datasets.lerobot_dataset import LeRobotDataset
        from lerobot.utils.utils import init_logging

        init_logging()
        disable_progress_bars()

        logging.info(f"Worker {rank}/{world_size} starting video conversion")

        # Load source dataset
        dataset = LeRobotDataset(self.repo_id, root=self.root)
        total_episodes = dataset.meta.total_episodes

        # Split episodes as evenly as possible: the first `remainder` workers
        # each take one extra episode.
        episodes_per_worker = total_episodes // world_size
        remainder = total_episodes % world_size

        if rank < remainder:
            start_ep = rank * (episodes_per_worker + 1)
            end_ep = start_ep + episodes_per_worker + 1
        else:
            start_ep = remainder * (episodes_per_worker + 1) + (rank - remainder) * episodes_per_worker
            end_ep = start_ep + episodes_per_worker

        episode_indices = list(range(start_ep, end_ep))

        logging.info(
            f"Worker {rank} processing episodes {start_ep} to {end_ep-1} ({len(episode_indices)} episodes)"
        )

        if len(episode_indices) == 0:
            logging.info(f"Worker {rank} has no episodes to process")
            return

        # Create shard-specific output directory.
        shard_output_dir = self.output_dir / f"shard_{rank:04d}"

        # Remove existing directory to avoid conflicts with LeRobotDatasetMetadata.create
        if shard_output_dir.exists():
            logging.warning(f"Shard directory {shard_output_dir} already exists, removing it")
            shutil.rmtree(shard_output_dir)

        logging.info(
            f"Worker {rank} converting {len(episode_indices)} episodes with codec {self.vcodec}, CRF {self.crf}"
        )

        # Convert this shard's episodes. Episode indices are remapped to start
        # from 0 so each shard has a self-consistent file structure.
        self._convert_shard_to_videos(
            dataset=dataset,
            shard_output_dir=shard_output_dir,
            shard_repo_id=f"{self.output_repo_id}_shard_{rank:04d}",
            episode_indices=episode_indices,
        )

        logging.info(f"Worker {rank} completed successfully")

    def _convert_shard_to_videos(
        self,
        dataset,
        shard_output_dir,
        shard_repo_id,
        episode_indices,
    ):
        """Convert a shard's episodes to videos with proper index remapping.

        Writes a complete shard dataset (videos, episode metadata, info,
        stats, tasks) under *shard_output_dir*. Temporary extracted images
        are cleaned up even on failure.
        """
        import shutil

        import pandas as pd
        from tqdm import tqdm

        from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
        from lerobot.datasets.utils import write_info, write_stats, write_tasks
        from lerobot.datasets.video_utils import encode_video_frames, get_video_info
        from lerobot.scripts.lerobot_edit_dataset import (
            _copy_data_without_images,
            save_episode_images_for_video,
        )
        from lerobot.utils.constants import OBS_IMAGE

        # Check that it's an image dataset
        if len(dataset.meta.video_keys) > 0:
            raise ValueError(
                f"This operation is for image datasets only. Video dataset provided: {dataset.repo_id}"
            )

        # Get all image keys
        hf_dataset = dataset.hf_dataset.with_format(None)
        img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)]

        if len(img_keys) == 0:
            raise ValueError(f"No image keys found in dataset {dataset.repo_id}")

        logging.info(f"Converting {len(episode_indices)} episodes with {len(img_keys)} cameras")

        # Create new features dict, converting image features to video features
        new_features = {}
        for key, value in dataset.meta.features.items():
            if key not in img_keys:
                new_features[key] = value
            else:
                # Convert image key to video format
                new_features[key] = value.copy()
                new_features[key]["dtype"] = "video"

        # Create new metadata for video dataset
        new_meta = LeRobotDatasetMetadata.create(
            repo_id=shard_repo_id,
            fps=dataset.meta.fps,
            features=new_features,
            robot_type=dataset.meta.robot_type,
            root=shard_output_dir,
            use_videos=True,
            chunks_size=dataset.meta.chunks_size,
            data_files_size_in_mb=dataset.meta.data_files_size_in_mb,
            video_files_size_in_mb=dataset.meta.video_files_size_in_mb,
        )

        # Create temporary directory for image extraction
        temp_dir = shard_output_dir / "temp_images"
        temp_dir.mkdir(parents=True, exist_ok=True)

        # Process each episode with REMAPPED indices (0, 1, 2, ...)
        all_episode_metadata = []
        fps = int(dataset.fps)
        # BUGFIX: the shard-local frame range was previously computed as
        # new_ep_idx * episode_length, which is only correct when every
        # episode has exactly the same length. Track a running frame offset
        # instead so variable-length episodes get correct from/to indices.
        frame_offset = 0

        try:
            for new_ep_idx, orig_ep_idx in enumerate(tqdm(episode_indices, desc="Converting episodes")):
                # Get episode metadata from source using ORIGINAL index
                src_episode = dataset.meta.episodes[orig_ep_idx]
                episode_length = src_episode["length"]
                episode_duration = episode_length / dataset.fps

                video_metadata = {}

                # Encode videos for each camera
                for img_key in img_keys:
                    # Save images temporarily using ORIGINAL episode index
                    imgs_dir = temp_dir / f"episode_{orig_ep_idx:06d}" / img_key
                    save_episode_images_for_video(
                        dataset, imgs_dir, img_key, orig_ep_idx, self.num_image_workers
                    )

                    # Determine chunk and file indices using NEW (remapped) episode index
                    chunk_idx = new_ep_idx // new_meta.chunks_size
                    file_idx = new_ep_idx % new_meta.chunks_size

                    # Create video path in the new dataset structure
                    video_path = new_meta.root / new_meta.video_path.format(
                        video_key=img_key, chunk_index=chunk_idx, file_index=file_idx
                    )
                    video_path.parent.mkdir(parents=True, exist_ok=True)

                    # Encode video
                    encode_video_frames(
                        imgs_dir=imgs_dir,
                        video_path=video_path,
                        fps=fps,
                        vcodec=self.vcodec,
                        pix_fmt=self.pix_fmt,
                        g=self.g,
                        crf=self.crf,
                        fast_decode=self.fast_decode,
                        overwrite=True,
                    )

                    # Clean up temporary images
                    shutil.rmtree(imgs_dir)

                    # Store video metadata
                    video_metadata[img_key] = {
                        f"videos/{img_key}/chunk_index": chunk_idx,
                        f"videos/{img_key}/file_index": file_idx,
                        f"videos/{img_key}/from_timestamp": 0.0,
                        f"videos/{img_key}/to_timestamp": episode_duration,
                    }

                # Build episode metadata using NEW index and the cumulative
                # frame offset within this shard.
                episode_meta = {
                    "episode_index": new_ep_idx,
                    "length": episode_length,
                    "dataset_from_index": frame_offset,
                    "dataset_to_index": frame_offset + episode_length,
                }
                frame_offset += episode_length

                # Add video metadata
                for img_key in img_keys:
                    episode_meta.update(video_metadata[img_key])

                # Add data chunk/file info
                if "data/chunk_index" in src_episode:
                    episode_meta["data/chunk_index"] = src_episode["data/chunk_index"]
                    episode_meta["data/file_index"] = src_episode["data/file_index"]

                all_episode_metadata.append(episode_meta)

            # Copy and transform data files (removing image columns)
            _copy_data_without_images(dataset, new_meta, episode_indices, img_keys)

            # Save episode metadata
            episodes_df = pd.DataFrame(all_episode_metadata)
            episodes_path = new_meta.root / "meta" / "episodes" / "chunk-000" / "file-000.parquet"
            episodes_path.parent.mkdir(parents=True, exist_ok=True)
            episodes_df.to_parquet(episodes_path, index=False)

            # Update metadata info
            new_meta.info["total_episodes"] = len(episode_indices)
            new_meta.info["total_frames"] = sum(ep["length"] for ep in all_episode_metadata)
            new_meta.info["total_tasks"] = dataset.meta.total_tasks
            new_meta.info["splits"] = {"train": f"0:{len(episode_indices)}"}

            # Update video info for all image keys using the actual first video file
            for img_key in img_keys:
                if not new_meta.features[img_key].get("info", None):
                    # Use the first actually created video file
                    chunk_idx = all_episode_metadata[0][f"videos/{img_key}/chunk_index"]
                    file_idx = all_episode_metadata[0][f"videos/{img_key}/file_index"]
                    video_path = new_meta.root / new_meta.video_path.format(
                        video_key=img_key, chunk_index=chunk_idx, file_index=file_idx
                    )
                    new_meta.info["features"][img_key]["info"] = get_video_info(video_path)

            write_info(new_meta.info, new_meta.root)

            # Copy stats and tasks, dropping stats for the removed image keys.
            if dataset.meta.stats is not None:
                new_stats = {k: v for k, v in dataset.meta.stats.items() if k not in img_keys}
                write_stats(new_stats, new_meta.root)

            if dataset.meta.tasks is not None:
                write_tasks(dataset.meta.tasks, new_meta.root)

        finally:
            # Clean up temporary directory
            if temp_dir.exists():
                shutil.rmtree(temp_dir)
|
||||
|
||||
def make_convert_executor(
    repo_id,
    root,
    output_dir,
    output_repo_id,
    vcodec,
    pix_fmt,
    g,
    crf,
    fast_decode,
    num_image_workers,
    job_name,
    logs_dir,
    workers,
    partition,
    cpus_per_task,
    mem_per_cpu,
    time_limit,
    slurm=True,
):
    """Create the executor that runs the parallel video-conversion pipeline.

    Returns a ``SlurmPipelineExecutor`` (one task per worker) when *slurm*
    is truthy, otherwise a ``LocalPipelineExecutor`` that processes the same
    number of shards sequentially with a single worker.
    """
    step = ConvertEpisodesToVideo(
        repo_id=repo_id,
        root=root,
        output_dir=output_dir,
        output_repo_id=output_repo_id,
        vcodec=vcodec,
        pix_fmt=pix_fmt,
        g=g,
        crf=crf,
        fast_decode=fast_decode,
        num_image_workers=num_image_workers,
    )
    common = {
        "pipeline": [step],
        "logging_dir": str(logs_dir / job_name),
    }

    if not slurm:
        # Local mode: same shard count, but run one shard at a time.
        return LocalPipelineExecutor(tasks=workers, workers=1, **common)

    return SlurmPipelineExecutor(
        job_name=job_name,
        tasks=workers,  # Number of parallel tasks = number of workers
        workers=workers,
        time=time_limit,
        partition=partition,
        cpus_per_task=cpus_per_task,
        sbatch_args={"mem-per-cpu": mem_per_cpu},
        **common,
    )
|
||||
|
||||
def main():
    """Parse CLI arguments and launch the parallel video-conversion pipeline."""
    parser = argparse.ArgumentParser(
        description="Parallelize video conversion across SLURM workers",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Input/Output paths
    parser.add_argument("--repo-id", type=str, required=True, help="Repository ID of the source image dataset")
    parser.add_argument("--root", type=str, default=None, help="Root directory containing the source dataset (default: HF cache)")
    parser.add_argument("--output-dir", type=str, required=True, help="Output directory for the video dataset")
    parser.add_argument("--output-repo-id", type=str, default=None, help="Repository ID for output dataset (default: <repo_id>_video)")

    # Video encoding parameters
    parser.add_argument("--vcodec", type=str, default="libsvtav1", help="Video codec")
    parser.add_argument("--pix-fmt", type=str, default="yuv420p", help="Pixel format")
    parser.add_argument("--g", type=int, default=2, help="GOP size (group of pictures)")
    parser.add_argument("--crf", type=int, default=30, help="Constant rate factor (quality)")
    parser.add_argument("--fast-decode", type=int, default=0, help="Fast decode tuning")
    parser.add_argument("--num-image-workers", type=int, default=4, help="Number of threads per worker for saving images")

    # SLURM parameters
    parser.add_argument("--logs-dir", type=Path, required=True, help="Path to logs directory for datatrove")
    parser.add_argument("--job-name", type=str, default="convert_to_video", help="Job name for SLURM")
    parser.add_argument("--slurm", type=int, default=1, help="Launch over SLURM (1) or locally (0)")
    parser.add_argument("--workers", type=int, default=100, help="Number of parallel workers (each processes different episodes)")
    parser.add_argument("--partition", type=str, required=True, help="SLURM partition (use CPU partition)")
    parser.add_argument("--cpus-per-task", type=int, default=8, help="Number of CPUs per SLURM task")
    parser.add_argument("--mem-per-cpu", type=str, default="4G", help="Memory per CPU")
    parser.add_argument("--time-limit", type=str, default="08:00:00", help="Time limit for SLURM job")

    args = parser.parse_args()

    executor = make_convert_executor(
        repo_id=args.repo_id,
        root=args.root,
        output_dir=args.output_dir,
        output_repo_id=args.output_repo_id,
        vcodec=args.vcodec,
        pix_fmt=args.pix_fmt,
        g=args.g,
        crf=args.crf,
        fast_decode=args.fast_decode,
        num_image_workers=args.num_image_workers,
        job_name=args.job_name,
        logs_dir=args.logs_dir,
        workers=args.workers,
        partition=args.partition,
        cpus_per_task=args.cpus_per_task,
        mem_per_cpu=args.mem_per_cpu,
        time_limit=args.time_limit,
        slurm=args.slurm == 1,  # integer flag: 1 -> SLURM, anything else -> local
    )

    logging.info(f"Starting video conversion with {args.workers} workers")
    executor.run()
    logging.info("All workers submitted/completed")


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
==============================================
|
||||
STEP 1: Starting parallel video conversion
|
||||
Workers: 2
|
||||
Input: lerobot (root: /fsx/jade_choghari/vlabench-primitive/)
|
||||
Output: /fsx/jade_choghari/vlabench-primitive-encoded
|
||||
==============================================
|
||||
python: can't open file '/admin/home/jade_choghari/lerobot/slurm_convert_to_video.py': [Errno 2] No such file or directory
|
||||
Reference in New Issue
Block a user