add video encoding tool

This commit is contained in:
Jade Choghari
2025-12-01 13:46:22 +01:00
parent 5f7b5f2817
commit d22fa6446b
2 changed files with 307 additions and 5 deletions
+49 -1
View File
@@ -11,13 +11,14 @@ LeRobot provides several utilities for manipulating datasets:
3. **Merge Datasets** - Combine multiple datasets into one. The datasets must have identical features, and episodes are concatenated in the order specified in `repo_ids` 3. **Merge Datasets** - Combine multiple datasets into one. The datasets must have identical features, and episodes are concatenated in the order specified in `repo_ids`
4. **Add Features** - Add new features to a dataset 4. **Add Features** - Add new features to a dataset
5. **Remove Features** - Remove features from a dataset 5. **Remove Features** - Remove features from a dataset
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage
The core implementation is in `lerobot.datasets.dataset_tools`. The core implementation is in `lerobot.datasets.dataset_tools`.
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`. An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.
## Command-Line Tool: lerobot-edit-dataset ## Command-Line Tool: lerobot-edit-dataset
`lerobot-edit-dataset` is a command-line script for editing datasets. It can be used to delete episodes, split datasets, merge datasets, add features, and remove features. `lerobot-edit-dataset` is a command-line script for editing datasets. It can be used to delete episodes, split datasets, merge datasets, add features, remove features, and convert image datasets to video format.
Run `lerobot-edit-dataset --help` for more information on the configuration of each operation. Run `lerobot-edit-dataset --help` for more information on the configuration of each operation.
@@ -86,6 +87,53 @@ lerobot-edit-dataset \
--operation.feature_names "['observation.images.top']" --operation.feature_names "['observation.images.top']"
``` ```
#### Convert to Video
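Convert an image-based dataset to video format. This is useful for reducing storage requirements and improving data loading performance. Videos are encoded with configurable quality settings. Note that only the first camera (image feature) of each episode is encoded, and the output is a set of standalone `.mp4` files (one per episode) rather than a new LeRobot dataset.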
```bash
# Convert all episodes to video format with default settings
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos
# Convert with custom video codec and quality settings
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.vcodec libsvtav1 \
--operation.pix_fmt yuv420p \
--operation.g 2 \
--operation.crf 30
# Convert only specific episodes
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.episode_indices "[0, 1, 2, 5, 10]"
# Convert with multiple workers for parallel processing
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.num_workers 8
```
**Parameters:**
- `output_dir`: Directory where outputs are written; extracted frames are saved under `images/` and encoded videos under `videos/` (default: `outputs/converted_videos`)
- `vcodec`: Video codec to use - options: `h264`, `hevc`, `libsvtav1` (default: `libsvtav1`)
- `pix_fmt`: Pixel format - options: `yuv420p`, `yuv444p` (default: `yuv420p`)
- `g`: Group of pictures (GOP) size, i.e. the maximum distance between keyframes - lower values make individual frames faster to seek and decode but produce larger files (default: 2)
- `crf`: Constant rate factor - lower values give better quality but larger files; 0 is the highest quality (lossless only for codecs that support it, e.g. `h264`) (default: 30)
- `fast_decode`: Fast decode tuning option (default: 0)
- `episode_indices`: List of specific episodes to convert (default: all episodes)
- `num_workers`: Number of parallel workers for processing (default: 4)
- `overwrite`: Overwrite existing extracted frames and video files if they exist (default: `false`)
### Push to Hub ### Push to Hub
Add the `--push_to_hub` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub: Add the `--push_to_hub` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:
+258 -4
View File
@@ -18,7 +18,8 @@
Edit LeRobot datasets using various transformation tools. Edit LeRobot datasets using various transformation tools.
This script allows you to delete episodes, split datasets, merge datasets, This script allows you to delete episodes, split datasets, merge datasets,
and remove features. When new_repo_id is specified, creates a new dataset. remove features, and convert image datasets to video format.
When new_repo_id is specified, creates a new dataset.
Usage Examples: Usage Examples:
@@ -65,6 +66,14 @@ Remove camera feature:
--operation.type remove_feature \ --operation.type remove_feature \
--operation.feature_names "['observation.images.top']" --operation.feature_names "['observation.images.top']"
Convert image dataset to video format:
python -m lerobot.scripts.lerobot_edit_dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.vcodec libsvtav1 \
--operation.crf 30
Using JSON config file: Using JSON config file:
python -m lerobot.scripts.lerobot_edit_dataset \ python -m lerobot.scripts.lerobot_edit_dataset \
--config_path path/to/edit_config.json --config_path path/to/edit_config.json
@@ -72,9 +81,12 @@ Using JSON config file:
import logging import logging
import shutil import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from tqdm import tqdm
from lerobot.configs import parser from lerobot.configs import parser
from lerobot.datasets.dataset_tools import ( from lerobot.datasets.dataset_tools import (
delete_episodes, delete_episodes,
@@ -83,7 +95,8 @@ from lerobot.datasets.dataset_tools import (
split_dataset, split_dataset,
) )
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.constants import HF_LEROBOT_HOME from lerobot.datasets.video_utils import encode_video_frames
from lerobot.utils.constants import HF_LEROBOT_HOME, OBS_IMAGE
from lerobot.utils.utils import init_logging from lerobot.utils.utils import init_logging
@@ -111,10 +124,24 @@ class RemoveFeatureConfig:
feature_names: list[str] | None = None feature_names: list[str] | None = None
@dataclass
class ConvertToVideoConfig:
    """Options for the ``convert_to_video`` operation.

    The values are forwarded by ``handle_convert_to_video`` to
    ``convert_dataset_to_videos``, which extracts episode frames and encodes
    them with ``encode_video_frames``.
    """

    # Operation discriminator; `edit_dataset` dispatches on this value.
    type: str = "convert_to_video"
    # Base directory for outputs (frames under `images/`, videos under `videos/`).
    output_dir: str = "outputs/converted_videos"
    # Video codec name passed to the encoder.
    vcodec: str = "libsvtav1"
    # Pixel format passed to the encoder.
    pix_fmt: str = "yuv420p"
    # Group-of-pictures size passed to the encoder.
    g: int = 2
    # Constant rate factor passed to the encoder.
    crf: int = 30
    # Fast-decode tuning value passed to the encoder.
    fast_decode: int = 0
    # Episodes to convert; None means every episode in the dataset.
    episode_indices: list[int] | None = None
    # Number of episodes processed in parallel.
    num_workers: int = 4
    # When True, existing frames and videos are regenerated instead of skipped.
    overwrite: bool = False
@dataclass @dataclass
class EditDatasetConfig: class EditDatasetConfig:
repo_id: str repo_id: str
operation: DeleteEpisodesConfig | SplitConfig | MergeConfig | RemoveFeatureConfig operation: DeleteEpisodesConfig | SplitConfig | MergeConfig | RemoveFeatureConfig | ConvertToVideoConfig
root: str | None = None root: str | None = None
new_repo_id: str | None = None new_repo_id: str | None = None
push_to_hub: bool = False push_to_hub: bool = False
@@ -258,6 +285,231 @@ def handle_remove_feature(cfg: EditDatasetConfig) -> None:
LeRobotDataset(output_repo_id, root=output_dir).push_to_hub() LeRobotDataset(output_repo_id, root=output_dir).push_to_hub()
def save_episode_images(
    dataset: LeRobotDataset,
    imgs_dir: Path,
    episode_index: int = 0,
    overwrite: bool = False,
    num_workers: int = 4,
) -> None:
    """Save the frames of one episode to disk as PNG files.

    Frames are written as ``frame-XXXXXX.png`` (zero-based, six digits), the
    naming convention this script uses for ``encode_video_frames``. Only the
    first image feature (the first key starting with ``OBS_IMAGE``) is
    exported; other cameras are not written.

    Args:
        dataset: The LeRobot dataset to extract images from
        imgs_dir: Directory to save images to
        episode_index: Index of the episode to save (default: 0)
        overwrite: Whether to overwrite existing images
        num_workers: Number of threads for parallel image saving (default: 4)

    Raises:
        ValueError: If the dataset has no feature key starting with ``OBS_IMAGE``.
    """
    ep_num_images = dataset.meta.episodes["length"][episode_index]

    # Skip the work when a complete set of frames is already on disk.
    existing_frames = list(imgs_dir.glob("frame-*.png")) if imgs_dir.exists() else []
    if not overwrite and imgs_dir.exists() and len(existing_frames) == ep_num_images:
        logging.info(f"Images for episode {episode_index} already exist in {imgs_dir}. Skipping.")
        return

    # Get dataset without torch format for PIL image access
    hf_dataset = dataset.hf_dataset.with_format(None)

    # Get all image keys (for all cameras)
    img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)]
    if not img_keys:
        raise ValueError(f"No image keys found in dataset {dataset.repo_id}")

    # Use first camera only
    img_key = img_keys[0]
    imgs_dataset = hf_dataset.select_columns(img_key)

    # Restrict the dataset to the rows belonging to this episode.
    from_idx = dataset.meta.episodes["dataset_from_index"][episode_index]
    to_idx = dataset.meta.episodes["dataset_to_index"][episode_index]
    episode_dataset = imgs_dataset.select(range(from_idx, to_idx))

    imgs_dir.mkdir(parents=True, exist_ok=True)

    # Remove leftovers from a previous (partial or differently sized) run so
    # that the directory handed to the encoder only holds this episode's frames.
    for stale_frame in existing_frames:
        stale_frame.unlink(missing_ok=True)

    def _save_frame(frame_idx: int, item: dict) -> int:
        # PNG is lossless, so no quality setting is passed.
        item[img_key].save(str(imgs_dir / f"frame-{frame_idx:06d}.png"))
        return frame_idx

    num_frames = len(episode_dataset)
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Submit while iterating instead of materializing every decoded image
        # in a list first; an image can be released once its frame is written.
        futures = [
            executor.submit(_save_frame, frame_idx, item)
            for frame_idx, item in enumerate(episode_dataset)
        ]
        for future in tqdm(
            as_completed(futures),
            total=num_frames,
            desc=f"Saving {dataset.repo_id} episode {episode_index} images",
            leave=False,
        ):
            future.result()  # This will raise any exceptions that occurred
def process_single_episode(
    dataset: LeRobotDataset,
    episode_index: int,
    output_dir: Path,
    vcodec: str,
    pix_fmt: str,
    g: int | None,
    crf: int | None,
    fast_decode: int,
    fps: int,
    num_image_workers: int,
    overwrite: bool,
) -> str:
    """Process a single episode: save images and encode to video.

    Frames are written to ``<output_dir>/images/<repo>/episode_XXXXXX`` and the
    video to ``<output_dir>/videos/<repo>/``, where ``<repo>`` is the repo id
    with ``/`` replaced by ``_``. The video filename embeds the encoding
    parameters. When the video already exists and ``overwrite`` is False, the
    episode is skipped entirely, so no frames are extracted either.

    Args:
        dataset: The LeRobot dataset
        episode_index: Index of the episode to process
        output_dir: Base directory for outputs
        vcodec: Video codec
        pix_fmt: Pixel format
        g: Group of pictures size
        crf: Constant rate factor
        fast_decode: Fast decode tuning
        fps: Frames per second
        num_image_workers: Number of threads for parallel image saving
        overwrite: Whether to overwrite existing files

    Returns:
        Status message for this episode
    """
    repo_slug = dataset.repo_id.replace("/", "_")
    imgs_dir = output_dir / "images" / repo_slug / f"episode_{episode_index:06d}"

    # Create video filename with encoding parameters
    video_filename = f"{repo_slug}_ep{episode_index:06d}_{vcodec}_{pix_fmt}_g{g}_crf{crf}.mp4"
    video_path = output_dir / "videos" / repo_slug / video_filename

    # Decide before extracting frames: dumping every image of an episode is the
    # expensive step and is pointless when the video is not going to be encoded.
    if not overwrite and video_path.is_file():
        return f"Video already exists: {video_path}. Skipping."

    save_episode_images(dataset, imgs_dir, episode_index, overwrite, num_image_workers)

    video_path.parent.mkdir(parents=True, exist_ok=True)
    encode_video_frames(
        imgs_dir=imgs_dir,
        video_path=video_path,
        fps=fps,
        vcodec=vcodec,
        pix_fmt=pix_fmt,
        g=g,
        crf=crf,
        fast_decode=fast_decode,
        overwrite=True,
    )
    return f"✓ Video saved to {video_path}"
def convert_dataset_to_videos(
    dataset: LeRobotDataset,
    output_dir: Path,
    vcodec: str = "libsvtav1",
    pix_fmt: str = "yuv420p",
    g: int = 2,
    crf: int = 30,
    fast_decode: int = 0,
    episode_indices: list[int] | None = None,
    num_workers: int = 4,
    overwrite: bool = False,
    num_image_workers: int = 4,
) -> None:
    """Convert dataset images to video files.

    Each selected episode is handled by ``process_single_episode`` in its own
    thread. Every episode additionally uses up to ``num_image_workers`` threads
    to write frames, so as many as ``num_workers * num_image_workers`` threads
    can be active at once.

    Args:
        dataset: The LeRobot dataset
        output_dir: Base directory for outputs
        vcodec: Video codec (default: libsvtav1)
        pix_fmt: Pixel format (default: yuv420p)
        g: Group of pictures size (default: 2)
        crf: Constant rate factor (default: 30)
        fast_decode: Fast decode tuning (default: 0)
        episode_indices: List of episode indices to convert (None = all episodes)
        num_workers: Number of threads for parallel episode processing (default: 4)
        overwrite: Whether to overwrite existing files
        num_image_workers: Number of threads used to save images within each
            episode (default: 4)

    Raises:
        ValueError: If the dataset already contains video features, or if any
            requested episode index is outside ``[0, number of episodes)``.
    """
    # Check that it's an image dataset
    if len(dataset.meta.video_keys) > 0:
        raise ValueError(
            f"This operation is for image datasets only. Video dataset provided: {dataset.repo_id}"
        )

    fps = dataset.fps

    # Determine which episodes to process
    num_episodes = len(dataset.meta.episodes)
    if episode_indices is None:
        episode_indices = list(range(num_episodes))
    else:
        # Fail early with a clear message instead of an index error raised
        # from inside a worker thread.
        out_of_range = [idx for idx in episode_indices if not 0 <= idx < num_episodes]
        if out_of_range:
            raise ValueError(
                f"Episode indices {out_of_range} are out of range for dataset {dataset.repo_id} "
                f"with {num_episodes} episodes"
            )
        # Drop duplicates (keeping order) so two threads never write the same
        # episode's frames and video concurrently.
        episode_indices = list(dict.fromkeys(episode_indices))

    logging.info(f"Processing {len(episode_indices)} episodes from {dataset.repo_id} with {num_workers} workers")

    # Process episodes in parallel
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(
                process_single_episode,
                dataset=dataset,
                episode_index=episode_index,
                output_dir=output_dir,
                vcodec=vcodec,
                pix_fmt=pix_fmt,
                g=g,
                crf=crf,
                fast_decode=fast_decode,
                fps=fps,
                num_image_workers=num_image_workers,
                overwrite=overwrite,
            )
            for episode_index in episode_indices
        ]

        for future in tqdm(
            as_completed(futures),
            total=len(episode_indices),
            desc="Episodes",
        ):
            result = future.result()  # This will raise any exceptions that occurred
            logging.info(result)

    logging.info(f"\n✓ Completed processing {dataset.repo_id}")
def handle_convert_to_video(cfg: EditDatasetConfig) -> None:
    """Run the ``convert_to_video`` operation described by ``cfg``.

    Loads the dataset identified by ``cfg.repo_id`` / ``cfg.root`` and forwards
    the operation's encoding options to ``convert_dataset_to_videos``. The
    result is a set of local files under ``cfg.operation.output_dir``; this
    handler does not create a new dataset or upload anything.

    Args:
        cfg: Script configuration whose ``operation`` is a ``ConvertToVideoConfig``.

    Raises:
        ValueError: If ``cfg.operation`` is not a ``ConvertToVideoConfig``.
    """
    if not isinstance(cfg.operation, ConvertToVideoConfig):
        raise ValueError("Operation config must be ConvertToVideoConfig")

    # These options have no effect for this operation; say so explicitly
    # rather than dropping them without notice.
    if cfg.push_to_hub or cfg.new_repo_id is not None:
        logging.warning(
            "convert_to_video only writes local video files; `new_repo_id` and `push_to_hub` are ignored."
        )

    operation = cfg.operation
    dataset = LeRobotDataset(cfg.repo_id, root=cfg.root)
    output_dir = Path(operation.output_dir)

    logging.info(f"Converting dataset {cfg.repo_id} to video format")

    convert_dataset_to_videos(
        dataset=dataset,
        output_dir=output_dir,
        vcodec=operation.vcodec,
        pix_fmt=operation.pix_fmt,
        g=operation.g,
        crf=operation.crf,
        fast_decode=operation.fast_decode,
        episode_indices=operation.episode_indices,
        num_workers=operation.num_workers,
        overwrite=operation.overwrite,
    )
@parser.wrap() @parser.wrap()
def edit_dataset(cfg: EditDatasetConfig) -> None: def edit_dataset(cfg: EditDatasetConfig) -> None:
operation_type = cfg.operation.type operation_type = cfg.operation.type
@@ -270,10 +522,12 @@ def edit_dataset(cfg: EditDatasetConfig) -> None:
handle_merge(cfg) handle_merge(cfg)
elif operation_type == "remove_feature": elif operation_type == "remove_feature":
handle_remove_feature(cfg) handle_remove_feature(cfg)
elif operation_type == "convert_to_video":
handle_convert_to_video(cfg)
else: else:
raise ValueError( raise ValueError(
f"Unknown operation type: {operation_type}\n" f"Unknown operation type: {operation_type}\n"
f"Available operations: delete_episodes, split, merge, remove_feature" f"Available operations: delete_episodes, split, merge, remove_feature, convert_to_video"
) )