mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 12:09:42 +00:00
add video encoding tool
This commit is contained in:
@@ -11,13 +11,14 @@ LeRobot provides several utilities for manipulating datasets:
|
||||
3. **Merge Datasets** - Combine multiple datasets into one. The datasets must have identical features, and episodes are concatenated in the order specified in `repo_ids`
|
||||
4. **Add Features** - Add new features to a dataset
|
||||
5. **Remove Features** - Remove features from a dataset
|
||||
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage
|
||||
|
||||
The core implementation is in `lerobot.datasets.dataset_tools`.
|
||||
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.
|
||||
|
||||
## Command-Line Tool: lerobot-edit-dataset
|
||||
|
||||
`lerobot-edit-dataset` is a command-line script for editing datasets. It can be used to delete episodes, split datasets, merge datasets, add features, and remove features.
|
||||
`lerobot-edit-dataset` is a command-line script for editing datasets. It can be used to delete episodes, split datasets, merge datasets, add features, remove features, and convert image datasets to video format.
|
||||
|
||||
Run `lerobot-edit-dataset --help` for more information on the configuration of each operation.
|
||||
|
||||
@@ -86,6 +87,53 @@ lerobot-edit-dataset \
|
||||
--operation.feature_names "['observation.images.top']"
|
||||
```
|
||||
|
||||
#### Convert to Video
|
||||
|
||||
Convert an image-based dataset to video format. This is useful for reducing storage requirements and improving data loading performance. Videos are encoded with configurable quality settings.
|
||||
|
||||
```bash
|
||||
# Convert all episodes to video format with default settings
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/converted_videos
|
||||
|
||||
# Convert with custom video codec and quality settings
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/converted_videos \
|
||||
--operation.vcodec libsvtav1 \
|
||||
--operation.pix_fmt yuv420p \
|
||||
--operation.g 2 \
|
||||
--operation.crf 30
|
||||
|
||||
# Convert only specific episodes
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/converted_videos \
|
||||
--operation.episode_indices "[0, 1, 2, 5, 10]"
|
||||
|
||||
# Convert with multiple workers for parallel processing
|
||||
lerobot-edit-dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/converted_videos \
|
||||
--operation.num_workers 8
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `output_dir`: Directory where videos will be saved (default: `outputs/converted_videos`)
|
||||
- `vcodec`: Video codec to use - options: `h264`, `hevc`, `libsvtav1` (default: `libsvtav1`)
|
||||
- `pix_fmt`: Pixel format - options: `yuv420p`, `yuv444p` (default: `yuv420p`)
|
||||
- `g`: Group of pictures (GOP) size - lower values give better quality but larger files (default: 2)
|
||||
- `crf`: Constant rate factor - lower values give better quality but larger files, 0 is lossless (default: 30)
|
||||
- `fast_decode`: Fast decode tuning option (default: 0)
|
||||
- `episode_indices`: List of specific episodes to convert (default: all episodes)
|
||||
- `num_workers`: Number of parallel workers for processing (default: 4)
|
||||
- `overwrite`: Overwrite existing video files if they exist
|
||||
|
||||
### Push to Hub
|
||||
|
||||
Add the `--push_to_hub` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:
|
||||
|
||||
@@ -18,7 +18,8 @@
|
||||
Edit LeRobot datasets using various transformation tools.
|
||||
|
||||
This script allows you to delete episodes, split datasets, merge datasets,
|
||||
and remove features. When new_repo_id is specified, creates a new dataset.
|
||||
remove features, and convert image datasets to video format.
|
||||
When new_repo_id is specified, creates a new dataset.
|
||||
|
||||
Usage Examples:
|
||||
|
||||
@@ -65,6 +66,14 @@ Remove camera feature:
|
||||
--operation.type remove_feature \
|
||||
--operation.feature_names "['observation.images.top']"
|
||||
|
||||
Convert image dataset to video format:
|
||||
python -m lerobot.scripts.lerobot_edit_dataset \
|
||||
--repo_id lerobot/pusht_image \
|
||||
--operation.type convert_to_video \
|
||||
--operation.output_dir outputs/converted_videos \
|
||||
--operation.vcodec libsvtav1 \
|
||||
--operation.crf 30
|
||||
|
||||
Using JSON config file:
|
||||
python -m lerobot.scripts.lerobot_edit_dataset \
|
||||
--config_path path/to/edit_config.json
|
||||
@@ -72,9 +81,12 @@ Using JSON config file:
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from lerobot.configs import parser
|
||||
from lerobot.datasets.dataset_tools import (
|
||||
delete_episodes,
|
||||
@@ -83,7 +95,8 @@ from lerobot.datasets.dataset_tools import (
|
||||
split_dataset,
|
||||
)
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.utils.constants import HF_LEROBOT_HOME
|
||||
from lerobot.datasets.video_utils import encode_video_frames
|
||||
from lerobot.utils.constants import HF_LEROBOT_HOME, OBS_IMAGE
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
|
||||
@@ -111,10 +124,24 @@ class RemoveFeatureConfig:
|
||||
feature_names: list[str] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ConvertToVideoConfig:
|
||||
type: str = "convert_to_video"
|
||||
output_dir: str = "outputs/converted_videos"
|
||||
vcodec: str = "libsvtav1"
|
||||
pix_fmt: str = "yuv420p"
|
||||
g: int = 2
|
||||
crf: int = 30
|
||||
fast_decode: int = 0
|
||||
episode_indices: list[int] | None = None
|
||||
num_workers: int = 4
|
||||
overwrite: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class EditDatasetConfig:
|
||||
repo_id: str
|
||||
operation: DeleteEpisodesConfig | SplitConfig | MergeConfig | RemoveFeatureConfig
|
||||
operation: DeleteEpisodesConfig | SplitConfig | MergeConfig | RemoveFeatureConfig | ConvertToVideoConfig
|
||||
root: str | None = None
|
||||
new_repo_id: str | None = None
|
||||
push_to_hub: bool = False
|
||||
@@ -258,6 +285,231 @@ def handle_remove_feature(cfg: EditDatasetConfig) -> None:
|
||||
LeRobotDataset(output_repo_id, root=output_dir).push_to_hub()
|
||||
|
||||
|
||||
def save_episode_images(
|
||||
dataset: LeRobotDataset,
|
||||
imgs_dir: Path,
|
||||
episode_index: int = 0,
|
||||
overwrite: bool = False,
|
||||
num_workers: int = 4,
|
||||
) -> None:
|
||||
"""Save images from a specific episode to disk.
|
||||
|
||||
Args:
|
||||
dataset: The LeRobot dataset to extract images from
|
||||
imgs_dir: Directory to save images to
|
||||
episode_index: Index of the episode to save (default: 0)
|
||||
overwrite: Whether to overwrite existing images
|
||||
num_workers: Number of threads for parallel image saving (default: 4)
|
||||
"""
|
||||
ep_num_images = dataset.meta.episodes["length"][episode_index]
|
||||
|
||||
# Check if images already exist
|
||||
if not overwrite and imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images:
|
||||
logging.info(f"Images for episode {episode_index} already exist in {imgs_dir}. Skipping.")
|
||||
return
|
||||
|
||||
# Create directory
|
||||
imgs_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Get dataset without torch format for PIL image access
|
||||
hf_dataset = dataset.hf_dataset.with_format(None)
|
||||
|
||||
# Get all image keys (for all cameras)
|
||||
img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)]
|
||||
|
||||
if len(img_keys) == 0:
|
||||
raise ValueError(f"No image keys found in dataset {dataset.repo_id}")
|
||||
|
||||
# Use first camera only
|
||||
img_key = img_keys[0]
|
||||
imgs_dataset = hf_dataset.select_columns(img_key)
|
||||
|
||||
# Get episode start and end indices
|
||||
from_idx = dataset.meta.episodes["dataset_from_index"][episode_index]
|
||||
to_idx = dataset.meta.episodes["dataset_to_index"][episode_index]
|
||||
|
||||
# Get all items for this episode
|
||||
episode_dataset = imgs_dataset.select(range(from_idx, to_idx))
|
||||
|
||||
# Define function to save a single image
|
||||
def save_single_image(i_item_tuple):
|
||||
i, item = i_item_tuple
|
||||
img = item[img_key]
|
||||
# Use frame-XXXXXX.png format to match encode_video_frames expectations
|
||||
img.save(str(imgs_dir / f"frame-{i:06d}.png"), quality=100)
|
||||
return i
|
||||
|
||||
# Save images with proper naming convention for encode_video_frames (frame-XXXXXX.png)
|
||||
# Use ThreadPoolExecutor for parallel processing
|
||||
items = list(enumerate(episode_dataset))
|
||||
|
||||
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
||||
futures = [executor.submit(save_single_image, item) for item in items]
|
||||
for future in tqdm(
|
||||
as_completed(futures),
|
||||
total=len(items),
|
||||
desc=f"Saving {dataset.repo_id} episode {episode_index} images",
|
||||
leave=False,
|
||||
):
|
||||
future.result() # This will raise any exceptions that occurred
|
||||
|
||||
|
||||
def process_single_episode(
|
||||
dataset: LeRobotDataset,
|
||||
episode_index: int,
|
||||
output_dir: Path,
|
||||
vcodec: str,
|
||||
pix_fmt: str,
|
||||
g: int | None,
|
||||
crf: int | None,
|
||||
fast_decode: int,
|
||||
fps: int,
|
||||
num_image_workers: int,
|
||||
overwrite: bool,
|
||||
) -> str:
|
||||
"""Process a single episode: save images and encode to video.
|
||||
|
||||
Args:
|
||||
dataset: The LeRobot dataset
|
||||
episode_index: Index of the episode to process
|
||||
output_dir: Base directory for outputs
|
||||
vcodec: Video codec
|
||||
pix_fmt: Pixel format
|
||||
g: Group of pictures size
|
||||
crf: Constant rate factor
|
||||
fast_decode: Fast decode tuning
|
||||
fps: Frames per second
|
||||
num_image_workers: Number of threads for parallel image saving
|
||||
overwrite: Whether to overwrite existing files
|
||||
|
||||
Returns:
|
||||
Status message for this episode
|
||||
"""
|
||||
# Create paths
|
||||
imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_") / f"episode_{episode_index:06d}"
|
||||
|
||||
# Create video filename with encoding parameters
|
||||
video_filename = f"{dataset.repo_id.replace('/', '_')}_ep{episode_index:06d}_{vcodec}_{pix_fmt}_g{g}_crf{crf}.mp4"
|
||||
video_path = output_dir / "videos" / dataset.repo_id.replace("/", "_") / video_filename
|
||||
|
||||
# Save episode images
|
||||
save_episode_images(dataset, imgs_dir, episode_index, overwrite, num_image_workers)
|
||||
|
||||
# Encode to video
|
||||
if overwrite or not video_path.is_file():
|
||||
video_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
encode_video_frames(
|
||||
imgs_dir=imgs_dir,
|
||||
video_path=video_path,
|
||||
fps=fps,
|
||||
vcodec=vcodec,
|
||||
pix_fmt=pix_fmt,
|
||||
g=g,
|
||||
crf=crf,
|
||||
fast_decode=fast_decode,
|
||||
overwrite=True,
|
||||
)
|
||||
|
||||
return f"✓ Video saved to {video_path}"
|
||||
else:
|
||||
return f"Video already exists: {video_path}. Skipping."
|
||||
|
||||
|
||||
def convert_dataset_to_videos(
|
||||
dataset: LeRobotDataset,
|
||||
output_dir: Path,
|
||||
vcodec: str = "libsvtav1",
|
||||
pix_fmt: str = "yuv420p",
|
||||
g: int = 2,
|
||||
crf: int = 30,
|
||||
fast_decode: int = 0,
|
||||
episode_indices: list[int] | None = None,
|
||||
num_workers: int = 4,
|
||||
overwrite: bool = False,
|
||||
) -> None:
|
||||
"""Convert dataset images to video files.
|
||||
|
||||
Args:
|
||||
dataset: The LeRobot dataset
|
||||
output_dir: Base directory for outputs
|
||||
vcodec: Video codec (default: libsvtav1)
|
||||
pix_fmt: Pixel format (default: yuv420p)
|
||||
g: Group of pictures size (default: 2)
|
||||
crf: Constant rate factor (default: 30)
|
||||
fast_decode: Fast decode tuning (default: 0)
|
||||
episode_indices: List of episode indices to convert (None = all episodes)
|
||||
num_workers: Number of threads for parallel episode processing (default: 4)
|
||||
overwrite: Whether to overwrite existing files
|
||||
"""
|
||||
# Check that it's an image dataset
|
||||
if len(dataset.meta.video_keys) > 0:
|
||||
raise ValueError(
|
||||
f"This operation is for image datasets only. Video dataset provided: {dataset.repo_id}"
|
||||
)
|
||||
|
||||
fps = dataset.fps
|
||||
|
||||
# Determine which episodes to process
|
||||
num_episodes = len(dataset.meta.episodes)
|
||||
if episode_indices is None:
|
||||
episode_indices = list(range(num_episodes))
|
||||
|
||||
logging.info(f"Processing {len(episode_indices)} episodes from {dataset.repo_id} with {num_workers} workers")
|
||||
|
||||
# Process episodes in parallel
|
||||
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
||||
futures = [
|
||||
executor.submit(
|
||||
process_single_episode,
|
||||
dataset=dataset,
|
||||
episode_index=episode_index,
|
||||
output_dir=output_dir,
|
||||
vcodec=vcodec,
|
||||
pix_fmt=pix_fmt,
|
||||
g=g,
|
||||
crf=crf,
|
||||
fast_decode=fast_decode,
|
||||
fps=fps,
|
||||
num_image_workers=4, # Use fixed workers for image saving within each episode
|
||||
overwrite=overwrite,
|
||||
)
|
||||
for episode_index in episode_indices
|
||||
]
|
||||
|
||||
for future in tqdm(
|
||||
as_completed(futures),
|
||||
total=len(episode_indices),
|
||||
desc="Episodes",
|
||||
):
|
||||
result = future.result() # This will raise any exceptions that occurred
|
||||
logging.info(result)
|
||||
|
||||
logging.info(f"\n✓ Completed processing {dataset.repo_id}")
|
||||
|
||||
|
||||
def handle_convert_to_video(cfg: EditDatasetConfig) -> None:
|
||||
if not isinstance(cfg.operation, ConvertToVideoConfig):
|
||||
raise ValueError("Operation config must be ConvertToVideoConfig")
|
||||
|
||||
dataset = LeRobotDataset(cfg.repo_id, root=cfg.root)
|
||||
output_dir = Path(cfg.operation.output_dir)
|
||||
|
||||
logging.info(f"Converting dataset {cfg.repo_id} to video format")
|
||||
convert_dataset_to_videos(
|
||||
dataset=dataset,
|
||||
output_dir=output_dir,
|
||||
vcodec=cfg.operation.vcodec,
|
||||
pix_fmt=cfg.operation.pix_fmt,
|
||||
g=cfg.operation.g,
|
||||
crf=cfg.operation.crf,
|
||||
fast_decode=cfg.operation.fast_decode,
|
||||
episode_indices=cfg.operation.episode_indices,
|
||||
num_workers=cfg.operation.num_workers,
|
||||
overwrite=cfg.operation.overwrite,
|
||||
)
|
||||
|
||||
|
||||
@parser.wrap()
|
||||
def edit_dataset(cfg: EditDatasetConfig) -> None:
|
||||
operation_type = cfg.operation.type
|
||||
@@ -270,10 +522,12 @@ def edit_dataset(cfg: EditDatasetConfig) -> None:
|
||||
handle_merge(cfg)
|
||||
elif operation_type == "remove_feature":
|
||||
handle_remove_feature(cfg)
|
||||
elif operation_type == "convert_to_video":
|
||||
handle_convert_to_video(cfg)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unknown operation type: {operation_type}\n"
|
||||
f"Available operations: delete_episodes, split, merge, remove_feature"
|
||||
f"Available operations: delete_episodes, split, merge, remove_feature, convert_to_video"
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user