diff --git a/docs/source/using_dataset_tools.mdx b/docs/source/using_dataset_tools.mdx index 10cf24841..22c28828c 100644 --- a/docs/source/using_dataset_tools.mdx +++ b/docs/source/using_dataset_tools.mdx @@ -89,20 +89,33 @@ lerobot-edit-dataset \ #### Convert to Video -Convert an image-based dataset to video format. This is useful for reducing storage requirements and improving data loading performance. Videos are encoded with configurable quality settings. +Convert an image-based dataset to video format, creating a new LeRobotDataset where images are stored as videos. This is useful for reducing storage requirements and improving data loading performance. The new dataset will have the exact same structure as the original, but with images encoded as MP4 videos in the proper LeRobot format. ```bash -# Convert all episodes to video format with default settings +# Local-only: Save to a custom output directory (no hub push) lerobot-edit-dataset \ --repo_id lerobot/pusht_image \ --operation.type convert_to_video \ - --operation.output_dir outputs/converted_videos + --operation.output_dir /path/to/output/pusht_video + +# Save with new repo_id (local storage) +lerobot-edit-dataset \ + --repo_id lerobot/pusht_image \ + --new_repo_id lerobot/pusht_video \ + --operation.type convert_to_video + +# Convert and push to Hugging Face Hub +lerobot-edit-dataset \ + --repo_id lerobot/pusht_image \ + --new_repo_id lerobot/pusht_video \ + --operation.type convert_to_video \ + --push_to_hub true # Convert with custom video codec and quality settings lerobot-edit-dataset \ --repo_id lerobot/pusht_image \ --operation.type convert_to_video \ - --operation.output_dir outputs/converted_videos \ + --operation.output_dir outputs/pusht_video \ --operation.vcodec libsvtav1 \ --operation.pix_fmt yuv420p \ --operation.g 2 \ @@ -112,20 +125,20 @@ lerobot-edit-dataset \ lerobot-edit-dataset \ --repo_id lerobot/pusht_image \ --operation.type convert_to_video \ - --operation.output_dir outputs/converted_videos \ + --operation.output_dir outputs/pusht_video \ --operation.episode_indices "[0, 1, 2, 5, 10]" # Convert with multiple workers for parallel processing lerobot-edit-dataset \ --repo_id lerobot/pusht_image \ --operation.type convert_to_video \ - --operation.output_dir outputs/converted_videos \ + --operation.output_dir outputs/pusht_video \ --operation.num_workers 8 ``` **Parameters:** -- `output_dir`: Directory where videos will be saved (default: `outputs/converted_videos`) +- `output_dir`: Custom output directory (optional - by default uses `new_repo_id` or `{repo_id}_video`) - `vcodec`: Video codec to use - options: `h264`, `hevc`, `libsvtav1` (default: `libsvtav1`) - `pix_fmt`: Pixel format - options: `yuv420p`, `yuv444p` (default: `yuv420p`) - `g`: Group of pictures (GOP) size - lower values give better quality but larger files (default: 2) @@ -133,11 +146,12 @@ lerobot-edit-dataset \ - `fast_decode`: Fast decode tuning option (default: 0) - `episode_indices`: List of specific episodes to convert (default: all episodes) - `num_workers`: Number of parallel workers for processing (default: 4) -- `overwrite`: Overwrite existing video files if they exist + +**Note:** The resulting dataset will be a proper LeRobotDataset with all cameras encoded as videos in the `videos/` directory, with parquet files containing only metadata (no raw image data). All episodes, stats, and tasks are preserved. 
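The same conversion can also be driven from Python through the `convert_dataset_to_videos` helper that backs this CLI operation (imported in the tests from `lerobot.scripts.lerobot_edit_dataset`). Below is a minimal sketch, assuming a local output path under `outputs/` and the illustrative repo ids used in the examples above:

```python
# Sketch: programmatic conversion with the helper added in this PR.
# Paths and repo ids are illustrative, not fixed conventions.
from pathlib import Path

from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.scripts.lerobot_edit_dataset import convert_dataset_to_videos

# Load the image-based source dataset (first two episodes only, for speed).
source = LeRobotDataset("lerobot/pusht_image", episodes=[0, 1])

# Convert to a new video-based dataset stored under outputs/pusht_video.
video_dataset = convert_dataset_to_videos(
    dataset=source,
    output_dir=Path("outputs/pusht_video"),  # assumed local output directory
    repo_id="lerobot/pusht_video",           # name for the new dataset
    vcodec="libsvtav1",
    pix_fmt="yuv420p",
    g=2,
    crf=30,
    episode_indices=[0, 1],
    num_workers=4,
)

print(video_dataset.meta.video_keys)      # image keys are now video keys
print(video_dataset.meta.total_episodes)  # 2
```

The returned object is a regular `LeRobotDataset`, so `push_to_hub()` can be called on it afterwards, mirroring what `--push_to_hub true` does on the CLI.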
### Push to Hub -Add the `--push_to_hub` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub: +Add the `--push_to_hub true` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub: ```bash lerobot-edit-dataset \ @@ -145,7 +159,7 @@ lerobot-edit-dataset \ --new_repo_id lerobot/pusht_after_deletion \ --operation.type delete_episodes \ --operation.episode_indices "[0, 2, 5]" \ - --push_to_hub + --push_to_hub true ``` There is also a tool for adding features to a dataset that is not yet covered in `lerobot-edit-dataset`. diff --git a/src/lerobot/scripts/lerobot_edit_dataset.py b/src/lerobot/scripts/lerobot_edit_dataset.py index f929dff6d..7c58389ac 100644 --- a/src/lerobot/scripts/lerobot_edit_dataset.py +++ b/src/lerobot/scripts/lerobot_edit_dataset.py @@ -66,13 +66,24 @@ Remove camera feature: --operation.type remove_feature \ --operation.feature_names "['observation.images.top']" -Convert image dataset to video format: +Convert image dataset to video format (saves locally): python -m lerobot.scripts.lerobot_edit_dataset \ --repo_id lerobot/pusht_image \ --operation.type convert_to_video \ - --operation.output_dir outputs/converted_videos \ - --operation.vcodec libsvtav1 \ - --operation.crf 30 + --operation.output_dir /path/to/output/pusht_video + +Convert image dataset and save with new repo_id: + python -m lerobot.scripts.lerobot_edit_dataset \ + --repo_id lerobot/pusht_image \ + --new_repo_id lerobot/pusht_video \ + --operation.type convert_to_video + +Convert and push to hub: + python -m lerobot.scripts.lerobot_edit_dataset \ + --repo_id lerobot/pusht_image \ + --new_repo_id lerobot/pusht_video \ + --operation.type convert_to_video \ + --push_to_hub true Using JSON config file: python -m lerobot.scripts.lerobot_edit_dataset \ @@ -85,6 +96,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from pathlib import Path +import pandas as pd from tqdm import tqdm from lerobot.configs import parser @@ -94,8 +106,9 @@ from lerobot.datasets.dataset_tools import ( remove_feature, split_dataset, ) -from lerobot.datasets.lerobot_dataset import LeRobotDataset -from lerobot.datasets.video_utils import encode_video_frames +from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata +from lerobot.datasets.utils import write_stats, write_tasks +from lerobot.datasets.video_utils import encode_video_frames, get_video_info from lerobot.utils.constants import HF_LEROBOT_HOME, OBS_IMAGE from lerobot.utils.utils import init_logging @@ -127,7 +140,7 @@ class RemoveFeatureConfig: @dataclass class ConvertToVideoConfig: type: str = "convert_to_video" - output_dir: str = "outputs/converted_videos" + output_dir: str | None = None vcodec: str = "libsvtav1" pix_fmt: str = "yuv420p" g: int = 2 @@ -135,7 +148,6 @@ class ConvertToVideoConfig: fast_decode: int = 0 episode_indices: list[int] | None = None num_workers: int = 4 - overwrite: bool = False @dataclass @@ -285,43 +297,29 @@ def handle_remove_feature(cfg: EditDatasetConfig) -> None: LeRobotDataset(output_repo_id, root=output_dir).push_to_hub() -def save_episode_images( +def save_episode_images_for_video( dataset: LeRobotDataset, imgs_dir: Path, - episode_index: int = 0, - overwrite: bool = False, + img_key: str, + episode_index: int, num_workers: int = 4, ) -> None: - """Save images from a specific episode to disk. + """Save images from a specific episode and camera to disk for video encoding. 
Args: dataset: The LeRobot dataset to extract images from imgs_dir: Directory to save images to - episode_index: Index of the episode to save (default: 0) - overwrite: Whether to overwrite existing images - num_workers: Number of threads for parallel image saving (default: 4) + img_key: The image key (camera) to extract + episode_index: Index of the episode to save + num_workers: Number of threads for parallel image saving """ - ep_num_images = dataset.meta.episodes["length"][episode_index] - - # Check if images already exist - if not overwrite and imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images: - logging.info(f"Images for episode {episode_index} already exist in {imgs_dir}. Skipping.") - return - # Create directory imgs_dir.mkdir(parents=True, exist_ok=True) # Get dataset without torch format for PIL image access hf_dataset = dataset.hf_dataset.with_format(None) - # Get all image keys (for all cameras) - img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)] - - if len(img_keys) == 0: - raise ValueError(f"No image keys found in dataset {dataset.repo_id}") - - # Use first camera only - img_key = img_keys[0] + # Select only this camera's images imgs_dataset = hf_dataset.select_columns(img_key) # Get episode start and end indices @@ -340,67 +338,68 @@ def save_episode_images( return i # Save images with proper naming convention for encode_video_frames (frame-XXXXXX.png) - # Use ThreadPoolExecutor for parallel processing items = list(enumerate(episode_dataset)) with ThreadPoolExecutor(max_workers=num_workers) as executor: futures = [executor.submit(save_single_image, item) for item in items] - for future in tqdm( - as_completed(futures), - total=len(items), - desc=f"Saving {dataset.repo_id} episode {episode_index} images", - leave=False, - ): + for future in as_completed(futures): future.result() # This will raise any exceptions that occurred -def process_single_episode( +def encode_episode_videos( dataset: LeRobotDataset, + new_meta: LeRobotDatasetMetadata, episode_index: int, - output_dir: Path, vcodec: str, pix_fmt: str, - g: int | None, - crf: int | None, + g: int, + crf: int, fast_decode: int, - fps: int, - num_image_workers: int, - overwrite: bool, -) -> str: - """Process a single episode: save images and encode to video. + temp_dir: Path, + num_image_workers: int = 4, +) -> dict[str, dict]: + """Encode videos for a single episode and return video metadata. 
Args: - dataset: The LeRobot dataset - episode_index: Index of the episode to process - output_dir: Base directory for outputs + dataset: Source dataset with images + new_meta: Metadata object for the new video dataset + episode_index: Episode index to process vcodec: Video codec pix_fmt: Pixel format g: Group of pictures size crf: Constant rate factor fast_decode: Fast decode tuning - fps: Frames per second - num_image_workers: Number of threads for parallel image saving - overwrite: Whether to overwrite existing files + temp_dir: Temporary directory for images + num_image_workers: Number of workers for saving images Returns: - Status message for this episode + Dictionary mapping video keys to their metadata (chunk_index, file_index, timestamps) """ - # Create paths - imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_") / f"episode_{episode_index:06d}" + hf_dataset = dataset.hf_dataset.with_format(None) + img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)] - # Create video filename with encoding parameters - video_filename = ( - f"{dataset.repo_id.replace('/', '_')}_ep{episode_index:06d}_{vcodec}_{pix_fmt}_g{g}_crf{crf}.mp4" - ) - video_path = output_dir / "videos" / dataset.repo_id.replace("/", "_") / video_filename + video_metadata = {} + fps = dataset.fps + episode_length = dataset.meta.episodes["length"][episode_index] + episode_duration = episode_length / fps - # Save episode images - save_episode_images(dataset, imgs_dir, episode_index, overwrite, num_image_workers) + for img_key in img_keys: + # Save images temporarily + imgs_dir = temp_dir / f"episode_{episode_index:06d}" / img_key + save_episode_images_for_video(dataset, imgs_dir, img_key, episode_index, num_image_workers) - # Encode to video - if overwrite or not video_path.is_file(): + # Determine chunk and file indices + # For simplicity, we'll put each episode in its own file + chunk_idx = episode_index // new_meta.chunks_size + file_idx = episode_index % new_meta.chunks_size + + # Create video path in the new dataset structure + video_path = new_meta.root / new_meta.video_path.format( + video_key=img_key, chunk_index=chunk_idx, file_index=file_idx + ) video_path.parent.mkdir(parents=True, exist_ok=True) + # Encode video encode_video_frames( imgs_dir=imgs_dir, video_path=video_path, @@ -413,14 +412,24 @@ def process_single_episode( overwrite=True, ) - return f"āœ“ Video saved to {video_path}" - else: - return f"Video already exists: {video_path}. Skipping." + # Clean up temporary images + shutil.rmtree(imgs_dir) + + # Store video metadata + video_metadata[img_key] = { + f"videos/{img_key}/chunk_index": chunk_idx, + f"videos/{img_key}/file_index": file_idx, + f"videos/{img_key}/from_timestamp": 0.0, + f"videos/{img_key}/to_timestamp": episode_duration, + } + + return video_metadata def convert_dataset_to_videos( dataset: LeRobotDataset, output_dir: Path, + repo_id: str | None = None, vcodec: str = "libsvtav1", pix_fmt: str = "yuv420p", g: int = 2, @@ -428,21 +437,26 @@ def convert_dataset_to_videos( fast_decode: int = 0, episode_indices: list[int] | None = None, num_workers: int = 4, - overwrite: bool = False, -) -> None: - """Convert dataset images to video files. +) -> LeRobotDataset: + """Convert image-based dataset to video-based dataset. + + Creates a new LeRobotDataset with videos instead of images, following the proper + LeRobot dataset structure with videos stored in chunked MP4 files. 
Args: - dataset: The LeRobot dataset - output_dir: Base directory for outputs + dataset: The source LeRobot dataset with images + output_dir: Directory to save the new video dataset + repo_id: Repository ID for the new dataset (default: original_id + "_video") vcodec: Video codec (default: libsvtav1) pix_fmt: Pixel format (default: yuv420p) g: Group of pictures size (default: 2) crf: Constant rate factor (default: 30) fast_decode: Fast decode tuning (default: 0) episode_indices: List of episode indices to convert (None = all episodes) - num_workers: Number of threads for parallel episode processing (default: 4) - overwrite: Whether to overwrite existing files + num_workers: Number of threads for parallel processing (default: 4) + + Returns: + New LeRobotDataset with videos """ # Check that it's an image dataset if len(dataset.meta.video_keys) > 0: @@ -450,69 +464,247 @@ def convert_dataset_to_videos( f"This operation is for image datasets only. Video dataset provided: {dataset.repo_id}" ) - fps = dataset.fps + # Get all image keys + hf_dataset = dataset.hf_dataset.with_format(None) + img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)] + + if len(img_keys) == 0: + raise ValueError(f"No image keys found in dataset {dataset.repo_id}") # Determine which episodes to process - num_episodes = len(dataset.meta.episodes) if episode_indices is None: - episode_indices = list(range(num_episodes)) + episode_indices = list(range(dataset.meta.total_episodes)) + + if repo_id is None: + repo_id = f"{dataset.repo_id}_video" logging.info( - f"Processing {len(episode_indices)} episodes from {dataset.repo_id} with {num_workers} workers" + f"Converting {len(episode_indices)} episodes with {len(img_keys)} cameras from {dataset.repo_id}" + ) + logging.info(f"Video codec: {vcodec}, pixel format: {pix_fmt}, GOP: {g}, CRF: {crf}") + + # Create new features dict, converting image features to video features + new_features = {} + for key, value in dataset.meta.features.items(): + if key not in img_keys: + new_features[key] = value + else: + # Convert image key to video format + new_features[key] = value.copy() + new_features[key]["dtype"] = "video" # Change dtype from "image" to "video" + # Video info will be updated after episodes are encoded + + # Create new metadata for video dataset + new_meta = LeRobotDatasetMetadata.create( + repo_id=repo_id, + fps=dataset.meta.fps, + features=new_features, + robot_type=dataset.meta.robot_type, + root=output_dir, + use_videos=True, + chunks_size=dataset.meta.chunks_size, + data_files_size_in_mb=dataset.meta.data_files_size_in_mb, + video_files_size_in_mb=dataset.meta.video_files_size_in_mb, ) - # Process episodes in parallel - with ThreadPoolExecutor(max_workers=num_workers) as executor: - futures = [ - executor.submit( - process_single_episode, + # Create temporary directory for image extraction + temp_dir = output_dir / "temp_images" + temp_dir.mkdir(parents=True, exist_ok=True) + + # Process each episode + all_episode_metadata = [] + + try: + for ep_idx in tqdm(episode_indices, desc="Converting episodes to videos"): + # Get episode metadata from source + src_episode = dataset.meta.episodes[ep_idx] + + # Encode videos for this episode + video_metadata = encode_episode_videos( dataset=dataset, - episode_index=episode_index, - output_dir=output_dir, + new_meta=new_meta, + episode_index=ep_idx, vcodec=vcodec, pix_fmt=pix_fmt, g=g, crf=crf, fast_decode=fast_decode, - fps=fps, - num_image_workers=4, # Use fixed workers for image saving within each episode - 
overwrite=overwrite, + temp_dir=temp_dir, + num_image_workers=num_workers, ) - for episode_index in episode_indices - ] - for future in tqdm( - as_completed(futures), - total=len(episode_indices), - desc="Episodes", - ): - result = future.result() # This will raise any exceptions that occurred - logging.info(result) + # Build episode metadata + episode_meta = { + "episode_index": ep_idx, + "length": src_episode["length"], + "dataset_from_index": ep_idx * src_episode["length"], + "dataset_to_index": (ep_idx + 1) * src_episode["length"], + } - logging.info(f"\nāœ“ Completed processing {dataset.repo_id}") + # Add video metadata + for img_key in img_keys: + episode_meta.update(video_metadata[img_key]) + + # Add data chunk/file info (using same structure as source) + if "data/chunk_index" in src_episode: + episode_meta["data/chunk_index"] = src_episode["data/chunk_index"] + episode_meta["data/file_index"] = src_episode["data/file_index"] + + all_episode_metadata.append(episode_meta) + + # Copy and transform data files (removing image columns) + _copy_data_without_images(dataset, new_meta, episode_indices, img_keys) + + # Save episode metadata + episodes_df = pd.DataFrame(all_episode_metadata) + episodes_path = new_meta.root / "meta" / "episodes" / "chunk-000" / "file-000.parquet" + episodes_path.parent.mkdir(parents=True, exist_ok=True) + episodes_df.to_parquet(episodes_path, index=False) + + # Update metadata info + new_meta.info["total_episodes"] = len(episode_indices) + new_meta.info["total_frames"] = sum(ep["length"] for ep in all_episode_metadata) + new_meta.info["total_tasks"] = dataset.meta.total_tasks + new_meta.info["splits"] = {"train": f"0:{len(episode_indices)}"} + + # Update video info for all image keys (now videos) + # We need to manually set video info since update_video_info() checks video_keys first + for img_key in img_keys: + if not new_meta.features[img_key].get("info", None): + video_path = new_meta.root / new_meta.video_path.format( + video_key=img_key, chunk_index=0, file_index=0 + ) + new_meta.info["features"][img_key]["info"] = get_video_info(video_path) + + from lerobot.datasets.utils import write_info + + write_info(new_meta.info, new_meta.root) + + # Copy stats and tasks + if dataset.meta.stats is not None: + # Remove image stats + new_stats = {k: v for k, v in dataset.meta.stats.items() if k not in img_keys} + write_stats(new_stats, new_meta.root) + + if dataset.meta.tasks is not None: + write_tasks(dataset.meta.tasks, new_meta.root) + + finally: + # Clean up temporary directory + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + logging.info(f"āœ“ Completed converting {dataset.repo_id} to video format") + logging.info(f"New dataset saved to: {output_dir}") + + # Return new dataset + return LeRobotDataset(repo_id=repo_id, root=output_dir) + + +def _copy_data_without_images( + src_dataset: LeRobotDataset, + dst_meta: LeRobotDatasetMetadata, + episode_indices: list[int], + img_keys: list[str], +) -> None: + """Copy data files without image columns. 
+ + Args: + src_dataset: Source dataset + dst_meta: Destination metadata + episode_indices: Episodes to include + img_keys: Image keys to remove + """ + from lerobot.datasets.utils import DATA_DIR + + data_dir = src_dataset.root / DATA_DIR + parquet_files = sorted(data_dir.glob("*/*.parquet")) + + if not parquet_files: + raise ValueError(f"No parquet files found in {data_dir}") + + episode_set = set(episode_indices) + + for src_path in tqdm(parquet_files, desc="Processing data files"): + df = pd.read_parquet(src_path).reset_index(drop=True) + + # Filter to only include selected episodes + df = df[df["episode_index"].isin(episode_set)].copy() + + if len(df) == 0: + continue + + # Remove image columns + columns_to_drop = [col for col in img_keys if col in df.columns] + if columns_to_drop: + df = df.drop(columns=columns_to_drop) + + # Get chunk and file indices from path + relative_path = src_path.relative_to(src_dataset.root) + chunk_dir = relative_path.parts[1] + file_name = relative_path.parts[2] + chunk_idx = int(chunk_dir.split("-")[1]) + file_idx = int(file_name.split("-")[1].split(".")[0]) + + # Write to destination without pandas index + dst_path = dst_meta.root / f"data/chunk-{chunk_idx:03d}/file-{file_idx:03d}.parquet" + dst_path.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(dst_path, index=False) def handle_convert_to_video(cfg: EditDatasetConfig) -> None: - if not isinstance(cfg.operation, ConvertToVideoConfig): - raise ValueError("Operation config must be ConvertToVideoConfig") - + # Note: Parser may create any config type with the right fields, so we access fields directly + # instead of checking isinstance() dataset = LeRobotDataset(cfg.repo_id, root=cfg.root) - output_dir = Path(cfg.operation.output_dir) + + # Determine output directory and repo_id + # Priority: 1) new_repo_id, 2) operation.output_dir, 3) auto-generated name + output_dir_config = getattr(cfg.operation, "output_dir", None) + + if cfg.new_repo_id: + # Use new_repo_id for both local storage and hub push + output_repo_id = cfg.new_repo_id + output_dir = Path(cfg.root) / cfg.new_repo_id if cfg.root else HF_LEROBOT_HOME / cfg.new_repo_id + logging.info(f"Saving to new dataset: {cfg.new_repo_id}") + elif output_dir_config: + # Use custom output directory for local-only storage + output_dir = Path(output_dir_config) + # Extract repo name from output_dir for the dataset + output_repo_id = output_dir.name + logging.info(f"Saving to local directory: {output_dir}") + else: + # Auto-generate name: append "_video" to original repo_id + output_repo_id = f"{cfg.repo_id}_video" + output_dir = Path(cfg.root) / output_repo_id if cfg.root else HF_LEROBOT_HOME / output_repo_id + logging.info(f"Saving to auto-generated location: {output_dir}") logging.info(f"Converting dataset {cfg.repo_id} to video format") - convert_dataset_to_videos( + + new_dataset = convert_dataset_to_videos( dataset=dataset, output_dir=output_dir, - vcodec=cfg.operation.vcodec, - pix_fmt=cfg.operation.pix_fmt, - g=cfg.operation.g, - crf=cfg.operation.crf, - fast_decode=cfg.operation.fast_decode, - episode_indices=cfg.operation.episode_indices, - num_workers=cfg.operation.num_workers, - overwrite=cfg.operation.overwrite, + repo_id=output_repo_id, + vcodec=getattr(cfg.operation, "vcodec", "libsvtav1"), + pix_fmt=getattr(cfg.operation, "pix_fmt", "yuv420p"), + g=getattr(cfg.operation, "g", 2), + crf=getattr(cfg.operation, "crf", 30), + fast_decode=getattr(cfg.operation, "fast_decode", 0), + episode_indices=getattr(cfg.operation, "episode_indices", 
None), + num_workers=getattr(cfg.operation, "num_workers", 4), ) + logging.info("Video dataset created successfully!") + logging.info(f"Location: {output_dir}") + logging.info(f"Episodes: {new_dataset.meta.total_episodes}") + logging.info(f"Frames: {new_dataset.meta.total_frames}") + + if cfg.push_to_hub: + logging.info(f"Pushing to hub as {output_repo_id}...") + new_dataset.push_to_hub() + logging.info("āœ“ Successfully pushed to hub!") + else: + logging.info("Dataset saved locally (not pushed to hub)") + @parser.wrap() def edit_dataset(cfg: EditDatasetConfig) -> None: diff --git a/tests/datasets/test_dataset_tools.py b/tests/datasets/test_dataset_tools.py index 8bc1dbf6b..3a4516fc8 100644 --- a/tests/datasets/test_dataset_tools.py +++ b/tests/datasets/test_dataset_tools.py @@ -29,6 +29,7 @@ from lerobot.datasets.dataset_tools import ( remove_feature, split_dataset, ) +from lerobot.scripts.lerobot_edit_dataset import convert_dataset_to_videos @pytest.fixture @@ -1047,3 +1048,107 @@ def test_modify_features_preserves_file_structure(sample_dataset, tmp_path): assert new_chunk_indices == original_chunk_indices, "Chunk indices should be preserved" assert new_file_indices == original_file_indices, "File indices should be preserved" assert "reward" in modified_dataset.meta.features + + +def test_convert_dataset_to_videos(tmp_path): + """Test converting lerobot/pusht_image dataset to video format.""" + from lerobot.datasets.lerobot_dataset import LeRobotDataset + + # Load the actual lerobot/pusht_image dataset (only first 2 episodes for speed) + source_dataset = LeRobotDataset("lerobot/pusht_image", episodes=[0, 1]) + + output_dir = tmp_path / "pusht_video" + + with ( + patch("lerobot.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version, + patch("lerobot.datasets.lerobot_dataset.snapshot_download") as mock_snapshot_download, + ): + mock_get_safe_version.return_value = "v3.0" + mock_snapshot_download.return_value = str(output_dir) + + # Verify source dataset has images, not videos + assert len(source_dataset.meta.video_keys) == 0 + assert "observation.image" in source_dataset.meta.features + + # Convert to video dataset (only first 2 episodes for speed) + video_dataset = convert_dataset_to_videos( + dataset=source_dataset, + output_dir=output_dir, + repo_id="lerobot/pusht_video", + vcodec="libsvtav1", + pix_fmt="yuv420p", + g=2, + crf=30, + episode_indices=[0, 1], + num_workers=2, + ) + + # Verify new dataset has videos + assert len(video_dataset.meta.video_keys) > 0 + assert "observation.image" in video_dataset.meta.video_keys + + # Verify correct number of episodes and frames (2 episodes) + assert video_dataset.meta.total_episodes == 2 + # Compare against the actual number of frames in the loaded episodes, not metadata total + assert len(video_dataset) == len(source_dataset) + + # Verify video files exist + for ep_idx in range(video_dataset.meta.total_episodes): + for video_key in video_dataset.meta.video_keys: + video_path = video_dataset.root / video_dataset.meta.get_video_file_path(ep_idx, video_key) + assert video_path.exists(), f"Video file should exist: {video_path}" + + # Verify we can load the dataset and access it + assert len(video_dataset) == video_dataset.meta.total_frames + + # Test that we can actually get an item from the video dataset + item = video_dataset[0] + assert "observation.image" in item + assert "action" in item + + # Cleanup + import shutil + + if output_dir.exists(): + shutil.rmtree(output_dir) + + +def 
test_convert_dataset_to_videos_subset_episodes(tmp_path): + """Test converting only specific episodes from lerobot/pusht_image to video format.""" + from lerobot.datasets.lerobot_dataset import LeRobotDataset + + # Load the actual lerobot/pusht_image dataset (only first 3 episodes) + source_dataset = LeRobotDataset("lerobot/pusht_image", episodes=[0, 1, 2]) + + output_dir = tmp_path / "pusht_video_subset" + + with ( + patch("lerobot.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version, + patch("lerobot.datasets.lerobot_dataset.snapshot_download") as mock_snapshot_download, + ): + mock_get_safe_version.return_value = "v3.0" + mock_snapshot_download.return_value = str(output_dir) + + # Convert only episode 0 to video (subset of loaded episodes) + episode_indices = [0] + + video_dataset = convert_dataset_to_videos( + dataset=source_dataset, + output_dir=output_dir, + repo_id="lerobot/pusht_video_subset", + episode_indices=episode_indices, + num_workers=2, + ) + + # Verify correct number of episodes + assert video_dataset.meta.total_episodes == len(episode_indices) + + # Verify video files exist for selected episodes + assert len(video_dataset.meta.video_keys) > 0 + assert "observation.image" in video_dataset.meta.video_keys + + # Cleanup + import shutil + + if output_dir.exists(): + shutil.rmtree(output_dir)
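A possible follow-up regression check, not part of this PR, would assert that the raw image column is actually dropped from the converted data files, since the new dataset is expected to keep only non-image data in parquet. A minimal sketch, reusing the mocking pattern of the tests above (the test name and repo id are illustrative):

```python
def test_convert_dataset_to_videos_drops_image_columns(tmp_path):
    """Sketch: raw image columns should not survive as parquet columns after conversion."""
    from lerobot.datasets.lerobot_dataset import LeRobotDataset

    source_dataset = LeRobotDataset("lerobot/pusht_image", episodes=[0])
    output_dir = tmp_path / "pusht_video_no_images"

    with (
        patch("lerobot.datasets.lerobot_dataset.get_safe_version") as mock_get_safe_version,
        patch("lerobot.datasets.lerobot_dataset.snapshot_download") as mock_snapshot_download,
    ):
        mock_get_safe_version.return_value = "v3.0"
        mock_snapshot_download.return_value = str(output_dir)

        video_dataset = convert_dataset_to_videos(
            dataset=source_dataset,
            output_dir=output_dir,
            repo_id="lerobot/pusht_video_no_images",  # illustrative repo id
            episode_indices=[0],
            num_workers=2,
        )

        # The image key is now served from video files, so it should not appear
        # as a raw column in the underlying arrow/parquet data.
        assert "observation.image" not in video_dataset.hf_dataset.column_names
        assert "observation.image" in video_dataset.meta.video_keys
```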