From 9f00d2c3a20fd77978e185c32c8ea35f9df61fb0 Mon Sep 17 00:00:00 2001 From: Michel Aractingi Date: Fri, 24 Oct 2025 17:06:21 +0200 Subject: [PATCH] Modify convert_to_lerobot_v3 script for behaviours dataset to take a single task id and create a dataset outof it --- .../behavior_lerobot_dataset_v3.py | 121 ++++---- .../behavior_1k/behaviour_1k_constants.py | 33 +- examples/behavior_1k/convert_to_lerobot_v3.py | 287 +++++++----------- 3 files changed, 203 insertions(+), 238 deletions(-) diff --git a/examples/behavior_1k/behavior_lerobot_dataset_v3.py b/examples/behavior_1k/behavior_lerobot_dataset_v3.py index 74f2b0fa2..c867f9cfa 100644 --- a/examples/behavior_1k/behavior_lerobot_dataset_v3.py +++ b/examples/behavior_1k/behavior_lerobot_dataset_v3.py @@ -1,46 +1,49 @@ #!/usr/bin/env python import json +import logging +from pathlib import Path +from typing import Any + import numpy as np import torch as th -from pathlib import Path -from typing import Dict, Any from lerobot.datasets.lerobot_dataset import LeRobotDataset - -from .behaviour_1k_constants import ( - TASK_INDICES_TO_NAMES, - ROBOT_CAMERA_NAMES, - PROPRIOCEPTION_INDICES, - BEHAVIOR_DATASET_FEATURES, -) - -import logging from lerobot.utils.utils import init_logging +from .behaviour_1k_constants import ( + PROPRIOCEPTION_INDICES, + ROBOT_CAMERA_NAMES, + TASK_INDICES_TO_NAMES, +) + init_logging() + class BehaviorLeRobotDatasetV3(LeRobotDataset): """ Extends LeRobotDataset v3.0 for BEHAVIOR-1K specific requirements. Handles task-based episode organization and BEHAVIOR-1K metadata. """ + @classmethod def create( cls, repo_id: str, - fps: int = 30, + fps: int, + features: dict, root: str | Path | None = None, - robot_type: str = "R1Pro", + robot_type: str | None = None, use_videos: bool = True, - video_backend: str = "pyav", - batch_encoding_size: int = 1, + tolerance_s: float = 1e-4, image_writer_processes: int = 0, - image_writer_threads: int = 4, + image_writer_threads: int = 0, + video_backend: str | None = None, + batch_encoding_size: int = 1, ) -> "BehaviorLeRobotDatasetV3": """ Create a new BEHAVIOR-1K dataset in v3.0 format. - + Args: repo_id: HuggingFace repository ID fps: Frames per second (default: 30) @@ -51,7 +54,7 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): batch_encoding_size: Number of episodes to batch before encoding videos image_writer_processes: Number of processes for async image writing image_writer_threads: Number of threads per process for image writing - + Returns: BehaviorLeRobotDatasetV3 instance """ @@ -59,7 +62,7 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): obj = super().create( repo_id=repo_id, fps=fps, - features=BEHAVIOR_DATASET_FEATURES, + features=features, root=root, robot_type=robot_type, use_videos=use_videos, @@ -69,14 +72,14 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): video_backend=video_backend, batch_encoding_size=batch_encoding_size, ) - + # Convert to BehaviorLeRobotDatasetV3 instance obj.__class__ = cls - + # Initialize BEHAVIOR-1K specific attributes obj.task_episode_mapping = {} # Maps task_id to list of episode indices obj.episode_task_mapping = {} # Maps episode_index to task info - + # Additional metadata for BEHAVIOR-1K obj.behavior_metadata = { "robot_type": robot_type, @@ -84,41 +87,41 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): "proprioception_indices": PROPRIOCEPTION_INDICES[robot_type], "camera_names": ROBOT_CAMERA_NAMES[robot_type], } - + logging.info(f"Created BehaviorLeRobotDatasetV3 with repo_id: {repo_id}") return obj - + def __init__(self, *args, **kwargs): """ Initialize from existing dataset. Use the create() classmethod to create a new dataset. """ super().__init__(*args, **kwargs) - + # Initialize BEHAVIOR-1K specific attributes for loading existing datasets self.task_episode_mapping = {} self.episode_task_mapping = {} self.behavior_metadata = {} - + # Try to load BEHAVIOR-1K metadata if it exists metadata_path = self.root / "meta" / "behavior_metadata.json" if metadata_path.exists(): - with open(metadata_path, "r") as f: + with open(metadata_path) as f: stored_metadata = json.load(f) self.behavior_metadata = stored_metadata self.task_episode_mapping = stored_metadata.get("task_episode_mapping", {}) self.episode_task_mapping = stored_metadata.get("episode_task_mapping", {}) - + def add_episode_from_hdf5( self, - hdf5_data: Dict[str, Any], + hdf5_data: dict[str, Any], task_id: int, episode_id: int, include_videos: bool = True, ) -> None: """ Add an episode from HDF5 data to the dataset. - + Args: hdf5_data: Dictionary containing the HDF5 episode data task_id: Task ID for this episode @@ -127,9 +130,9 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): """ task_name = TASK_INDICES_TO_NAMES[task_id] num_frames = len(hdf5_data["action"]) - + logging.info(f"Adding episode {episode_id} (task: {task_name}) with {num_frames} frames") - + # Process each frame for frame_idx in range(num_frames): frame_data = { @@ -140,44 +143,44 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): "task": task_name, "timestamp": frame_idx / self.fps, } - + # Add video frames if requested if include_videos: for modality in ["rgb", "depth_linear", "seg_instance_id"]: # Map depth_linear to depth for consistency output_modality = "depth" if modality == "depth_linear" else modality - + for camera_name, robot_camera_name in ROBOT_CAMERA_NAMES[self.robot_type].items(): key = f"observation.images.{output_modality}.{camera_name}" hdf5_key = f"{robot_camera_name}::{modality}" - + if hdf5_key in hdf5_data["obs"]: # Get the frame data frame = hdf5_data["obs"][hdf5_key][frame_idx] - + # Handle different data types if isinstance(frame, th.Tensor): frame = frame.numpy() - + # Ensure correct shape if modality == "seg_instance_id" and len(frame.shape) == 2: # Add channel dimension for grayscale frame = np.expand_dims(frame, axis=-1) elif modality == "depth_linear" and len(frame.shape) == 2: frame = np.expand_dims(frame, axis=-1) - + frame_data[key] = frame - + # Add frame to dataset self.add_frame(frame_data) - + # Save episode with metadata episode_metadata = { "task_id": task_id, "task_name": task_name, "original_episode_id": episode_id, } - + # Add any additional HDF5 attributes as metadata if "attrs" in hdf5_data: for attr_name, attr_value in hdf5_data["attrs"].items(): @@ -185,10 +188,10 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): episode_metadata[attr_name] = list(attr_value) else: episode_metadata[attr_name] = attr_value - + # Save the episode self.save_episode(episode_data=None) - + # Track task-episode mapping if task_id not in self.task_episode_mapping: self.task_episode_mapping[task_id] = [] @@ -198,26 +201,30 @@ class BehaviorLeRobotDatasetV3(LeRobotDataset): "task_name": task_name, "original_episode_id": episode_id, } - + def finalize(self) -> None: """Finalize the dataset and save additional BEHAVIOR-1K metadata.""" # Save BEHAVIOR-1K specific metadata metadata_path = self.root / "meta" / "behavior_metadata.json" metadata_path.parent.mkdir(parents=True, exist_ok=True) - - self.behavior_metadata.update({ - "task_episode_mapping": self.task_episode_mapping, - "episode_task_mapping": self.episode_task_mapping, - "total_tasks": len(self.task_episode_mapping), - "total_episodes": self.num_episodes, - "total_frames": self.num_frames, - }) - + + self.behavior_metadata.update( + { + "task_episode_mapping": self.task_episode_mapping, + "episode_task_mapping": self.episode_task_mapping, + "total_tasks": len(self.task_episode_mapping), + "total_episodes": self.num_episodes, + "total_frames": self.num_frames, + } + ) + with open(metadata_path, "w") as f: json.dump(self.behavior_metadata, f, indent=2) - + # Finalize the parent dataset super().finalize() - - logging.info(f"Finalized dataset with {self.num_episodes} episodes " - f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks") + + logging.info( + f"Finalized dataset with {self.num_episodes} episodes " + f"and {self.num_frames} frames across {len(self.task_episode_mapping)} tasks" + ) diff --git a/examples/behavior_1k/behaviour_1k_constants.py b/examples/behavior_1k/behaviour_1k_constants.py index c0f8191d5..a0601c55b 100644 --- a/examples/behavior_1k/behaviour_1k_constants.py +++ b/examples/behavior_1k/behaviour_1k_constants.py @@ -1,7 +1,10 @@ -import numpy as np -import torch as th from collections import OrderedDict +import numpy as np +import torch as th + +ROBOT_TYPE = "R1Pro" +FPS = 30 ROBOT_CAMERA_NAMES = { "A1": { @@ -21,13 +24,17 @@ WRIST_RESOLUTION = (480, 480) # TODO: Fix A1 CAMERA_INTRINSICS = { "A1": { - "external": np.array([[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32), # 240x240 + "external": np.array( + [[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32 + ), # 240x240 "wrist": np.array( [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32 ), # 240x240 }, "R1Pro": { - "head": np.array([[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32), # 720x720 + "head": np.array( + [[306.0, 0.0, 360.0], [0.0, 306.0, 360.0], [0.0, 0.0, 1.0]], dtype=np.float32 + ), # 720x720 "left_wrist": np.array( [[388.6639, 0.0, 240.0], [0.0, 388.6639, 240.0], [0.0, 0.0, 1.0]], dtype=np.float32 ), # 480x480 @@ -48,7 +55,7 @@ BEHAVIOR_DATASET_FEATURES = { }, # Proprioception "observation.state": { - "dtype": "float32", + "dtype": "float32", "shape": (256,), # Full proprioception state "names": None, }, @@ -229,7 +236,10 @@ JOINT_RANGE = { "gripper": (th.tensor([0.00], dtype=th.float32), th.tensor([0.03], dtype=th.float32)), }, "R1Pro": { - "base": (th.tensor([-0.75, -0.75, -1.0], dtype=th.float32), th.tensor([0.75, 0.75, 1.0], dtype=th.float32)), + "base": ( + th.tensor([-0.75, -0.75, -1.0], dtype=th.float32), + th.tensor([0.75, 0.75, 1.0], dtype=th.float32), + ), "torso": ( th.tensor([-1.1345, -2.7925, -1.8326, -3.0543], dtype=th.float32), th.tensor([1.8326, 2.5307, 1.5708, 3.0543], dtype=th.float32), @@ -253,8 +263,14 @@ EEF_POSITION_RANGE = { "0": (th.tensor([0.0, -0.7, 0.0], dtype=th.float32), th.tensor([0.7, 0.7, 0.7], dtype=th.float32)), }, "R1Pro": { - "left": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)), - "right": (th.tensor([0.0, -0.65, 0.0], dtype=th.float32), th.tensor([0.65, 0.65, 2.5], dtype=th.float32)), + "left": ( + th.tensor([0.0, -0.65, 0.0], dtype=th.float32), + th.tensor([0.65, 0.65, 2.5], dtype=th.float32), + ), + "right": ( + th.tensor([0.0, -0.65, 0.0], dtype=th.float32), + th.tensor([0.65, 0.65, 2.5], dtype=th.float32), + ), }, } @@ -317,4 +333,3 @@ TASK_NAMES_TO_INDICES = { "make_pizza": 49, } TASK_INDICES_TO_NAMES = {v: k for k, v in TASK_NAMES_TO_INDICES.items()} - diff --git a/examples/behavior_1k/convert_to_lerobot_v3.py b/examples/behavior_1k/convert_to_lerobot_v3.py index 4c9c4865e..9927af27d 100755 --- a/examples/behavior_1k/convert_to_lerobot_v3.py +++ b/examples/behavior_1k/convert_to_lerobot_v3.py @@ -1,69 +1,82 @@ #!/usr/bin/env python +""" +Convert a single BEHAVIOR-1K task from HDF5 to LeRobotDataset v3.0 format. + +Usage examples: +# Convert a single task +python convert_to_lerobot_v3.py \ + --data-folder /path/to/data \ + --repo-id "username/behavior-1k-assembling-gift-baskets" \ + --task-id 0 \ + --push-to-hub + +""" + import argparse +import logging +import os +from pathlib import Path + import h5py import numpy as np -import os -import torch as th -from pathlib import Path from tqdm import tqdm -import logging - -from .behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3 -from .behaviour_1k_constants import TASK_NAMES_TO_INDICES, TASK_INDICES_TO_NAMES, BEHAVIOR_DATASET_FEATURES from lerobot.utils.utils import init_logging +from .behavior_lerobot_dataset_v3 import BehaviorLeRobotDatasetV3 +from .behaviour_1k_constants import BEHAVIOR_DATASET_FEATURES, FPS, ROBOT_TYPE, TASK_INDICES_TO_NAMES + init_logging() + def load_hdf5_episode(hdf5_path: str, episode_id: int = 0) -> dict: """ Load episode data from HDF5 file. - + Args: hdf5_path: Path to the HDF5 file episode_id: Episode ID to load (default: 0) - + Returns: Dictionary containing episode data """ episode_data = {} - + with h5py.File(hdf5_path, "r") as f: # Find the episode with most samples if episode_id not specified if episode_id == -1: - num_samples = [f["data"][key].attrs["num_samples"] for key in f["data"].keys()] + num_samples = [f["data"][key].attrs["num_samples"] for key in f["data"]] episode_id = num_samples.index(max(num_samples)) - + demo_key = f"demo_{episode_id}" if demo_key not in f["data"]: raise ValueError(f"Episode {episode_id} not found in {hdf5_path}") - + demo_data = f["data"][demo_key] - + # Load actions episode_data["action"] = np.array(demo_data["action"][:]) - + # Load observations episode_data["obs"] = {} - for key in demo_data["obs"].keys(): + for key in demo_data["obs"]: episode_data["obs"][key] = np.array(demo_data["obs"][key][:]) - + # Load attributes episode_data["attrs"] = {} for attr_name in demo_data.attrs: episode_data["attrs"][attr_name] = demo_data.attrs[attr_name] - + # Add global attributes for attr_name in f["data"].attrs: episode_data["attrs"][f"global_{attr_name}"] = f["data"].attrs[attr_name] - + return episode_data def convert_episode( data_folder: str, - output_repo_id: str, task_id: int, demo_id: int, dataset: BehaviorLeRobotDatasetV3, @@ -72,10 +85,10 @@ def convert_episode( ) -> None: """ Convert a single episode from HDF5 to LeRobotDataset v3.0 format. - + Args: data_folder: Base data folder containing HDF5 files - output_repo_id: Output repository ID for the dataset + repo_id: Repository ID for the dataset task_id: Task ID demo_id: Demo ID (episode ID) dataset: BehaviorLeRobotDatasetV3 instance to add data to @@ -85,26 +98,22 @@ def convert_episode( # Construct paths task_name = TASK_INDICES_TO_NAMES[task_id] hdf5_path = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}/episode_{demo_id:08d}.hdf5" - + if not os.path.exists(hdf5_path): logging.error(f"HDF5 file not found: {hdf5_path}") return - + logging.info(f"Converting episode {demo_id} from task {task_name}") - + # Load episode data - try: - episode_data = load_hdf5_episode(hdf5_path, episode_id=0) - except Exception as e: - logging.error(f"Failed to load episode data: {e}") - return - + episode_data = load_hdf5_episode(hdf5_path, episode_id=0) + # Filter out segmentation if not requested if not include_segmentation: - keys_to_remove = [k for k in episode_data["obs"].keys() if "seg_instance_id" in k] + keys_to_remove = [k for k in episode_data["obs"] if "seg_instance_id" in k] for key in keys_to_remove: del episode_data["obs"][key] - + # Add episode to dataset dataset.add_episode_from_hdf5( hdf5_data=episode_data, @@ -114,169 +123,103 @@ def convert_episode( ) -def convert_dataset( +def convert_task_to_dataset( data_folder: str, - output_repo_id: str, - task_names: list = None, - episode_ids: list = None, - max_episodes_per_task: int = None, - include_videos: bool = True, - include_segmentation: bool = True, - fps: int = 30, - batch_encoding_size: int = 1, - image_writer_processes: int = 0, - image_writer_threads: int = 4, + repo_id: str, + task_id: int, push_to_hub: bool = False, ) -> None: """ - Convert BEHAVIOR-1K dataset from HDF5 to LeRobotDataset v3.0 format. - + Convert a single BEHAVIOR-1K task from HDF5 to LeRobotDataset v3.0 format. + Args: data_folder: Base folder containing HDF5 data - output_repo_id: Output repository ID (e.g., "username/dataset-name") - task_names: List of task names to convert (None = all tasks) - episode_ids: Specific episode IDs to convert (None = all episodes) - max_episodes_per_task: Maximum episodes per task to convert - include_videos: Whether to include video data - include_segmentation: Whether to include segmentation data - fps: Frames per second - batch_encoding_size: Number of episodes to batch before encoding - image_writer_processes: Number of processes for image writing - image_writer_threads: Number of threads for image writing + repo_id: Repository ID (e.g., "username/behavior-1k-task-name") + task_id: Task ID to convert push_to_hub: Whether to push to HuggingFace Hub """ + task_name = TASK_INDICES_TO_NAMES[task_id] + task_folder = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}" + + if not os.path.exists(task_folder): + raise ValueError(f"Task folder not found: {task_folder}") + # Create output directory - output_dir = Path.home() / ".cache/huggingface/lerobot" / output_repo_id + output_dir = Path.home() / ".cache/huggingface/lerobot" / repo_id output_dir.mkdir(parents=True, exist_ok=True) - - logging.info(f"Converting dataset to: {output_dir}") - - # Initialize dataset + + logging.info(f"Converting task '{task_name}' (ID: {task_id}) to: {output_dir}") + + # Initialize dataset for this task dataset = BehaviorLeRobotDatasetV3.create( - repo_id=output_repo_id, - root=output_dir, - fps=fps, - robot_type="R1Pro", - use_videos=include_videos, - video_backend="pyav", - batch_encoding_size=batch_encoding_size, - image_writer_processes=image_writer_processes, - image_writer_threads=image_writer_threads, + repo_id=repo_id, + fps=FPS, + features=BEHAVIOR_DATASET_FEATURES, + robot_type=ROBOT_TYPE, ) - - # Determine which tasks to process - if task_names is None: - task_names = list(TASK_NAMES_TO_INDICES.keys()) - - task_ids = [TASK_NAMES_TO_INDICES[name] for name in task_names] - - # Process each task - total_episodes = 0 - for task_id in tqdm(task_ids, desc="Processing tasks"): - task_name = TASK_INDICES_TO_NAMES[task_id] - task_folder = f"{data_folder}/2025-challenge-rawdata/task-{task_id:04d}" - - if not os.path.exists(task_folder): - logging.warning(f"Task folder not found: {task_folder}") - continue - - # Find all episodes for this task - if episode_ids is not None: - # Use specified episode IDs - task_episode_ids = [eid for eid in episode_ids if eid // 10000 == task_id] - else: - # Find all episodes in the task folder - task_episode_ids = [] - for filename in os.listdir(task_folder): - if filename.startswith("episode_") and filename.endswith(".hdf5"): - eid = int(filename.split("_")[1].split(".")[0]) - task_episode_ids.append(eid) - task_episode_ids.sort() - - # Limit episodes if requested - if max_episodes_per_task is not None: - task_episode_ids = task_episode_ids[:max_episodes_per_task] - - logging.info(f"Processing {len(task_episode_ids)} episodes for task {task_name}") - - # Convert each episode - for demo_id in tqdm(task_episode_ids, desc=f"Task {task_name}", leave=False): - try: - convert_episode( - data_folder=data_folder, - output_repo_id=output_repo_id, - task_id=task_id, - demo_id=demo_id, - dataset=dataset, - include_videos=include_videos, - include_segmentation=include_segmentation, - ) - total_episodes += 1 - except Exception as e: - logging.error(f"Failed to convert episode {demo_id}: {e}") - continue - - logging.info(f"Converted {total_episodes} episodes total") - + + # Find all episodes in the task folder + task_episode_ids = [] + for filename in os.listdir(task_folder): + if filename.startswith("episode_") and filename.endswith(".hdf5"): + eid = int(filename.split("_")[1].split(".")[0]) + task_episode_ids.append(eid) + task_episode_ids.sort() + + logging.info(f"Processing {len(task_episode_ids)} episodes for task {task_name}") + + # Convert each episode + episodes_converted = 0 + for demo_id in tqdm(task_episode_ids, desc="Converting episodes"): + convert_episode( + data_folder=data_folder, + task_id=task_id, + demo_id=demo_id, + dataset=dataset, + include_videos=True, + include_segmentation=True, + ) + episodes_converted += 1 + + logging.info(f"Converted {episodes_converted} episodes for task {task_name}") + # Finalize dataset - logging.info("Finalizing dataset...") + logging.info(f"Finalizing dataset for task {task_name}...") dataset.finalize() - + # Push to hub if requested if push_to_hub: - logging.info("Pushing dataset to HuggingFace Hub...") - dataset.push_to_hub( - private=True, - license="apache-2.0", - ) - + logging.info(f"Pushing task {task_name} dataset to HuggingFace Hub...") + dataset.push_to_hub() + logging.info("Conversion complete!") def main(): - parser = argparse.ArgumentParser(description="Convert BEHAVIOR-1K data to LeRobotDataset v3.0") - parser.add_argument("--data_folder", type=str, required=True, help="Path to the data folder") - parser.add_argument("--output_repo_id", type=str, required=True, - help="Output repository ID (e.g., 'username/behavior-dataset-v3')") - parser.add_argument("--task_names", type=str, nargs="+", default=None, - help="Task names to convert (default: all)") - parser.add_argument("--episode_ids", type=int, nargs="+", default=None, - help="Specific episode IDs to convert") - parser.add_argument("--max_episodes_per_task", type=int, default=None, - help="Maximum episodes per task to convert") - parser.add_argument("--no_videos", action="store_true", - help="Exclude video data") - parser.add_argument("--no_segmentation", action="store_true", - help="Exclude segmentation data") - parser.add_argument("--fps", type=int, default=30, - help="Frames per second (default: 30)") - parser.add_argument("--batch_encoding_size", type=int, default=1, - help="Number of episodes to batch before encoding videos") - parser.add_argument("--image_writer_processes", type=int, default=0, - help="Number of processes for async image writing") - parser.add_argument("--image_writer_threads", type=int, default=4, - help="Number of threads for image writing") - parser.add_argument("--push_to_hub", action="store_true", - help="Push dataset to HuggingFace Hub") - + parser = argparse.ArgumentParser(description="Convert a single BEHAVIOR-1K task to LeRobotDataset v3.0") + parser.add_argument("--data-folder", type=str, required=True, help="Path to the data folder") + parser.add_argument( + "--repo-id", + type=str, + required=True, + help="Output repository ID (e.g., 'username/behavior-1k-assembling-gift-baskets')", + ) + parser.add_argument( + "--task-id", type=int, required=True, help="Task ID to convert (e.g., 0 for assembling_gift_baskets)" + ) + parser.add_argument( + "--push-to-hub", action="store_true", help="Push dataset to HuggingFace Hub after conversion" + ) + args = parser.parse_args() - - # Convert dataset - convert_dataset( + + # Convert single task to dataset + convert_task_to_dataset( data_folder=args.data_folder, - output_repo_id=args.output_repo_id, - task_names=args.task_names, - episode_ids=args.episode_ids, - max_episodes_per_task=args.max_episodes_per_task, - include_videos=not args.no_videos, - include_segmentation=not args.no_segmentation, - fps=args.fps, - batch_encoding_size=args.batch_encoding_size, - image_writer_processes=args.image_writer_processes, - image_writer_threads=args.image_writer_threads, + repo_id=args.repo_id, + task_id=args.task_id, push_to_hub=args.push_to_hub, ) - if __name__ == "__main__":