diff --git a/benchmarks/video/benchmark.py b/benchmarks/video/benchmark.py deleted file mode 100644 index d9e5e62bb..000000000 --- a/benchmarks/video/benchmark.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import threading -import time -from contextlib import ContextDecorator - - -class TimeBenchmark(ContextDecorator): - """ - Measures execution time using a context manager or decorator. - - This class supports both context manager and decorator usage, and is thread-safe for multithreaded - environments. - - Args: - print: If True, prints the elapsed time upon exiting the context or completing the function. Defaults - to False. - - Examples: - - Using as a context manager: - - >>> benchmark = TimeBenchmark() - >>> with benchmark: - ... time.sleep(1) - >>> print(f"Block took {benchmark.result:.4f} seconds") - Block took approximately 1.0000 seconds - - Using with multithreading: - - ```python - import threading - - benchmark = TimeBenchmark() - - - def context_manager_example(): - with benchmark: - time.sleep(0.01) - print(f"Block took {benchmark.result_ms:.2f} milliseconds") - - - threads = [] - for _ in range(3): - t1 = threading.Thread(target=context_manager_example) - threads.append(t1) - - for t in threads: - t.start() - - for t in threads: - t.join() - ``` - Expected output: - Block took approximately 10.00 milliseconds - Block took approximately 10.00 milliseconds - Block took approximately 10.00 milliseconds - """ - - def __init__(self, print=False): - self.local = threading.local() - self.print_time = print - - def __enter__(self): - self.local.start_time = time.perf_counter() - return self - - def __exit__(self, *exc): - self.local.end_time = time.perf_counter() - self.local.elapsed_time = self.local.end_time - self.local.start_time - if self.print_time: - print(f"Elapsed time: {self.local.elapsed_time:.4f} seconds") - return False - - @property - def result(self): - return getattr(self.local, "elapsed_time", None) - - @property - def result_ms(self): - return self.result * 1e3 diff --git a/benchmarks/video/capture_camera_feed.py b/benchmarks/video/capture_camera_feed.py deleted file mode 100755 index 8f8530532..000000000 --- a/benchmarks/video/capture_camera_feed.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Capture video feed from a camera as raw images.""" - -import argparse -import datetime as dt -import os -import time -from pathlib import Path - -import cv2 -import rerun as rr - -# see https://rerun.io/docs/howto/visualization/limit-ram -RERUN_MEMORY_LIMIT = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "5%") - - -def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height: int, duration: int): - rr.init("lerobot_capture_camera_feed") - rr.spawn(memory_limit=RERUN_MEMORY_LIMIT) - - now = dt.datetime.now() - capture_dir = output_dir / f"{now:%Y-%m-%d}" / f"{now:%H-%M-%S}" - if not capture_dir.exists(): - capture_dir.mkdir(parents=True, exist_ok=True) - - # Opens the default webcam - cap = cv2.VideoCapture(0) - if not cap.isOpened(): - print("Error: Could not open video stream.") - return - - cap.set(cv2.CAP_PROP_FPS, fps) - cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) - cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) - - frame_index = 0 - start_time = time.time() - while time.time() - start_time < duration: - ret, frame = cap.read() - - if not ret: - print("Error: Could not read frame.") - break - rr.log("video/stream", rr.Image(frame), static=True) - cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame) - frame_index += 1 - - # Release the capture - cap.release() - - # TODO(Steven): Add a graceful shutdown via a close() method for the Viewer context, though not currently supported in the Rerun API. - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--output-dir", - type=Path, - default=Path("outputs/cam_capture/"), - help="Directory where the capture images are written. A subfolder named with the current date & time will be created inside it for each capture.", - ) - parser.add_argument( - "--fps", - type=int, - default=30, - help="Frames Per Second of the capture.", - ) - parser.add_argument( - "--width", - type=int, - default=1280, - help="Width of the captured images.", - ) - parser.add_argument( - "--height", - type=int, - default=720, - help="Height of the captured images.", - ) - parser.add_argument( - "--duration", - type=int, - default=20, - help="Duration in seconds for which the video stream should be captured.", - ) - args = parser.parse_args() - display_and_save_video_stream(**vars(args)) diff --git a/benchmarks/video/run_video_benchmark.py b/benchmarks/video/run_video_benchmark.py index 9f34b2273..064a84b48 100644 --- a/benchmarks/video/run_video_benchmark.py +++ b/benchmarks/video/run_video_benchmark.py @@ -21,11 +21,13 @@ See the provided README.md or run `python benchmark/video/run_video_benchmark.py import argparse import datetime as dt +import itertools import random import shutil from collections import OrderedDict from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path +from threading import Lock import einops import numpy as np @@ -35,13 +37,13 @@ import torch from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity from tqdm import tqdm -from benchmarks.video.benchmark import TimeBenchmark from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.video_utils import ( - decode_video_frames_torchvision, + decode_video_frames, encode_video_frames, ) from lerobot.utils.constants import OBS_IMAGE +from lerobot.utils.utils import TimerManager BASE_ENCODING = OrderedDict( [ @@ -86,7 +88,7 @@ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> t frames = [] for ts in timestamps: idx = 
int(ts * fps) - frame = PIL.Image.open(imgs_dir / f"frame_{idx:06d}.png") + frame = PIL.Image.open(imgs_dir / f"frame-{idx:06d}.png") frame = torch.from_numpy(np.array(frame)) frame = frame.type(torch.float32) / 255 frame = einops.rearrange(frame, "h w c -> c h w") @@ -97,21 +99,21 @@ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> t def save_decoded_frames( imgs_dir: Path, save_dir: Path, frames: torch.Tensor, timestamps: list[float], fps: int ) -> None: - if save_dir.exists() and len(list(save_dir.glob("frame_*.png"))) == len(timestamps): + if save_dir.exists() and len(list(save_dir.glob("frame-*.png"))) == len(timestamps): return save_dir.mkdir(parents=True, exist_ok=True) for i, ts in enumerate(timestamps): idx = int(ts * fps) frame_hwc = (frames[i].permute((1, 2, 0)) * 255).type(torch.uint8).cpu().numpy() - PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame_{idx:06d}_decoded.png") - shutil.copyfile(imgs_dir / f"frame_{idx:06d}.png", save_dir / f"frame_{idx:06d}_original.png") + PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame-{idx:06d}_decoded.png") + shutil.copyfile(imgs_dir / f"frame-{idx:06d}.png", save_dir / f"frame-{idx:06d}_original.png") def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None: episode_index = 0 ep_num_images = dataset.meta.episodes["length"][episode_index] - if imgs_dir.exists() and len(list(imgs_dir.glob("frame_*.png"))) == ep_num_images: + if imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images: return imgs_dir.mkdir(parents=True, exist_ok=True) @@ -125,7 +127,7 @@ def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None: tqdm(imgs_dataset, desc=f"saving {dataset.repo_id} first episode images", leave=False) ): img = item[img_keys[0]] - img.save(str(imgs_dir / f"frame_{i:06d}.png"), quality=100) + img.save(str(imgs_dir / f"frame-{i:06d}.png"), quality=100) if i >= ep_num_images - 1: break @@ -149,18 +151,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis return [idx / fps for idx in frame_indexes] -def decode_video_frames( - video_path: str, - timestamps: list[float], - tolerance_s: float, - backend: str, -) -> torch.Tensor: - if backend in ["pyav", "video_reader"]: - return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend) - else: - raise NotImplementedError(backend) - - def benchmark_decoding( imgs_dir: Path, video_path: Path, @@ -172,8 +162,8 @@ def benchmark_decoding( num_workers: int = 4, save_frames: bool = False, ) -> dict: - def process_sample(sample: int): - time_benchmark = TimeBenchmark() + def process_sample(sample: int, lock: Lock): + time_benchmark = TimerManager(log=False) timestamps = sample_timestamps(timestamps_mode, ep_num_images, fps) num_frames = len(timestamps) result = { @@ -182,13 +172,13 @@ def benchmark_decoding( "mse_values": [], } - with time_benchmark: + with time_benchmark, lock: frames = decode_video_frames(video_path, timestamps=timestamps, tolerance_s=5e-1, backend=backend) - result["load_time_video_ms"] = time_benchmark.result_ms / num_frames + result["load_time_video_ms"] = (time_benchmark.last * 1000) / num_frames with time_benchmark: original_frames = load_original_frames(imgs_dir, timestamps, fps) - result["load_time_images_ms"] = time_benchmark.result_ms / num_frames + result["load_time_images_ms"] = (time_benchmark.last * 1000) / num_frames frames_np, original_frames_np = frames.numpy(), original_frames.numpy() for i in range(num_frames): @@ -215,8 +205,10 @@ def 
benchmark_decoding( # A sample is a single set of decoded frames specified by timestamps_mode (e.g. a single frame, 2 frames, etc.). # For each sample, we record metrics (loading time and quality metrics) which are then averaged over all samples. # As these samples are independent, we run them in parallel threads to speed up the benchmark. + # Use a single shared lock for all worker threads + shared_lock = Lock() with ThreadPoolExecutor(max_workers=num_workers) as executor: - futures = [executor.submit(process_sample, i) for i in range(num_samples)] + futures = [executor.submit(process_sample, i, shared_lock) for i in range(num_samples)] for future in tqdm(as_completed(futures), total=num_samples, desc="samples", leave=False): result = future.result() load_times_video_ms.append(result["load_time_video_ms"]) @@ -358,24 +350,27 @@ def main( imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_") # We only use the first episode save_first_episode(imgs_dir, dataset) - for key, values in tqdm(encoding_benchmarks.items(), desc="encodings (g, crf)", leave=False): - for value in tqdm(values, desc=f"encodings ({key})", leave=False): - encoding_cfg = BASE_ENCODING.copy() - encoding_cfg["vcodec"] = video_codec - encoding_cfg["pix_fmt"] = pixel_format + for duet in [ + dict(zip(encoding_benchmarks.keys(), unique_combination, strict=False)) + for unique_combination in itertools.product(*encoding_benchmarks.values()) + ]: + encoding_cfg = BASE_ENCODING.copy() + encoding_cfg["vcodec"] = video_codec + encoding_cfg["pix_fmt"] = pixel_format + for key, value in duet.items(): encoding_cfg[key] = value - args_path = Path("_".join(str(value) for value in encoding_cfg.values())) - video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4" - benchmark_table += benchmark_encoding_decoding( - dataset, - video_path, - imgs_dir, - encoding_cfg, - decoding_benchmarks, - num_samples, - num_workers, - save_frames, - ) + args_path = Path("_".join(str(value) for value in encoding_cfg.values())) + video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4" + benchmark_table += benchmark_encoding_decoding( + dataset, + video_path, + imgs_dir, + encoding_cfg, + decoding_benchmarks, + num_samples, + num_workers, + save_frames, + ) # Save intermediate results benchmark_df = pd.DataFrame(benchmark_table, columns=headers) @@ -409,9 +404,9 @@ if __name__ == "__main__": nargs="*", default=[ "lerobot/pusht_image", - "aliberts/aloha_mobile_shrimp_image", - "aliberts/paris_street", - "aliberts/kitchen", + "lerobot/aloha_mobile_shrimp_image", + "lerobot/paris_street", + "lerobot/kitchen", ], help="Datasets repo-ids to test against. First episodes only are used. 
Must be images.",
     )
@@ -419,7 +414,7 @@
         "--vcodec",
         type=str,
         nargs="*",
-        default=["libx264", "hevc", "libsvtav1"],
+        default=["h264", "hevc", "libsvtav1"],
         help="Video codecs to be tested",
     )
     parser.add_argument(
@@ -468,7 +463,7 @@
         "--backends",
         type=str,
         nargs="*",
-        default=["pyav", "video_reader"],
+        default=["torchcodec", "pyav"],
         help="Torchvision decoding backend to be tested.",
     )
     parser.add_argument(
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 765f6698d..2f9715ce1 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -47,8 +47,8 @@
 - sections:
   - local: envhub
     title: Environments from the Hub
-  - local: il_sim
-    title: Imitation Learning in Sim
+  - local: envhub_leisaac
+    title: Control & Train Robots in Sim (LeIsaac)
   - local: libero
     title: Using Libero
   - local: metaworld
diff --git a/docs/source/async.mdx b/docs/source/async.mdx
index be10f8baf..e3a11609c 100644
--- a/docs/source/async.mdx
+++ b/docs/source/async.mdx
@@ -196,7 +196,7 @@ client_cfg = RobotClientConfig(
     server_address="localhost:8080",
     policy_device="mps",
     policy_type="smolvla",
-    pretrained_name_or_path="fracapuano/smolvla_async",
+    pretrained_name_or_path="/smolvla_async",
     chunk_size_threshold=0.5,
     actions_per_chunk=50, # make sure this is less than the max actions of the policy
 )
diff --git a/docs/source/envhub_leisaac.mdx b/docs/source/envhub_leisaac.mdx
new file mode 100644
index 000000000..ff848d415
--- /dev/null
+++ b/docs/source/envhub_leisaac.mdx
@@ -0,0 +1,301 @@
+# LeIsaac × LeRobot EnvHub
+
+LeRobot EnvHub now supports **imitation learning in simulation** with LeIsaac.
+Spin up everyday manipulation tasks, teleoperate the robot, collect demos, push them to the Hub, and train policies in LeRobot — all in one loop.
+
+[LeIsaac](https://github.com/LightwheelAI/leisaac) integrates with IsaacLab and the SO101 Leader/Follower setup to provide:
+
+- 🕹️ **Teleoperation-first workflows** for data collection
+- 📦 **Built-in data conversion** ready for LeRobot training
+- 🤖 **Everyday skills** like picking oranges, lifting cubes, cleaning tables, and folding cloth
+- ☁️ **Ongoing upgrades** from [LightWheel](https://lightwheel.ai/): cloud simulation, EnvHub support, Sim2Real tooling, and more
+
+Below you’ll find the currently supported LeIsaac tasks exposed through LeRobot EnvHub.
+
+# Available Environments
+
+The following table lists all available tasks and environments in LeIsaac × LeRobot EnvHub.
+You can also get the latest list of environments by running the following command:
+
+```bash
+python scripts/environments/list_envs.py
+```
+
+| Task | Environment ID | Task Description | Related Robot |
+| :--- | :--- | :--- | :--- |
+| | [LeIsaac-SO101-PickOrange-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/pick_orange/pick_orange_env_cfg.py)<br>[LeIsaac-SO101-PickOrange-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/pick_orange/direct/pick_orange_env.py) | Pick three oranges, put them on the plate, then reset the arm to its rest state. | Single-Arm SO101 Follower |
+| | [LeIsaac-SO101-LiftCube-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/lift_cube/lift_cube_env_cfg.py)<br>[LeIsaac-SO101-LiftCube-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/lift_cube/direct/lift_cube_env.py) | Lift the red cube. | Single-Arm SO101 Follower |
+| | [LeIsaac-SO101-CleanToyTable-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/clean_toy_table_env_cfg.py)<br>[LeIsaac-SO101-CleanToyTable-BiArm-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/clean_toy_table_bi_arm_env_cfg.py)<br>[LeIsaac-SO101-CleanToyTable-BiArm-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/direct/clean_toy_table_bi_arm_env.py) | Pick the two letter "e" objects, put them into the box, then reset the arm to its rest state. | Single-Arm SO101 Follower<br>Bi-Arm SO101 Follower |
+| | [LeIsaac-SO101-FoldCloth-BiArm-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/fold_cloth/fold_cloth_bi_arm_env_cfg.py)<br>[LeIsaac-SO101-FoldCloth-BiArm-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/fold_cloth/direct/fold_cloth_bi_arm_env.py) | Fold the cloth, then reset the arm to its rest state.<br>_Note: Only the DirectEnv supports `check_success` in this task._ | Bi-Arm SO101 Follower |
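The environment IDs above follow the standard Gymnasium/IsaacLab registration pattern, so outside of EnvHub a task can in principle be created directly with `gym.make` once Isaac Sim is running. The sketch below is illustrative only and is not taken from this PR: it assumes the usual IsaacLab boilerplate (`AppLauncher`, `parse_env_cfg`) and that importing `leisaac` registers the task IDs, so check the LeIsaac repository for the exact entry points.

```python
# Illustrative sketch only (not from this PR); assumes IsaacLab conventions.
# Isaac Sim must be launched via AppLauncher before any environment is created.
from isaaclab.app import AppLauncher

simulation_app = AppLauncher(headless=True).app

import gymnasium as gym
import leisaac  # noqa: F401  # assumed to register the LeIsaac-* task IDs on import
from isaaclab_tasks.utils import parse_env_cfg

task_id = "LeIsaac-SO101-PickOrange-v0"
env_cfg = parse_env_cfg(task_id, device="cuda:0", num_envs=1)

env = gym.make(task_id, cfg=env_cfg)
obs, info = env.reset()

env.close()
simulation_app.close()
```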
+
+# Load LeIsaac directly in LeRobot with one line of code
+
+> EnvHub: Share LeIsaac environments through HuggingFace
+
+[EnvHub](https://huggingface.co/docs/lerobot/envhub) is our reproducible environment hub: spin up a packaged simulation with one line, experiment immediately, and publish your own tasks for the community.
+
+LeIsaac offers EnvHub support, so you can consume or share tasks with only a few commands.
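On the LeRobot side (again, not shown in this diff), demonstrations collected in LeIsaac and pushed to the Hub load like any other dataset via the `LeRobotDataset` class that this PR's benchmark script already imports. A minimal sketch, where the `repo_id` is a placeholder rather than a real dataset:

```python
# Minimal sketch; "your-username/leisaac_pick_orange" is a placeholder repo_id.
from lerobot.datasets.lerobot_dataset import LeRobotDataset

dataset = LeRobotDataset("your-username/leisaac_pick_orange")
print(f"episodes: {dataset.num_episodes}, frames: {dataset.num_frames}")

# Each item is a dict of tensors (camera frames, robot state, actions).
sample = dataset[0]
print(list(sample.keys()))
```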