diff --git a/benchmarks/video/benchmark.py b/benchmarks/video/benchmark.py deleted file mode 100644 index d9e5e62bb..000000000 --- a/benchmarks/video/benchmark.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import threading -import time -from contextlib import ContextDecorator - - -class TimeBenchmark(ContextDecorator): - """ - Measures execution time using a context manager or decorator. - - This class supports both context manager and decorator usage, and is thread-safe for multithreaded - environments. - - Args: - print: If True, prints the elapsed time upon exiting the context or completing the function. Defaults - to False. - - Examples: - - Using as a context manager: - - >>> benchmark = TimeBenchmark() - >>> with benchmark: - ... time.sleep(1) - >>> print(f"Block took {benchmark.result:.4f} seconds") - Block took approximately 1.0000 seconds - - Using with multithreading: - - ```python - import threading - - benchmark = TimeBenchmark() - - - def context_manager_example(): - with benchmark: - time.sleep(0.01) - print(f"Block took {benchmark.result_ms:.2f} milliseconds") - - - threads = [] - for _ in range(3): - t1 = threading.Thread(target=context_manager_example) - threads.append(t1) - - for t in threads: - t.start() - - for t in threads: - t.join() - ``` - Expected output: - Block took approximately 10.00 milliseconds - Block took approximately 10.00 milliseconds - Block took approximately 10.00 milliseconds - """ - - def __init__(self, print=False): - self.local = threading.local() - self.print_time = print - - def __enter__(self): - self.local.start_time = time.perf_counter() - return self - - def __exit__(self, *exc): - self.local.end_time = time.perf_counter() - self.local.elapsed_time = self.local.end_time - self.local.start_time - if self.print_time: - print(f"Elapsed time: {self.local.elapsed_time:.4f} seconds") - return False - - @property - def result(self): - return getattr(self.local, "elapsed_time", None) - - @property - def result_ms(self): - return self.result * 1e3 diff --git a/benchmarks/video/capture_camera_feed.py b/benchmarks/video/capture_camera_feed.py deleted file mode 100755 index 8f8530532..000000000 --- a/benchmarks/video/capture_camera_feed.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Capture video feed from a camera as raw images.""" - -import argparse -import datetime as dt -import os -import time -from pathlib import Path - -import cv2 -import rerun as rr - -# see https://rerun.io/docs/howto/visualization/limit-ram -RERUN_MEMORY_LIMIT = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "5%") - - -def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height: int, duration: int): - rr.init("lerobot_capture_camera_feed") - rr.spawn(memory_limit=RERUN_MEMORY_LIMIT) - - now = dt.datetime.now() - capture_dir = output_dir / f"{now:%Y-%m-%d}" / f"{now:%H-%M-%S}" - if not capture_dir.exists(): - capture_dir.mkdir(parents=True, exist_ok=True) - - # Opens the default webcam - cap = cv2.VideoCapture(0) - if not cap.isOpened(): - print("Error: Could not open video stream.") - return - - cap.set(cv2.CAP_PROP_FPS, fps) - cap.set(cv2.CAP_PROP_FRAME_WIDTH, width) - cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height) - - frame_index = 0 - start_time = time.time() - while time.time() - start_time < duration: - ret, frame = cap.read() - - if not ret: - print("Error: Could not read frame.") - break - rr.log("video/stream", rr.Image(frame), static=True) - cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame) - frame_index += 1 - - # Release the capture - cap.release() - - # TODO(Steven): Add a graceful shutdown via a close() method for the Viewer context, though not currently supported in the Rerun API. - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--output-dir", - type=Path, - default=Path("outputs/cam_capture/"), - help="Directory where the capture images are written. A subfolder named with the current date & time will be created inside it for each capture.", - ) - parser.add_argument( - "--fps", - type=int, - default=30, - help="Frames Per Second of the capture.", - ) - parser.add_argument( - "--width", - type=int, - default=1280, - help="Width of the captured images.", - ) - parser.add_argument( - "--height", - type=int, - default=720, - help="Height of the captured images.", - ) - parser.add_argument( - "--duration", - type=int, - default=20, - help="Duration in seconds for which the video stream should be captured.", - ) - args = parser.parse_args() - display_and_save_video_stream(**vars(args)) diff --git a/benchmarks/video/run_video_benchmark.py b/benchmarks/video/run_video_benchmark.py index 9f34b2273..064a84b48 100644 --- a/benchmarks/video/run_video_benchmark.py +++ b/benchmarks/video/run_video_benchmark.py @@ -21,11 +21,13 @@ See the provided README.md or run `python benchmark/video/run_video_benchmark.py import argparse import datetime as dt +import itertools import random import shutil from collections import OrderedDict from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path +from threading import Lock import einops import numpy as np @@ -35,13 +37,13 @@ import torch from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity from tqdm import tqdm -from benchmarks.video.benchmark import TimeBenchmark from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.video_utils import ( - decode_video_frames_torchvision, + decode_video_frames, encode_video_frames, ) from lerobot.utils.constants import OBS_IMAGE +from lerobot.utils.utils import TimerManager BASE_ENCODING = OrderedDict( [ @@ -86,7 +88,7 @@ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> t frames = [] for ts in timestamps: idx = int(ts * fps) - frame = PIL.Image.open(imgs_dir / f"frame_{idx:06d}.png") + frame = PIL.Image.open(imgs_dir / f"frame-{idx:06d}.png") frame = torch.from_numpy(np.array(frame)) frame = frame.type(torch.float32) / 255 frame = einops.rearrange(frame, "h w c -> c h w") @@ -97,21 +99,21 @@ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> t def save_decoded_frames( imgs_dir: Path, save_dir: Path, frames: torch.Tensor, timestamps: list[float], fps: int ) -> None: - if save_dir.exists() and len(list(save_dir.glob("frame_*.png"))) == len(timestamps): + if save_dir.exists() and len(list(save_dir.glob("frame-*.png"))) == len(timestamps): return save_dir.mkdir(parents=True, exist_ok=True) for i, ts in enumerate(timestamps): idx = int(ts * fps) frame_hwc = (frames[i].permute((1, 2, 0)) * 255).type(torch.uint8).cpu().numpy() - PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame_{idx:06d}_decoded.png") - shutil.copyfile(imgs_dir / f"frame_{idx:06d}.png", save_dir / f"frame_{idx:06d}_original.png") + PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame-{idx:06d}_decoded.png") + shutil.copyfile(imgs_dir / f"frame-{idx:06d}.png", save_dir / f"frame-{idx:06d}_original.png") def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None: episode_index = 0 ep_num_images = dataset.meta.episodes["length"][episode_index] - if imgs_dir.exists() and len(list(imgs_dir.glob("frame_*.png"))) == ep_num_images: + if imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images: return imgs_dir.mkdir(parents=True, exist_ok=True) @@ -125,7 +127,7 @@ def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None: tqdm(imgs_dataset, desc=f"saving {dataset.repo_id} first episode images", leave=False) ): img = item[img_keys[0]] - img.save(str(imgs_dir / f"frame_{i:06d}.png"), quality=100) + img.save(str(imgs_dir / f"frame-{i:06d}.png"), quality=100) if i >= ep_num_images - 1: break @@ -149,18 +151,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis return [idx / fps for idx in frame_indexes] -def decode_video_frames( - video_path: str, - timestamps: list[float], - tolerance_s: float, - backend: str, -) -> torch.Tensor: - if backend in ["pyav", "video_reader"]: - return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend) - else: - raise NotImplementedError(backend) - - def benchmark_decoding( imgs_dir: Path, video_path: Path, @@ -172,8 +162,8 @@ def benchmark_decoding( num_workers: int = 4, save_frames: bool = False, ) -> dict: - def process_sample(sample: int): - time_benchmark = TimeBenchmark() + def process_sample(sample: int, lock: Lock): + time_benchmark = TimerManager(log=False) timestamps = sample_timestamps(timestamps_mode, ep_num_images, fps) num_frames = len(timestamps) result = { @@ -182,13 +172,13 @@ def benchmark_decoding( "mse_values": [], } - with time_benchmark: + with time_benchmark, lock: frames = decode_video_frames(video_path, timestamps=timestamps, tolerance_s=5e-1, backend=backend) - result["load_time_video_ms"] = time_benchmark.result_ms / num_frames + result["load_time_video_ms"] = (time_benchmark.last * 1000) / num_frames with time_benchmark: original_frames = load_original_frames(imgs_dir, timestamps, fps) - result["load_time_images_ms"] = time_benchmark.result_ms / num_frames + result["load_time_images_ms"] = (time_benchmark.last * 1000) / num_frames frames_np, original_frames_np = frames.numpy(), original_frames.numpy() for i in range(num_frames): @@ -215,8 +205,10 @@ def benchmark_decoding( # A sample is a single set of decoded frames specified by timestamps_mode (e.g. a single frame, 2 frames, etc.). # For each sample, we record metrics (loading time and quality metrics) which are then averaged over all samples. # As these samples are independent, we run them in parallel threads to speed up the benchmark. + # Use a single shared lock for all worker threads + shared_lock = Lock() with ThreadPoolExecutor(max_workers=num_workers) as executor: - futures = [executor.submit(process_sample, i) for i in range(num_samples)] + futures = [executor.submit(process_sample, i, shared_lock) for i in range(num_samples)] for future in tqdm(as_completed(futures), total=num_samples, desc="samples", leave=False): result = future.result() load_times_video_ms.append(result["load_time_video_ms"]) @@ -358,24 +350,27 @@ def main( imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_") # We only use the first episode save_first_episode(imgs_dir, dataset) - for key, values in tqdm(encoding_benchmarks.items(), desc="encodings (g, crf)", leave=False): - for value in tqdm(values, desc=f"encodings ({key})", leave=False): - encoding_cfg = BASE_ENCODING.copy() - encoding_cfg["vcodec"] = video_codec - encoding_cfg["pix_fmt"] = pixel_format + for duet in [ + dict(zip(encoding_benchmarks.keys(), unique_combination, strict=False)) + for unique_combination in itertools.product(*encoding_benchmarks.values()) + ]: + encoding_cfg = BASE_ENCODING.copy() + encoding_cfg["vcodec"] = video_codec + encoding_cfg["pix_fmt"] = pixel_format + for key, value in duet.items(): encoding_cfg[key] = value - args_path = Path("_".join(str(value) for value in encoding_cfg.values())) - video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4" - benchmark_table += benchmark_encoding_decoding( - dataset, - video_path, - imgs_dir, - encoding_cfg, - decoding_benchmarks, - num_samples, - num_workers, - save_frames, - ) + args_path = Path("_".join(str(value) for value in encoding_cfg.values())) + video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4" + benchmark_table += benchmark_encoding_decoding( + dataset, + video_path, + imgs_dir, + encoding_cfg, + decoding_benchmarks, + num_samples, + num_workers, + save_frames, + ) # Save intermediate results benchmark_df = pd.DataFrame(benchmark_table, columns=headers) @@ -409,9 +404,9 @@ if __name__ == "__main__": nargs="*", default=[ "lerobot/pusht_image", - "aliberts/aloha_mobile_shrimp_image", - "aliberts/paris_street", - "aliberts/kitchen", + "lerobot/aloha_mobile_shrimp_image", + "lerobot/paris_street", + "lerobot/kitchen", ], help="Datasets repo-ids to test against. First episodes only are used. Must be images.", ) @@ -419,7 +414,7 @@ if __name__ == "__main__": "--vcodec", type=str, nargs="*", - default=["libx264", "hevc", "libsvtav1"], + default=["h264", "hevc", "libsvtav1"], help="Video codecs to be tested", ) parser.add_argument( @@ -468,7 +463,7 @@ if __name__ == "__main__": "--backends", type=str, nargs="*", - default=["pyav", "video_reader"], + default=["torchcodec", "pyav"], help="Torchvision decoding backend to be tested.", ) parser.add_argument( diff --git a/docs/source/async.mdx b/docs/source/async.mdx index be10f8baf..e3a11609c 100644 --- a/docs/source/async.mdx +++ b/docs/source/async.mdx @@ -196,7 +196,7 @@ client_cfg = RobotClientConfig( server_address="localhost:8080", policy_device="mps", policy_type="smolvla", - pretrained_name_or_path="fracapuano/smolvla_async", + pretrained_name_or_path="/smolvla_async", chunk_size_threshold=0.5, actions_per_chunk=50, # make sure this is less than the max actions of the policy ) diff --git a/docs/source/envhub_leisaac.mdx b/docs/source/envhub_leisaac.mdx index 5c951b564..ff848d415 100644 --- a/docs/source/envhub_leisaac.mdx +++ b/docs/source/envhub_leisaac.mdx @@ -139,7 +139,7 @@ from lerobot.teleoperators import ( # noqa: F401 make_teleoperator_from_config, so101_leader, ) -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import init_logging from lerobot.envs.factory import make_env @@ -196,7 +196,7 @@ def teleop_loop(teleop: Teleoperator, env: gym.Env, fps: int): obs, info = env.reset() dt_s = time.perf_counter() - loop_start - busy_wait(1 / fps - dt_s) + precise_sleep(1 / fps - dt_s) loop_s = time.perf_counter() - loop_start print(f"\ntime: {loop_s * 1e3:.2f}ms ({1 / loop_s:.0f} Hz)") diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index 000df2a19..a7b4fe570 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -393,7 +393,7 @@ import time from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig from lerobot.robots.so100_follower.so100_follower import SO100Follower -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import log_say episode_idx = 0 @@ -415,7 +415,7 @@ for idx in range(dataset.num_frames): } robot.send_action(action) - busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0)) + precise_sleep(1.0 / dataset.fps - (time.perf_counter() - t0)) robot.disconnect() ``` diff --git a/examples/backward_compatibility/replay.py b/examples/backward_compatibility/replay.py index 6bca0570f..ed52a24c9 100644 --- a/examples/backward_compatibility/replay.py +++ b/examples/backward_compatibility/replay.py @@ -45,7 +45,7 @@ from lerobot.robots import ( # noqa: F401 so101_follower, ) from lerobot.utils.constants import ACTION -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import ( init_logging, log_say, @@ -97,7 +97,7 @@ def replay(cfg: ReplayConfig): robot.send_action(action) dt_s = time.perf_counter() - start_episode_t - busy_wait(1 / dataset.fps - dt_s) + precise_sleep(1 / dataset.fps - dt_s) robot.disconnect() diff --git a/examples/dataset/load_lerobot_dataset.py b/examples/dataset/load_lerobot_dataset.py index a69169814..4fda25884 100644 --- a/examples/dataset/load_lerobot_dataset.py +++ b/examples/dataset/load_lerobot_dataset.py @@ -34,105 +34,106 @@ from huggingface_hub import HfApi import lerobot from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata -# We ported a number of existing datasets ourselves, use this to see the list: -print("List of available datasets:") -pprint(lerobot.available_datasets) -# You can also browse through the datasets created/ported by the community on the hub using the hub api: -hub_api = HfApi() -repo_ids = [info.id for info in hub_api.list_datasets(task_categories="robotics", tags=["LeRobot"])] -pprint(repo_ids) +def main(): + # We ported a number of existing datasets ourselves, use this to see the list: + print("List of available datasets:") + pprint(lerobot.available_datasets) -# Or simply explore them in your web browser directly at: -# https://huggingface.co/datasets?other=LeRobot + # You can also browse through the datasets created/ported by the community on the hub using the hub api: + hub_api = HfApi() + repo_ids = [info.id for info in hub_api.list_datasets(task_categories="robotics", tags=["LeRobot"])] + pprint(repo_ids) -# Let's take this one for this example -repo_id = "lerobot/aloha_mobile_cabinet" -# We can have a look and fetch its metadata to know more about it: -ds_meta = LeRobotDatasetMetadata(repo_id) + # Or simply explore them in your web browser directly at: + # https://huggingface.co/datasets?other=LeRobot -# By instantiating just this class, you can quickly access useful information about the content and the -# structure of the dataset without downloading the actual data yet (only metadata files — which are -# lightweight). -print(f"Total number of episodes: {ds_meta.total_episodes}") -print(f"Average number of frames per episode: {ds_meta.total_frames / ds_meta.total_episodes:.3f}") -print(f"Frames per second used during data collection: {ds_meta.fps}") -print(f"Robot type: {ds_meta.robot_type}") -print(f"keys to access images from cameras: {ds_meta.camera_keys=}\n") + # Let's take this one for this example + repo_id = "lerobot/aloha_mobile_cabinet" + # We can have a look and fetch its metadata to know more about it: + ds_meta = LeRobotDatasetMetadata(repo_id) -print("Tasks:") -print(ds_meta.tasks) -print("Features:") -pprint(ds_meta.features) + # By instantiating just this class, you can quickly access useful information about the content and the + # structure of the dataset without downloading the actual data yet (only metadata files — which are + # lightweight). + print(f"Total number of episodes: {ds_meta.total_episodes}") + print(f"Average number of frames per episode: {ds_meta.total_frames / ds_meta.total_episodes:.3f}") + print(f"Frames per second used during data collection: {ds_meta.fps}") + print(f"Robot type: {ds_meta.robot_type}") + print(f"keys to access images from cameras: {ds_meta.camera_keys=}\n") -# You can also get a short summary by simply printing the object: -print(ds_meta) + print("Tasks:") + print(ds_meta.tasks) + print("Features:") + pprint(ds_meta.features) -# You can then load the actual dataset from the hub. -# Either load any subset of episodes: -dataset = LeRobotDataset(repo_id, episodes=[0, 10, 11, 23]) + # You can also get a short summary by simply printing the object: + print(ds_meta) -# And see how many frames you have: -print(f"Selected episodes: {dataset.episodes}") -print(f"Number of episodes selected: {dataset.num_episodes}") -print(f"Number of frames selected: {dataset.num_frames}") + # You can then load the actual dataset from the hub. + # Either load any subset of episodes: + dataset = LeRobotDataset(repo_id, episodes=[0, 10, 11, 23]) -# Or simply load the entire dataset: -dataset = LeRobotDataset(repo_id) -print(f"Number of episodes selected: {dataset.num_episodes}") -print(f"Number of frames selected: {dataset.num_frames}") + # And see how many frames you have: + print(f"Selected episodes: {dataset.episodes}") + print(f"Number of episodes selected: {dataset.num_episodes}") + print(f"Number of frames selected: {dataset.num_frames}") -# The previous metadata class is contained in the 'meta' attribute of the dataset: -print(dataset.meta) + # Or simply load the entire dataset: + dataset = LeRobotDataset(repo_id) + print(f"Number of episodes selected: {dataset.num_episodes}") + print(f"Number of frames selected: {dataset.num_frames}") -# LeRobotDataset actually wraps an underlying Hugging Face dataset -# (see https://huggingface.co/docs/datasets for more information). -print(dataset.hf_dataset) + # The previous metadata class is contained in the 'meta' attribute of the dataset: + print(dataset.meta) -# LeRobot datasets also subclasses PyTorch datasets so you can do everything you know and love from working -# with the latter, like iterating through the dataset. -# The __getitem__ iterates over the frames of the dataset. Since our datasets are also structured by -# episodes, you can access the frame indices of any episode using dataset.meta.episodes. Here, we access -# frame indices associated to the first episode: -episode_index = 0 -from_idx = dataset.meta.episodes["dataset_from_index"][episode_index] -to_idx = dataset.meta.episodes["dataset_to_index"][episode_index] + # LeRobotDataset actually wraps an underlying Hugging Face dataset + # (see https://huggingface.co/docs/datasets for more information). + print(dataset.hf_dataset) -# Then we grab all the image frames from the first camera: -camera_key = dataset.meta.camera_keys[0] -frames = [dataset[idx][camera_key] for idx in range(from_idx, to_idx)] + # LeRobot datasets also subclasses PyTorch datasets so you can do everything you know and love from working + # with the latter, like iterating through the dataset. + # The __getitem__ iterates over the frames of the dataset. Since our datasets are also structured by + # episodes, you can access the frame indices of any episode using dataset.meta.episodes. Here, we access + # frame indices associated to the first episode: + episode_index = 0 + from_idx = dataset.meta.episodes["dataset_from_index"][episode_index] + to_idx = dataset.meta.episodes["dataset_to_index"][episode_index] -# The objects returned by the dataset are all torch.Tensors -print(type(frames[0])) -print(frames[0].shape) + # Then we grab all the image frames from the first camera: + camera_key = dataset.meta.camera_keys[0] + frames = [dataset[idx][camera_key] for idx in range(from_idx, to_idx)] -# Since we're using pytorch, the shape is in pytorch, channel-first convention (c, h, w). -# We can compare this shape with the information available for that feature -pprint(dataset.features[camera_key]) -# In particular: -print(dataset.features[camera_key]["shape"]) -# The shape is in (h, w, c) which is a more universal format. + # The objects returned by the dataset are all torch.Tensors + print(type(frames[0])) + print(frames[0].shape) -# For many machine learning applications we need to load the history of past observations or trajectories of -# future actions. Our datasets can load previous and future frames for each key/modality, using timestamps -# differences with the current loaded frame. For instance: -delta_timestamps = { - # loads 4 images: 1 second before current frame, 500 ms before, 200 ms before, and current frame - camera_key: [-1, -0.5, -0.20, 0], - # loads 6 state vectors: 1.5 seconds before, 1 second before, ... 200 ms, 100 ms, and current frame - "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, 0], - # loads 64 action vectors: current frame, 1 frame in the future, 2 frames, ... 63 frames in the future - "action": [t / dataset.fps for t in range(64)], -} -# Note that in any case, these delta_timestamps values need to be multiples of (1/fps) so that added to any -# timestamp, you still get a valid timestamp. + # Since we're using pytorch, the shape is in pytorch, channel-first convention (c, h, w). + # We can compare this shape with the information available for that feature + pprint(dataset.features[camera_key]) + # In particular: + print(dataset.features[camera_key]["shape"]) + # The shape is in (h, w, c) which is a more universal format. -dataset = LeRobotDataset(repo_id, delta_timestamps=delta_timestamps) -print(f"\n{dataset[0][camera_key].shape=}") # (4, c, h, w) -print(f"{dataset[0]['observation.state'].shape=}") # (6, c) -print(f"{dataset[0]['action'].shape=}\n") # (64, c) + # For many machine learning applications we need to load the history of past observations or trajectories of + # future actions. Our datasets can load previous and future frames for each key/modality, using timestamps + # differences with the current loaded frame. For instance: + delta_timestamps = { + # loads 4 images: 1 second before current frame, 500 ms before, 200 ms before, and current frame + camera_key: [-1, -0.5, -0.20, 0], + # loads 6 state vectors: 1.5 seconds before, 1 second before, ... 200 ms, 100 ms, and current frame + "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, 0], + # loads 64 action vectors: current frame, 1 frame in the future, 2 frames, ... 63 frames in the future + "action": [t / dataset.fps for t in range(64)], + } + # Note that in any case, these delta_timestamps values need to be multiples of (1/fps) so that added to any + # timestamp, you still get a valid timestamp. + + dataset = LeRobotDataset(repo_id, delta_timestamps=delta_timestamps) + print(f"\n{dataset[0][camera_key].shape=}") # (4, c, h, w) + print(f"{dataset[0]['observation.state'].shape=}") # (6, c) + print(f"{dataset[0]['action'].shape=}\n") # (64, c) -if __name__ == "__main__": dataloader = torch.utils.data.DataLoader( dataset, num_workers=4, @@ -144,3 +145,7 @@ if __name__ == "__main__": print(f"{batch['observation.state'].shape=}") # (32, 6, c) print(f"{batch['action'].shape=}") # (32, 64, c) break + + +if __name__ == "__main__": + main() diff --git a/examples/lekiwi/evaluate.py b/examples/lekiwi/evaluate.py index 4501008d0..2f7f9f95f 100644 --- a/examples/lekiwi/evaluate.py +++ b/examples/lekiwi/evaluate.py @@ -33,83 +33,68 @@ TASK_DESCRIPTION = "My task description" HF_MODEL_ID = "/" HF_DATASET_ID = "/" -# Create the robot configuration & robot -robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi") -robot = LeKiwiClient(robot_config) +def main(): + # Create the robot configuration & robot + robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi") -# Create policy -policy = ACTPolicy.from_pretrained(HF_MODEL_ID) + robot = LeKiwiClient(robot_config) -# Configure the dataset features -action_features = hw_to_dataset_features(robot.action_features, ACTION) -obs_features = hw_to_dataset_features(robot.observation_features, OBS_STR) -dataset_features = {**action_features, **obs_features} + # Create policy + policy = ACTPolicy.from_pretrained(HF_MODEL_ID) -# Create the dataset -dataset = LeRobotDataset.create( - repo_id=HF_DATASET_ID, - fps=FPS, - features=dataset_features, - robot_type=robot.name, - use_videos=True, - image_writer_threads=4, -) + # Configure the dataset features + action_features = hw_to_dataset_features(robot.action_features, ACTION) + obs_features = hw_to_dataset_features(robot.observation_features, OBS_STR) + dataset_features = {**action_features, **obs_features} -# Build Policy Processors -preprocessor, postprocessor = make_pre_post_processors( - policy_cfg=policy, - pretrained_path=HF_MODEL_ID, - dataset_stats=dataset.meta.stats, - # The inference device is automatically set to match the detected hardware, overriding any previous device settings from training to ensure compatibility. - preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, -) - -# Connect the robot -# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi` -robot.connect() - -# TODO(Steven): Update this example to use pipelines -teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() - -# Initialize the keyboard listener and rerun visualization -listener, events = init_keyboard_listener() -init_rerun(session_name="lekiwi_evaluate") - -if not robot.is_connected: - raise ValueError("Robot is not connected!") - -print("Starting evaluate loop...") -recorded_episodes = 0 -while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: - log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}") - - # Main record loop - record_loop( - robot=robot, - events=events, + # Create the dataset + dataset = LeRobotDataset.create( + repo_id=HF_DATASET_ID, fps=FPS, - policy=policy, - preprocessor=preprocessor, # Pass the pre and post policy processors - postprocessor=postprocessor, - dataset=dataset, - control_time_s=EPISODE_TIME_SEC, - single_task=TASK_DESCRIPTION, - display_data=True, - teleop_action_processor=teleop_action_processor, - robot_action_processor=robot_action_processor, - robot_observation_processor=robot_observation_processor, + features=dataset_features, + robot_type=robot.name, + use_videos=True, + image_writer_threads=4, ) - # Reset the environment if not stopping or re-recording - if not events["stop_recording"] and ( - (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"] - ): - log_say("Reset the environment") + # Build Policy Processors + preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=policy, + pretrained_path=HF_MODEL_ID, + dataset_stats=dataset.meta.stats, + # The inference device is automatically set to match the detected hardware, overriding any previous device settings from training to ensure compatibility. + preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, + ) + + # Connect the robot + # To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi` + robot.connect() + + # TODO(Steven): Update this example to use pipelines + teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() + + # Initialize the keyboard listener and rerun visualization + listener, events = init_keyboard_listener() + init_rerun(session_name="lekiwi_evaluate") + + if not robot.is_connected: + raise ValueError("Robot is not connected!") + + print("Starting evaluate loop...") + recorded_episodes = 0 + while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: + log_say(f"Running inference, recording eval episode {recorded_episodes} of {NUM_EPISODES}") + + # Main record loop record_loop( robot=robot, events=events, fps=FPS, + policy=policy, + preprocessor=preprocessor, # Pass the pre and post policy processors + postprocessor=postprocessor, + dataset=dataset, control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, @@ -118,21 +103,42 @@ while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: robot_observation_processor=robot_observation_processor, ) - if events["rerecord_episode"]: - log_say("Re-record episode") - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Reset the environment if not stopping or re-recording + if not events["stop_recording"] and ( + (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"] + ): + log_say("Reset the environment") + record_loop( + robot=robot, + events=events, + fps=FPS, + control_time_s=EPISODE_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + teleop_action_processor=teleop_action_processor, + robot_action_processor=robot_action_processor, + robot_observation_processor=robot_observation_processor, + ) - # Save episode - dataset.save_episode() - recorded_episodes += 1 + if events["rerecord_episode"]: + log_say("Re-record episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue -# Clean up -log_say("Stop recording") -robot.disconnect() -listener.stop() + # Save episode + dataset.save_episode() + recorded_episodes += 1 -dataset.finalize() -dataset.push_to_hub() + # Clean up + log_say("Stop recording") + robot.disconnect() + listener.stop() + + dataset.finalize() + dataset.push_to_hub() + + +if __name__ == "__main__": + main() diff --git a/examples/lekiwi/record.py b/examples/lekiwi/record.py index 491e1c386..67d826ccb 100644 --- a/examples/lekiwi/record.py +++ b/examples/lekiwi/record.py @@ -34,78 +34,62 @@ RESET_TIME_SEC = 10 TASK_DESCRIPTION = "My task description" HF_REPO_ID = "/" -# Create the robot and teleoperator configurations -robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi") -leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm") -keyboard_config = KeyboardTeleopConfig() -# Initialize the robot and teleoperator -robot = LeKiwiClient(robot_config) -leader_arm = SO100Leader(leader_arm_config) -keyboard = KeyboardTeleop(keyboard_config) +def main(): + # Create the robot and teleoperator configurations + robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi") + leader_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm") + keyboard_config = KeyboardTeleopConfig() -# TODO(Steven): Update this example to use pipelines -teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() + # Initialize the robot and teleoperator + robot = LeKiwiClient(robot_config) + leader_arm = SO100Leader(leader_arm_config) + keyboard = KeyboardTeleop(keyboard_config) -# Configure the dataset features -action_features = hw_to_dataset_features(robot.action_features, ACTION) -obs_features = hw_to_dataset_features(robot.observation_features, OBS_STR) -dataset_features = {**action_features, **obs_features} + # TODO(Steven): Update this example to use pipelines + teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors() -# Create the dataset -dataset = LeRobotDataset.create( - repo_id=HF_REPO_ID, - fps=FPS, - features=dataset_features, - robot_type=robot.name, - use_videos=True, - image_writer_threads=4, -) + # Configure the dataset features + action_features = hw_to_dataset_features(robot.action_features, ACTION) + obs_features = hw_to_dataset_features(robot.observation_features, OBS_STR) + dataset_features = {**action_features, **obs_features} -# Connect the robot and teleoperator -# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi` -robot.connect() -leader_arm.connect() -keyboard.connect() - -# Initialize the keyboard listener and rerun visualization -listener, events = init_keyboard_listener() -init_rerun(session_name="lekiwi_record") - -if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected: - raise ValueError("Robot or teleop is not connected!") - -print("Starting record loop...") -recorded_episodes = 0 -while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: - log_say(f"Recording episode {recorded_episodes}") - - # Main record loop - record_loop( - robot=robot, - events=events, + # Create the dataset + dataset = LeRobotDataset.create( + repo_id=HF_REPO_ID, fps=FPS, - dataset=dataset, - teleop=[leader_arm, keyboard], - control_time_s=EPISODE_TIME_SEC, - single_task=TASK_DESCRIPTION, - display_data=True, - teleop_action_processor=teleop_action_processor, - robot_action_processor=robot_action_processor, - robot_observation_processor=robot_observation_processor, + features=dataset_features, + robot_type=robot.name, + use_videos=True, + image_writer_threads=4, ) - # Reset the environment if not stopping or re-recording - if not events["stop_recording"] and ( - (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"] - ): - log_say("Reset the environment") + # Connect the robot and teleoperator + # To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi` + robot.connect() + leader_arm.connect() + keyboard.connect() + + # Initialize the keyboard listener and rerun visualization + listener, events = init_keyboard_listener() + init_rerun(session_name="lekiwi_record") + + if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected: + raise ValueError("Robot or teleop is not connected!") + + print("Starting record loop...") + recorded_episodes = 0 + while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: + log_say(f"Recording episode {recorded_episodes}") + + # Main record loop record_loop( robot=robot, events=events, fps=FPS, + dataset=dataset, teleop=[leader_arm, keyboard], - control_time_s=RESET_TIME_SEC, + control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, teleop_action_processor=teleop_action_processor, @@ -113,23 +97,45 @@ while recorded_episodes < NUM_EPISODES and not events["stop_recording"]: robot_observation_processor=robot_observation_processor, ) - if events["rerecord_episode"]: - log_say("Re-record episode") - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Reset the environment if not stopping or re-recording + if not events["stop_recording"] and ( + (recorded_episodes < NUM_EPISODES - 1) or events["rerecord_episode"] + ): + log_say("Reset the environment") + record_loop( + robot=robot, + events=events, + fps=FPS, + teleop=[leader_arm, keyboard], + control_time_s=RESET_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + teleop_action_processor=teleop_action_processor, + robot_action_processor=robot_action_processor, + robot_observation_processor=robot_observation_processor, + ) - # Save episode - dataset.save_episode() - recorded_episodes += 1 + if events["rerecord_episode"]: + log_say("Re-record episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue -# Clean up -log_say("Stop recording") -robot.disconnect() -leader_arm.disconnect() -keyboard.disconnect() -listener.stop() + # Save episode + dataset.save_episode() + recorded_episodes += 1 -dataset.finalize() -dataset.push_to_hub() + # Clean up + log_say("Stop recording") + robot.disconnect() + leader_arm.disconnect() + keyboard.disconnect() + listener.stop() + + dataset.finalize() + dataset.push_to_hub() + + +if __name__ == "__main__": + main() diff --git a/examples/lekiwi/replay.py b/examples/lekiwi/replay.py index 3ae915286..872dacf27 100644 --- a/examples/lekiwi/replay.py +++ b/examples/lekiwi/replay.py @@ -20,42 +20,48 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.robots.lekiwi.config_lekiwi import LeKiwiClientConfig from lerobot.robots.lekiwi.lekiwi_client import LeKiwiClient from lerobot.utils.constants import ACTION -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import log_say EPISODE_IDX = 0 -# Initialize the robot config -robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi") -# Initialize the robot -robot = LeKiwiClient(robot_config) +def main(): + # Initialize the robot config + robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="lekiwi") -# Fetch the dataset to replay -dataset = LeRobotDataset("/", episodes=[EPISODE_IDX]) -# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0 -episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX) -actions = episode_frames.select_columns(ACTION) + # Initialize the robot + robot = LeKiwiClient(robot_config) -# Connect to the robot -robot.connect() + # Fetch the dataset to replay + dataset = LeRobotDataset("/", episodes=[EPISODE_IDX]) + # Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0 + episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX) + actions = episode_frames.select_columns(ACTION) -if not robot.is_connected: - raise ValueError("Robot is not connected!") + # Connect to the robot + robot.connect() -print("Starting replay loop...") -log_say(f"Replaying episode {EPISODE_IDX}") -for idx in range(len(episode_frames)): - t0 = time.perf_counter() + if not robot.is_connected: + raise ValueError("Robot is not connected!") - # Get recorded action from dataset - action = { - name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"]) - } + print("Starting replay loop...") + log_say(f"Replaying episode {EPISODE_IDX}") + for idx in range(len(episode_frames)): + t0 = time.perf_counter() - # Send action to robot - _ = robot.send_action(action) + # Get recorded action from dataset + action = { + name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"]) + } - busy_wait(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0)) + # Send action to robot + _ = robot.send_action(action) -robot.disconnect() + precise_sleep(max(1.0 / dataset.fps - (time.perf_counter() - t0), 0.0)) + + robot.disconnect() + + +if __name__ == "__main__": + main() diff --git a/examples/lekiwi/teleoperate.py b/examples/lekiwi/teleoperate.py index 6b430df48..c4d20ebbe 100644 --- a/examples/lekiwi/teleoperate.py +++ b/examples/lekiwi/teleoperate.py @@ -19,54 +19,60 @@ import time from lerobot.robots.lekiwi import LeKiwiClient, LeKiwiClientConfig from lerobot.teleoperators.keyboard.teleop_keyboard import KeyboardTeleop, KeyboardTeleopConfig from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.visualization_utils import init_rerun, log_rerun_data FPS = 30 -# Create the robot and teleoperator configurations -robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="my_lekiwi") -teleop_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm") -keyboard_config = KeyboardTeleopConfig(id="my_laptop_keyboard") -# Initialize the robot and teleoperator -robot = LeKiwiClient(robot_config) -leader_arm = SO100Leader(teleop_arm_config) -keyboard = KeyboardTeleop(keyboard_config) +def main(): + # Create the robot and teleoperator configurations + robot_config = LeKiwiClientConfig(remote_ip="172.18.134.136", id="my_lekiwi") + teleop_arm_config = SO100LeaderConfig(port="/dev/tty.usbmodem585A0077581", id="my_awesome_leader_arm") + keyboard_config = KeyboardTeleopConfig(id="my_laptop_keyboard") -# Connect to the robot and teleoperator -# To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi` -robot.connect() -leader_arm.connect() -keyboard.connect() + # Initialize the robot and teleoperator + robot = LeKiwiClient(robot_config) + leader_arm = SO100Leader(teleop_arm_config) + keyboard = KeyboardTeleop(keyboard_config) -# Init rerun viewer -init_rerun(session_name="lekiwi_teleop") + # Connect to the robot and teleoperator + # To connect you already should have this script running on LeKiwi: `python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi` + robot.connect() + leader_arm.connect() + keyboard.connect() -if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected: - raise ValueError("Robot or teleop is not connected!") + # Init rerun viewer + init_rerun(session_name="lekiwi_teleop") -print("Starting teleop loop...") -while True: - t0 = time.perf_counter() + if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected: + raise ValueError("Robot or teleop is not connected!") - # Get robot observation - observation = robot.get_observation() + print("Starting teleop loop...") + while True: + t0 = time.perf_counter() - # Get teleop action - # Arm - arm_action = leader_arm.get_action() - arm_action = {f"arm_{k}": v for k, v in arm_action.items()} - # Keyboard - keyboard_keys = keyboard.get_action() - base_action = robot._from_keyboard_to_base_action(keyboard_keys) + # Get robot observation + observation = robot.get_observation() - action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action + # Get teleop action + # Arm + arm_action = leader_arm.get_action() + arm_action = {f"arm_{k}": v for k, v in arm_action.items()} + # Keyboard + keyboard_keys = keyboard.get_action() + base_action = robot._from_keyboard_to_base_action(keyboard_keys) - # Send action to robot - _ = robot.send_action(action) + action = {**arm_action, **base_action} if len(base_action) > 0 else arm_action - # Visualize - log_rerun_data(observation=observation, action=action) + # Send action to robot + _ = robot.send_action(action) - busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0)) + # Visualize + log_rerun_data(observation=observation, action=action) + + precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0)) + + +if __name__ == "__main__": + main() diff --git a/examples/phone_to_so100/evaluate.py b/examples/phone_to_so100/evaluate.py index ff8dbddd2..5a47b8ffa 100644 --- a/examples/phone_to_so100/evaluate.py +++ b/examples/phone_to_so100/evaluate.py @@ -52,125 +52,114 @@ TASK_DESCRIPTION = "My task description" HF_MODEL_ID = "/" HF_DATASET_ID = "/" -# Create the robot configuration & robot -camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} -robot_config = SO100FollowerConfig( - port="/dev/tty.usbmodem58760434471", - id="my_awesome_follower_arm", - cameras=camera_config, - use_degrees=True, -) -robot = SO100Follower(robot_config) - -# Create policy -policy = ACTPolicy.from_pretrained(HF_MODEL_ID) - -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(robot.bus.motors.keys()), -) - -# Build pipeline to convert EE action to joints action -robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - InverseKinematicsEEToJoints( - kinematics=kinematics_solver, - motor_names=list(robot.bus.motors.keys()), - initial_guess_current_joints=True, - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) - -# Build pipeline to convert joints observation to EE observation -robot_joints_to_ee_pose_processor = RobotProcessorPipeline[RobotObservation, RobotObservation]( - steps=[ - ForwardKinematicsJointsToEE(kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys())) - ], - to_transition=observation_to_transition, - to_output=transition_to_observation, -) - -# Create the dataset -dataset = LeRobotDataset.create( - repo_id=HF_DATASET_ID, - fps=FPS, - features=combine_feature_dicts( - aggregate_pipeline_dataset_features( - pipeline=robot_joints_to_ee_pose_processor, - initial_features=create_initial_features(observation=robot.observation_features), - use_videos=True, - ), - # User for now should be explicit on the feature keys that were used for record - # Alternatively, the user can pass the processor step that has the right features - aggregate_pipeline_dataset_features( - pipeline=make_default_teleop_action_processor(), - initial_features=create_initial_features( - action={ - f"ee.{k}": PolicyFeature(type=FeatureType.ACTION, shape=(1,)) - for k in ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"] - } - ), - use_videos=True, - ), - ), - robot_type=robot.name, - use_videos=True, - image_writer_threads=4, -) - -# Build Policy Processors -preprocessor, postprocessor = make_pre_post_processors( - policy_cfg=policy, - pretrained_path=HF_MODEL_ID, - dataset_stats=dataset.meta.stats, - # The inference device is automatically set to match the detected hardware, overriding any previous device settings from training to ensure compatibility. - preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, -) - -# Connect the robot -robot.connect() - -# Initialize the keyboard listener and rerun visualization -listener, events = init_keyboard_listener() -init_rerun(session_name="phone_so100_evaluate") - -if not robot.is_connected: - raise ValueError("Robot is not connected!") - -print("Starting evaluate loop...") -episode_idx = 0 -for episode_idx in range(NUM_EPISODES): - log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}") - - # Main record loop - record_loop( - robot=robot, - events=events, - fps=FPS, - policy=policy, - preprocessor=preprocessor, # Pass the pre and post policy processors - postprocessor=postprocessor, - dataset=dataset, - control_time_s=EPISODE_TIME_SEC, - single_task=TASK_DESCRIPTION, - display_data=True, - teleop_action_processor=make_default_teleop_action_processor(), - robot_action_processor=robot_ee_to_joints_processor, - robot_observation_processor=robot_joints_to_ee_pose_processor, +def main(): + # Create the robot configuration & robot + camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} + robot_config = SO100FollowerConfig( + port="/dev/tty.usbmodem58760434471", + id="my_awesome_follower_arm", + cameras=camera_config, + use_degrees=True, ) - # Reset the environment if not stopping or re-recording - if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]): - log_say("Reset the environment") + robot = SO100Follower(robot_config) + + # Create policy + policy = ACTPolicy.from_pretrained(HF_MODEL_ID) + + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(robot.bus.motors.keys()), + ) + + # Build pipeline to convert EE action to joints action + robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + steps=[ + InverseKinematicsEEToJoints( + kinematics=kinematics_solver, + motor_names=list(robot.bus.motors.keys()), + initial_guess_current_joints=True, + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) + + # Build pipeline to convert joints observation to EE observation + robot_joints_to_ee_pose_processor = RobotProcessorPipeline[RobotObservation, RobotObservation]( + steps=[ + ForwardKinematicsJointsToEE( + kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys()) + ) + ], + to_transition=observation_to_transition, + to_output=transition_to_observation, + ) + + # Create the dataset + dataset = LeRobotDataset.create( + repo_id=HF_DATASET_ID, + fps=FPS, + features=combine_feature_dicts( + aggregate_pipeline_dataset_features( + pipeline=robot_joints_to_ee_pose_processor, + initial_features=create_initial_features(observation=robot.observation_features), + use_videos=True, + ), + # User for now should be explicit on the feature keys that were used for record + # Alternatively, the user can pass the processor step that has the right features + aggregate_pipeline_dataset_features( + pipeline=make_default_teleop_action_processor(), + initial_features=create_initial_features( + action={ + f"ee.{k}": PolicyFeature(type=FeatureType.ACTION, shape=(1,)) + for k in ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"] + } + ), + use_videos=True, + ), + ), + robot_type=robot.name, + use_videos=True, + image_writer_threads=4, + ) + + # Build Policy Processors + preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=policy, + pretrained_path=HF_MODEL_ID, + dataset_stats=dataset.meta.stats, + # The inference device is automatically set to match the detected hardware, overriding any previous device settings from training to ensure compatibility. + preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, + ) + + # Connect the robot + robot.connect() + + # Initialize the keyboard listener and rerun visualization + listener, events = init_keyboard_listener() + init_rerun(session_name="phone_so100_evaluate") + + if not robot.is_connected: + raise ValueError("Robot is not connected!") + + print("Starting evaluate loop...") + episode_idx = 0 + for episode_idx in range(NUM_EPISODES): + log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}") + + # Main record loop record_loop( robot=robot, events=events, fps=FPS, + policy=policy, + preprocessor=preprocessor, # Pass the pre and post policy processors + postprocessor=postprocessor, + dataset=dataset, control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, @@ -179,21 +168,40 @@ for episode_idx in range(NUM_EPISODES): robot_observation_processor=robot_joints_to_ee_pose_processor, ) - if events["rerecord_episode"]: - log_say("Re-record episode") - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Reset the environment if not stopping or re-recording + if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]): + log_say("Reset the environment") + record_loop( + robot=robot, + events=events, + fps=FPS, + control_time_s=EPISODE_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + teleop_action_processor=make_default_teleop_action_processor(), + robot_action_processor=robot_ee_to_joints_processor, + robot_observation_processor=robot_joints_to_ee_pose_processor, + ) - # Save episode - dataset.save_episode() - episode_idx += 1 + if events["rerecord_episode"]: + log_say("Re-record episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue -# Clean up -log_say("Stop recording") -robot.disconnect() -listener.stop() + # Save episode + dataset.save_episode() + episode_idx += 1 -dataset.finalize() -dataset.push_to_hub() + # Clean up + log_say("Stop recording") + robot.disconnect() + listener.stop() + + dataset.finalize() + dataset.push_to_hub() + + +if __name__ == "__main__": + main() diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py index 880f9c9b4..e563d8eb3 100644 --- a/examples/phone_to_so100/record.py +++ b/examples/phone_to_so100/record.py @@ -50,133 +50,122 @@ RESET_TIME_SEC = 30 TASK_DESCRIPTION = "My task description" HF_REPO_ID = "/" -# Create the robot and teleoperator configurations -camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} -robot_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", - id="my_awesome_follower_arm", - cameras=camera_config, - use_degrees=True, -) -teleop_config = PhoneConfig(phone_os=PhoneOS.IOS) # or PhoneOS.ANDROID -# Initialize the robot and teleoperator -robot = SO100Follower(robot_config) -phone = Phone(teleop_config) +def main(): + # Create the robot and teleoperator configurations + camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} + robot_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", + id="my_awesome_follower_arm", + cameras=camera_config, + use_degrees=True, + ) + teleop_config = PhoneConfig(phone_os=PhoneOS.IOS) # or PhoneOS.ANDROID -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(robot.bus.motors.keys()), -) + # Initialize the robot and teleoperator + robot = SO100Follower(robot_config) + phone = Phone(teleop_config) -# Build pipeline to convert phone action to EE action -phone_to_robot_ee_pose_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - MapPhoneActionToRobotAction(platform=teleop_config.phone_os), - EEReferenceAndDelta( - kinematics=kinematics_solver, - end_effector_step_sizes={"x": 0.5, "y": 0.5, "z": 0.5}, - motor_names=list(robot.bus.motors.keys()), - use_latched_reference=True, - ), - EEBoundsAndSafety( - end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, - max_ee_step_m=0.20, - ), - GripperVelocityToJoint(speed_factor=20.0), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) - -# Build pipeline to convert EE action to joints action -robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - InverseKinematicsEEToJoints( - kinematics=kinematics_solver, - motor_names=list(robot.bus.motors.keys()), - initial_guess_current_joints=True, - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) - -# Build pipeline to convert joint observation to EE observation -robot_joints_to_ee_pose = RobotProcessorPipeline[RobotObservation, RobotObservation]( - steps=[ - ForwardKinematicsJointsToEE(kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys())) - ], - to_transition=observation_to_transition, - to_output=transition_to_observation, -) - -# Create the dataset -dataset = LeRobotDataset.create( - repo_id=HF_REPO_ID, - fps=FPS, - features=combine_feature_dicts( - # Run the feature contract of the pipelines - # This tells you how the features would look like after the pipeline steps - aggregate_pipeline_dataset_features( - pipeline=phone_to_robot_ee_pose_processor, - initial_features=create_initial_features(action=phone.action_features), - use_videos=True, - ), - aggregate_pipeline_dataset_features( - pipeline=robot_joints_to_ee_pose, - initial_features=create_initial_features(observation=robot.observation_features), - use_videos=True, - ), - ), - robot_type=robot.name, - use_videos=True, - image_writer_threads=4, -) - -# Connect the robot and teleoperator -robot.connect() -phone.connect() - -# Initialize the keyboard listener and rerun visualization -listener, events = init_keyboard_listener() -init_rerun(session_name="phone_so100_record") - -if not robot.is_connected or not phone.is_connected: - raise ValueError("Robot or teleop is not connected!") - - -print("Starting record loop. Move your phone to teleoperate the robot...") -episode_idx = 0 -while episode_idx < NUM_EPISODES and not events["stop_recording"]: - log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}") - - # Main record loop - record_loop( - robot=robot, - events=events, - fps=FPS, - teleop=phone, - dataset=dataset, - control_time_s=EPISODE_TIME_SEC, - single_task=TASK_DESCRIPTION, - display_data=True, - teleop_action_processor=phone_to_robot_ee_pose_processor, - robot_action_processor=robot_ee_to_joints_processor, - robot_observation_processor=robot_joints_to_ee_pose, + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(robot.bus.motors.keys()), ) - # Reset the environment if not stopping or re-recording - if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]): - log_say("Reset the environment") + # Build pipeline to convert phone action to EE action + phone_to_robot_ee_pose_processor = RobotProcessorPipeline[ + tuple[RobotAction, RobotObservation], RobotAction + ]( + steps=[ + MapPhoneActionToRobotAction(platform=teleop_config.phone_os), + EEReferenceAndDelta( + kinematics=kinematics_solver, + end_effector_step_sizes={"x": 0.5, "y": 0.5, "z": 0.5}, + motor_names=list(robot.bus.motors.keys()), + use_latched_reference=True, + ), + EEBoundsAndSafety( + end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, + max_ee_step_m=0.20, + ), + GripperVelocityToJoint(speed_factor=20.0), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) + + # Build pipeline to convert EE action to joints action + robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + steps=[ + InverseKinematicsEEToJoints( + kinematics=kinematics_solver, + motor_names=list(robot.bus.motors.keys()), + initial_guess_current_joints=True, + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) + + # Build pipeline to convert joint observation to EE observation + robot_joints_to_ee_pose = RobotProcessorPipeline[RobotObservation, RobotObservation]( + steps=[ + ForwardKinematicsJointsToEE( + kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys()) + ) + ], + to_transition=observation_to_transition, + to_output=transition_to_observation, + ) + + # Create the dataset + dataset = LeRobotDataset.create( + repo_id=HF_REPO_ID, + fps=FPS, + features=combine_feature_dicts( + # Run the feature contract of the pipelines + # This tells you how the features would look like after the pipeline steps + aggregate_pipeline_dataset_features( + pipeline=phone_to_robot_ee_pose_processor, + initial_features=create_initial_features(action=phone.action_features), + use_videos=True, + ), + aggregate_pipeline_dataset_features( + pipeline=robot_joints_to_ee_pose, + initial_features=create_initial_features(observation=robot.observation_features), + use_videos=True, + ), + ), + robot_type=robot.name, + use_videos=True, + image_writer_threads=4, + ) + + # Connect the robot and teleoperator + robot.connect() + phone.connect() + + # Initialize the keyboard listener and rerun visualization + listener, events = init_keyboard_listener() + init_rerun(session_name="phone_so100_record") + + if not robot.is_connected or not phone.is_connected: + raise ValueError("Robot or teleop is not connected!") + + print("Starting record loop. Move your phone to teleoperate the robot...") + episode_idx = 0 + while episode_idx < NUM_EPISODES and not events["stop_recording"]: + log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}") + + # Main record loop record_loop( robot=robot, events=events, fps=FPS, teleop=phone, - control_time_s=RESET_TIME_SEC, + dataset=dataset, + control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, teleop_action_processor=phone_to_robot_ee_pose_processor, @@ -184,22 +173,42 @@ while episode_idx < NUM_EPISODES and not events["stop_recording"]: robot_observation_processor=robot_joints_to_ee_pose, ) - if events["rerecord_episode"]: - log_say("Re-recording episode") - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Reset the environment if not stopping or re-recording + if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]): + log_say("Reset the environment") + record_loop( + robot=robot, + events=events, + fps=FPS, + teleop=phone, + control_time_s=RESET_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + teleop_action_processor=phone_to_robot_ee_pose_processor, + robot_action_processor=robot_ee_to_joints_processor, + robot_observation_processor=robot_joints_to_ee_pose, + ) - # Save episode - dataset.save_episode() - episode_idx += 1 + if events["rerecord_episode"]: + log_say("Re-recording episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue -# Clean up -log_say("Stop recording") -robot.disconnect() -phone.disconnect() -listener.stop() + # Save episode + dataset.save_episode() + episode_idx += 1 -dataset.finalize() -dataset.push_to_hub() + # Clean up + log_say("Stop recording") + robot.disconnect() + phone.disconnect() + listener.stop() + + dataset.finalize() + dataset.push_to_hub() + + +if __name__ == "__main__": + main() diff --git a/examples/phone_to_so100/replay.py b/examples/phone_to_so100/replay.py index f1181143c..a7b18a53c 100644 --- a/examples/phone_to_so100/replay.py +++ b/examples/phone_to_so100/replay.py @@ -29,72 +29,78 @@ from lerobot.robots.so100_follower.robot_kinematic_processor import ( ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.utils.constants import ACTION -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import log_say EPISODE_IDX = 0 HF_REPO_ID = "/" -# Initialize the robot config -robot_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True -) -# Initialize the robot -robot = SO100Follower(robot_config) +def main(): + # Initialize the robot config + robot_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True + ) -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(robot.bus.motors.keys()), -) + # Initialize the robot + robot = SO100Follower(robot_config) -# Build pipeline to convert EE action to joints action -robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - InverseKinematicsEEToJoints( - kinematics=kinematics_solver, - motor_names=list(robot.bus.motors.keys()), - initial_guess_current_joints=False, # Because replay is open loop - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(robot.bus.motors.keys()), + ) -# Fetch the dataset to replay -dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX]) -# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0 -episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX) -actions = episode_frames.select_columns(ACTION) + # Build pipeline to convert EE action to joints action + robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + steps=[ + InverseKinematicsEEToJoints( + kinematics=kinematics_solver, + motor_names=list(robot.bus.motors.keys()), + initial_guess_current_joints=False, # Because replay is open loop + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) -# Connect to the robot -robot.connect() + # Fetch the dataset to replay + dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX]) + # Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0 + episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX) + actions = episode_frames.select_columns(ACTION) -if not robot.is_connected: - raise ValueError("Robot is not connected!") + # Connect to the robot + robot.connect() -print("Starting replay loop...") -log_say(f"Replaying episode {EPISODE_IDX}") -for idx in range(len(episode_frames)): - t0 = time.perf_counter() + if not robot.is_connected: + raise ValueError("Robot is not connected!") - # Get recorded action from dataset - ee_action = { - name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"]) - } + print("Starting replay loop...") + log_say(f"Replaying episode {EPISODE_IDX}") + for idx in range(len(episode_frames)): + t0 = time.perf_counter() - # Get robot observation - robot_obs = robot.get_observation() + # Get recorded action from dataset + ee_action = { + name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"]) + } - # Dataset EE -> robot joints - joint_action = robot_ee_to_joints_processor((ee_action, robot_obs)) + # Get robot observation + robot_obs = robot.get_observation() - # Send action to robot - _ = robot.send_action(joint_action) + # Dataset EE -> robot joints + joint_action = robot_ee_to_joints_processor((ee_action, robot_obs)) - busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0)) + # Send action to robot + _ = robot.send_action(joint_action) -# Clean up -robot.disconnect() + precise_sleep(1.0 / dataset.fps - (time.perf_counter() - t0)) + + # Clean up + robot.disconnect() + + +if __name__ == "__main__": + main() diff --git a/examples/phone_to_so100/teleoperate.py b/examples/phone_to_so100/teleoperate.py index 783dce242..2ac8b3cce 100644 --- a/examples/phone_to_so100/teleoperate.py +++ b/examples/phone_to_so100/teleoperate.py @@ -32,82 +32,90 @@ from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction from lerobot.teleoperators.phone.teleop_phone import Phone -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.visualization_utils import init_rerun, log_rerun_data FPS = 30 -# Initialize the robot and teleoperator -robot_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True -) -teleop_config = PhoneConfig(phone_os=PhoneOS.IOS) # or PhoneOS.ANDROID -# Initialize the robot and teleoperator -robot = SO100Follower(robot_config) -teleop_device = Phone(teleop_config) +def main(): + # Initialize the robot and teleoperator + robot_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True + ) + teleop_config = PhoneConfig(phone_os=PhoneOS.IOS) # or PhoneOS.ANDROID -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(robot.bus.motors.keys()), -) + # Initialize the robot and teleoperator + robot = SO100Follower(robot_config) + teleop_device = Phone(teleop_config) -# Build pipeline to convert phone action to ee pose action to joint action -phone_to_robot_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - MapPhoneActionToRobotAction(platform=teleop_config.phone_os), - EEReferenceAndDelta( - kinematics=kinematics_solver, - end_effector_step_sizes={"x": 0.5, "y": 0.5, "z": 0.5}, - motor_names=list(robot.bus.motors.keys()), - use_latched_reference=True, - ), - EEBoundsAndSafety( - end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, - max_ee_step_m=0.10, - ), - GripperVelocityToJoint( - speed_factor=20.0, - ), - InverseKinematicsEEToJoints( - kinematics=kinematics_solver, - motor_names=list(robot.bus.motors.keys()), - initial_guess_current_joints=True, - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(robot.bus.motors.keys()), + ) -# Connect to the robot and teleoperator -robot.connect() -teleop_device.connect() + # Build pipeline to convert phone action to ee pose action to joint action + phone_to_robot_joints_processor = RobotProcessorPipeline[ + tuple[RobotAction, RobotObservation], RobotAction + ]( + steps=[ + MapPhoneActionToRobotAction(platform=teleop_config.phone_os), + EEReferenceAndDelta( + kinematics=kinematics_solver, + end_effector_step_sizes={"x": 0.5, "y": 0.5, "z": 0.5}, + motor_names=list(robot.bus.motors.keys()), + use_latched_reference=True, + ), + EEBoundsAndSafety( + end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, + max_ee_step_m=0.10, + ), + GripperVelocityToJoint( + speed_factor=20.0, + ), + InverseKinematicsEEToJoints( + kinematics=kinematics_solver, + motor_names=list(robot.bus.motors.keys()), + initial_guess_current_joints=True, + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) -# Init rerun viewer -init_rerun(session_name="phone_so100_teleop") + # Connect to the robot and teleoperator + robot.connect() + teleop_device.connect() -if not robot.is_connected or not teleop_device.is_connected: - raise ValueError("Robot or teleop is not connected!") + # Init rerun viewer + init_rerun(session_name="phone_so100_teleop") -print("Starting teleop loop. Move your phone to teleoperate the robot...") -while True: - t0 = time.perf_counter() + if not robot.is_connected or not teleop_device.is_connected: + raise ValueError("Robot or teleop is not connected!") - # Get robot observation - robot_obs = robot.get_observation() + print("Starting teleop loop. Move your phone to teleoperate the robot...") + while True: + t0 = time.perf_counter() - # Get teleop action - phone_obs = teleop_device.get_action() + # Get robot observation + robot_obs = robot.get_observation() - # Phone -> EE pose -> Joints transition - joint_action = phone_to_robot_joints_processor((phone_obs, robot_obs)) + # Get teleop action + phone_obs = teleop_device.get_action() - # Send action to robot - _ = robot.send_action(joint_action) + # Phone -> EE pose -> Joints transition + joint_action = phone_to_robot_joints_processor((phone_obs, robot_obs)) - # Visualize - log_rerun_data(observation=phone_obs, action=joint_action) + # Send action to robot + _ = robot.send_action(joint_action) - busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0)) + # Visualize + log_rerun_data(observation=phone_obs, action=joint_action) + + precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0)) + + +if __name__ == "__main__": + main() diff --git a/examples/so100_to_so100_EE/evaluate.py b/examples/so100_to_so100_EE/evaluate.py index 60489b3cf..90973d373 100644 --- a/examples/so100_to_so100_EE/evaluate.py +++ b/examples/so100_to_so100_EE/evaluate.py @@ -52,126 +52,114 @@ TASK_DESCRIPTION = "My task description" HF_MODEL_ID = "/" HF_DATASET_ID = "/" -# Create the robot configuration & robot -camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} -robot_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", - id="my_awesome_follower_arm", - cameras=camera_config, - use_degrees=True, -) -robot = SO100Follower(robot_config) - -# Create policy -policy = ACTPolicy.from_pretrained(HF_MODEL_ID) - -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(robot.bus.motors.keys()), -) - -# Build pipeline to convert EE action to joints action -robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - InverseKinematicsEEToJoints( - kinematics=kinematics_solver, - motor_names=list(robot.bus.motors.keys()), - initial_guess_current_joints=True, - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) - -# Build pipeline to convert joints observation to EE observation -robot_joints_to_ee_pose_processor = RobotProcessorPipeline[RobotObservation, RobotObservation]( - steps=[ - ForwardKinematicsJointsToEE(kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys())) - ], - to_transition=observation_to_transition, - to_output=transition_to_observation, -) - - -# Create the dataset -dataset = LeRobotDataset.create( - repo_id=HF_DATASET_ID, - fps=FPS, - features=combine_feature_dicts( - aggregate_pipeline_dataset_features( - pipeline=robot_joints_to_ee_pose_processor, - initial_features=create_initial_features(observation=robot.observation_features), - use_videos=True, - ), - # User for now should be explicit on the feature keys that were used for record - # Alternatively, the user can pass the processor step that has the right features - aggregate_pipeline_dataset_features( - pipeline=make_default_teleop_action_processor(), - initial_features=create_initial_features( - action={ - f"ee.{k}": PolicyFeature(type=FeatureType.ACTION, shape=(1,)) - for k in ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"] - } - ), - use_videos=True, - ), - ), - robot_type=robot.name, - use_videos=True, - image_writer_threads=4, -) - -# Build Policy Processors -preprocessor, postprocessor = make_pre_post_processors( - policy_cfg=policy, - pretrained_path=HF_MODEL_ID, - dataset_stats=dataset.meta.stats, - # The inference device is automatically set to match the detected hardware, overriding any previous device settings from training to ensure compatibility. - preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, -) - -# Connect the robot and teleoperator -robot.connect() - -# Initialize the keyboard listener and rerun visualization -listener, events = init_keyboard_listener() -init_rerun(session_name="so100_so100_evaluate") - -if not robot.is_connected: - raise ValueError("Robot is not connected!") - -print("Starting evaluate loop...") -episode_idx = 0 -for episode_idx in range(NUM_EPISODES): - log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}") - - # Main record loop - record_loop( - robot=robot, - events=events, - fps=FPS, - policy=policy, - preprocessor=preprocessor, # Pass the pre and post policy processors - postprocessor=postprocessor, - dataset=dataset, - control_time_s=EPISODE_TIME_SEC, - single_task=TASK_DESCRIPTION, - display_data=True, - teleop_action_processor=make_default_teleop_action_processor(), - robot_action_processor=robot_ee_to_joints_processor, - robot_observation_processor=robot_joints_to_ee_pose_processor, +def main(): + # Create the robot configuration & robot + camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} + robot_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", + id="my_awesome_follower_arm", + cameras=camera_config, + use_degrees=True, ) - # Reset the environment if not stopping or re-recording - if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]): - log_say("Reset the environment") + robot = SO100Follower(robot_config) + + # Create policy + policy = ACTPolicy.from_pretrained(HF_MODEL_ID) + + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(robot.bus.motors.keys()), + ) + + # Build pipeline to convert EE action to joints action + robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + steps=[ + InverseKinematicsEEToJoints( + kinematics=kinematics_solver, + motor_names=list(robot.bus.motors.keys()), + initial_guess_current_joints=True, + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) + + # Build pipeline to convert joints observation to EE observation + robot_joints_to_ee_pose_processor = RobotProcessorPipeline[RobotObservation, RobotObservation]( + steps=[ + ForwardKinematicsJointsToEE( + kinematics=kinematics_solver, motor_names=list(robot.bus.motors.keys()) + ) + ], + to_transition=observation_to_transition, + to_output=transition_to_observation, + ) + + # Create the dataset + dataset = LeRobotDataset.create( + repo_id=HF_DATASET_ID, + fps=FPS, + features=combine_feature_dicts( + aggregate_pipeline_dataset_features( + pipeline=robot_joints_to_ee_pose_processor, + initial_features=create_initial_features(observation=robot.observation_features), + use_videos=True, + ), + # User for now should be explicit on the feature keys that were used for record + # Alternatively, the user can pass the processor step that has the right features + aggregate_pipeline_dataset_features( + pipeline=make_default_teleop_action_processor(), + initial_features=create_initial_features( + action={ + f"ee.{k}": PolicyFeature(type=FeatureType.ACTION, shape=(1,)) + for k in ["x", "y", "z", "wx", "wy", "wz", "gripper_pos"] + } + ), + use_videos=True, + ), + ), + robot_type=robot.name, + use_videos=True, + image_writer_threads=4, + ) + + # Build Policy Processors + preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=policy, + pretrained_path=HF_MODEL_ID, + dataset_stats=dataset.meta.stats, + # The inference device is automatically set to match the detected hardware, overriding any previous device settings from training to ensure compatibility. + preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}}, + ) + + # Connect the robot and teleoperator + robot.connect() + + # Initialize the keyboard listener and rerun visualization + listener, events = init_keyboard_listener() + init_rerun(session_name="so100_so100_evaluate") + + if not robot.is_connected: + raise ValueError("Robot is not connected!") + + print("Starting evaluate loop...") + episode_idx = 0 + for episode_idx in range(NUM_EPISODES): + log_say(f"Running inference, recording eval episode {episode_idx + 1} of {NUM_EPISODES}") + + # Main record loop record_loop( robot=robot, events=events, fps=FPS, + policy=policy, + preprocessor=preprocessor, # Pass the pre and post policy processors + postprocessor=postprocessor, + dataset=dataset, control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, @@ -180,21 +168,40 @@ for episode_idx in range(NUM_EPISODES): robot_observation_processor=robot_joints_to_ee_pose_processor, ) - if events["rerecord_episode"]: - log_say("Re-record episode") - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Reset the environment if not stopping or re-recording + if not events["stop_recording"] and ((episode_idx < NUM_EPISODES - 1) or events["rerecord_episode"]): + log_say("Reset the environment") + record_loop( + robot=robot, + events=events, + fps=FPS, + control_time_s=EPISODE_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + teleop_action_processor=make_default_teleop_action_processor(), + robot_action_processor=robot_ee_to_joints_processor, + robot_observation_processor=robot_joints_to_ee_pose_processor, + ) - # Save episode - dataset.save_episode() - episode_idx += 1 + if events["rerecord_episode"]: + log_say("Re-record episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue -# Clean up -log_say("Stop recording") -robot.disconnect() -listener.stop() + # Save episode + dataset.save_episode() + episode_idx += 1 -dataset.finalize() -dataset.push_to_hub() + # Clean up + log_say("Stop recording") + robot.disconnect() + listener.stop() + + dataset.finalize() + dataset.push_to_hub() + + +if __name__ == "__main__": + main() diff --git a/examples/so100_to_so100_EE/record.py b/examples/so100_to_so100_EE/record.py index 5ff1c286f..6bfdfe32d 100644 --- a/examples/so100_to_so100_EE/record.py +++ b/examples/so100_to_so100_EE/record.py @@ -48,134 +48,122 @@ RESET_TIME_SEC = 30 TASK_DESCRIPTION = "My task description" HF_REPO_ID = "/" -# Create the robot and teleoperator configurations -camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} -follower_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", cameras=camera_config, use_degrees=True -) -leader_config = SO100LeaderConfig(port="/dev/tty.usbmodem5A460819811", id="my_awesome_leader_arm") -# Initialize the robot and teleoperator -follower = SO100Follower(follower_config) -leader = SO100Leader(leader_config) +def main(): + # Create the robot and teleoperator configurations + camera_config = {"front": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=FPS)} + follower_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", + id="my_awesome_follower_arm", + cameras=camera_config, + use_degrees=True, + ) + leader_config = SO100LeaderConfig(port="/dev/tty.usbmodem5A460819811", id="my_awesome_leader_arm") -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -follower_kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(follower.bus.motors.keys()), -) + # Initialize the robot and teleoperator + follower = SO100Follower(follower_config) + leader = SO100Leader(leader_config) -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -leader_kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(leader.bus.motors.keys()), -) - -# Build pipeline to convert follower joints to EE observation -follower_joints_to_ee = RobotProcessorPipeline[RobotObservation, RobotObservation]( - steps=[ - ForwardKinematicsJointsToEE( - kinematics=follower_kinematics_solver, motor_names=list(follower.bus.motors.keys()) - ), - ], - to_transition=observation_to_transition, - to_output=transition_to_observation, -) - -# Build pipeline to convert leader joints to EE action -leader_joints_to_ee = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - ForwardKinematicsJointsToEE( - kinematics=leader_kinematics_solver, motor_names=list(leader.bus.motors.keys()) - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) - -# Build pipeline to convert EE action to follower joints -ee_to_follower_joints = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - [ - EEBoundsAndSafety( - end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, - max_ee_step_m=0.10, - ), - InverseKinematicsEEToJoints( - kinematics=follower_kinematics_solver, - motor_names=list(follower.bus.motors.keys()), - initial_guess_current_joints=True, - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) - -# Create the dataset -dataset = LeRobotDataset.create( - repo_id=HF_REPO_ID, - fps=FPS, - features=combine_feature_dicts( - # Run the feature contract of the pipelines - # This tells you how the features would look like after the pipeline steps - aggregate_pipeline_dataset_features( - pipeline=leader_joints_to_ee, - initial_features=create_initial_features(action=leader.action_features), - use_videos=True, - ), - aggregate_pipeline_dataset_features( - pipeline=follower_joints_to_ee, - initial_features=create_initial_features(observation=follower.observation_features), - use_videos=True, - ), - ), - robot_type=follower.name, - use_videos=True, - image_writer_threads=4, -) - - -# Connect the robot and teleoperator -leader.connect() -follower.connect() - -# Initialize the keyboard listener and rerun visualization -listener, events = init_keyboard_listener() -init_rerun(session_name="recording_phone") - -if not leader.is_connected or not follower.is_connected: - raise ValueError("Robot or teleop is not connected!") - -print("Starting record loop...") -episode_idx = 0 -while episode_idx < NUM_EPISODES and not events["stop_recording"]: - log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}") - - # Main record loop - record_loop( - robot=follower, - events=events, - fps=FPS, - teleop=leader, - dataset=dataset, - control_time_s=EPISODE_TIME_SEC, - single_task=TASK_DESCRIPTION, - display_data=True, - teleop_action_processor=leader_joints_to_ee, - robot_action_processor=ee_to_follower_joints, - robot_observation_processor=follower_joints_to_ee, + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + follower_kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(follower.bus.motors.keys()), ) - # Reset the environment if not stopping or re-recording - if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]): - log_say("Reset the environment") + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + leader_kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(leader.bus.motors.keys()), + ) + + # Build pipeline to convert follower joints to EE observation + follower_joints_to_ee = RobotProcessorPipeline[RobotObservation, RobotObservation]( + steps=[ + ForwardKinematicsJointsToEE( + kinematics=follower_kinematics_solver, motor_names=list(follower.bus.motors.keys()) + ), + ], + to_transition=observation_to_transition, + to_output=transition_to_observation, + ) + + # Build pipeline to convert leader joints to EE action + leader_joints_to_ee = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + steps=[ + ForwardKinematicsJointsToEE( + kinematics=leader_kinematics_solver, motor_names=list(leader.bus.motors.keys()) + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) + + # Build pipeline to convert EE action to follower joints + ee_to_follower_joints = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + [ + EEBoundsAndSafety( + end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, + max_ee_step_m=0.10, + ), + InverseKinematicsEEToJoints( + kinematics=follower_kinematics_solver, + motor_names=list(follower.bus.motors.keys()), + initial_guess_current_joints=True, + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) + + # Create the dataset + dataset = LeRobotDataset.create( + repo_id=HF_REPO_ID, + fps=FPS, + features=combine_feature_dicts( + # Run the feature contract of the pipelines + # This tells you how the features would look like after the pipeline steps + aggregate_pipeline_dataset_features( + pipeline=leader_joints_to_ee, + initial_features=create_initial_features(action=leader.action_features), + use_videos=True, + ), + aggregate_pipeline_dataset_features( + pipeline=follower_joints_to_ee, + initial_features=create_initial_features(observation=follower.observation_features), + use_videos=True, + ), + ), + robot_type=follower.name, + use_videos=True, + image_writer_threads=4, + ) + + # Connect the robot and teleoperator + leader.connect() + follower.connect() + + # Initialize the keyboard listener and rerun visualization + listener, events = init_keyboard_listener() + init_rerun(session_name="recording_phone") + + if not leader.is_connected or not follower.is_connected: + raise ValueError("Robot or teleop is not connected!") + + print("Starting record loop...") + episode_idx = 0 + while episode_idx < NUM_EPISODES and not events["stop_recording"]: + log_say(f"Recording episode {episode_idx + 1} of {NUM_EPISODES}") + + # Main record loop record_loop( robot=follower, events=events, fps=FPS, teleop=leader, - control_time_s=RESET_TIME_SEC, + dataset=dataset, + control_time_s=EPISODE_TIME_SEC, single_task=TASK_DESCRIPTION, display_data=True, teleop_action_processor=leader_joints_to_ee, @@ -183,22 +171,42 @@ while episode_idx < NUM_EPISODES and not events["stop_recording"]: robot_observation_processor=follower_joints_to_ee, ) - if events["rerecord_episode"]: - log_say("Re-recording episode") - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Reset the environment if not stopping or re-recording + if not events["stop_recording"] and (episode_idx < NUM_EPISODES - 1 or events["rerecord_episode"]): + log_say("Reset the environment") + record_loop( + robot=follower, + events=events, + fps=FPS, + teleop=leader, + control_time_s=RESET_TIME_SEC, + single_task=TASK_DESCRIPTION, + display_data=True, + teleop_action_processor=leader_joints_to_ee, + robot_action_processor=ee_to_follower_joints, + robot_observation_processor=follower_joints_to_ee, + ) - # Save episode - dataset.save_episode() - episode_idx += 1 + if events["rerecord_episode"]: + log_say("Re-recording episode") + events["rerecord_episode"] = False + events["exit_early"] = False + dataset.clear_episode_buffer() + continue -# Clean up -log_say("Stop recording") -leader.disconnect() -follower.disconnect() -listener.stop() + # Save episode + dataset.save_episode() + episode_idx += 1 -dataset.finalize() -dataset.push_to_hub() + # Clean up + log_say("Stop recording") + leader.disconnect() + follower.disconnect() + listener.stop() + + dataset.finalize() + dataset.push_to_hub() + + +if __name__ == "__main__": + main() diff --git a/examples/so100_to_so100_EE/replay.py b/examples/so100_to_so100_EE/replay.py index ea78d4e66..9951b139d 100644 --- a/examples/so100_to_so100_EE/replay.py +++ b/examples/so100_to_so100_EE/replay.py @@ -30,72 +30,78 @@ from lerobot.robots.so100_follower.robot_kinematic_processor import ( ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.utils.constants import ACTION -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import log_say EPISODE_IDX = 0 HF_REPO_ID = "/" -# Initialize the robot config -robot_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True -) -# Initialize the robot -robot = SO100Follower(robot_config) +def main(): + # Initialize the robot config + robot_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True + ) -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(robot.bus.motors.keys()), -) + # Initialize the robot + robot = SO100Follower(robot_config) -# Build pipeline to convert EE action to joints action -robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - steps=[ - InverseKinematicsEEToJoints( - kinematics=kinematics_solver, - motor_names=list(robot.bus.motors.keys()), - initial_guess_current_joints=False, # Because replay is open loop - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(robot.bus.motors.keys()), + ) -# Fetch the dataset to replay -dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX]) -# Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0 -episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX) -actions = episode_frames.select_columns(ACTION) + # Build pipeline to convert EE action to joints action + robot_ee_to_joints_processor = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + steps=[ + InverseKinematicsEEToJoints( + kinematics=kinematics_solver, + motor_names=list(robot.bus.motors.keys()), + initial_guess_current_joints=False, # Because replay is open loop + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) -# Connect to the robot -robot.connect() + # Fetch the dataset to replay + dataset = LeRobotDataset(HF_REPO_ID, episodes=[EPISODE_IDX]) + # Filter dataset to only include frames from the specified episode since episodes are chunked in dataset V3.0 + episode_frames = dataset.hf_dataset.filter(lambda x: x["episode_index"] == EPISODE_IDX) + actions = episode_frames.select_columns(ACTION) -if not robot.is_connected: - raise ValueError("Robot is not connected!") + # Connect to the robot + robot.connect() -print("Starting replay loop...") -log_say(f"Replaying episode {EPISODE_IDX}") -for idx in range(len(episode_frames)): - t0 = time.perf_counter() + if not robot.is_connected: + raise ValueError("Robot is not connected!") - # Get recorded action from dataset - ee_action = { - name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"]) - } + print("Starting replay loop...") + log_say(f"Replaying episode {EPISODE_IDX}") + for idx in range(len(episode_frames)): + t0 = time.perf_counter() - # Get robot observation - robot_obs = robot.get_observation() + # Get recorded action from dataset + ee_action = { + name: float(actions[idx][ACTION][i]) for i, name in enumerate(dataset.features[ACTION]["names"]) + } - # Dataset EE -> robot joints - joint_action = robot_ee_to_joints_processor((ee_action, robot_obs)) + # Get robot observation + robot_obs = robot.get_observation() - # Send action to robot - _ = robot.send_action(joint_action) + # Dataset EE -> robot joints + joint_action = robot_ee_to_joints_processor((ee_action, robot_obs)) - busy_wait(1.0 / dataset.fps - (time.perf_counter() - t0)) + # Send action to robot + _ = robot.send_action(joint_action) -# Clean up -robot.disconnect() + precise_sleep(1.0 / dataset.fps - (time.perf_counter() - t0)) + + # Clean up + robot.disconnect() + + +if __name__ == "__main__": + main() diff --git a/examples/so100_to_so100_EE/teleoperate.py b/examples/so100_to_so100_EE/teleoperate.py index b1a8c8c27..21299103b 100644 --- a/examples/so100_to_so100_EE/teleoperate.py +++ b/examples/so100_to_so100_EE/teleoperate.py @@ -32,90 +32,96 @@ from lerobot.robots.so100_follower.robot_kinematic_processor import ( from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.so100_leader.config_so100_leader import SO100LeaderConfig from lerobot.teleoperators.so100_leader.so100_leader import SO100Leader -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.visualization_utils import init_rerun, log_rerun_data FPS = 30 -# Initialize the robot and teleoperator config -follower_config = SO100FollowerConfig( - port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True -) -leader_config = SO100LeaderConfig(port="/dev/tty.usbmodem5A460819811", id="my_awesome_leader_arm") -# Initialize the robot and teleoperator -follower = SO100Follower(follower_config) -leader = SO100Leader(leader_config) +def main(): + # Initialize the robot and teleoperator config + follower_config = SO100FollowerConfig( + port="/dev/tty.usbmodem5A460814411", id="my_awesome_follower_arm", use_degrees=True + ) + leader_config = SO100LeaderConfig(port="/dev/tty.usbmodem5A460819811", id="my_awesome_leader_arm") -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -follower_kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(follower.bus.motors.keys()), -) + # Initialize the robot and teleoperator + follower = SO100Follower(follower_config) + leader = SO100Leader(leader_config) -# NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf -leader_kinematics_solver = RobotKinematics( - urdf_path="./SO101/so101_new_calib.urdf", - target_frame_name="gripper_frame_link", - joint_names=list(leader.bus.motors.keys()), -) + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + follower_kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(follower.bus.motors.keys()), + ) -# Build pipeline to convert teleop joints to EE action -leader_to_ee = RobotProcessorPipeline[RobotAction, RobotAction]( - steps=[ - ForwardKinematicsJointsToEE( - kinematics=leader_kinematics_solver, motor_names=list(leader.bus.motors.keys()) - ), - ], - to_transition=robot_action_to_transition, - to_output=transition_to_robot_action, -) + # NOTE: It is highly recommended to use the urdf in the SO-ARM100 repo: https://github.com/TheRobotStudio/SO-ARM100/blob/main/Simulation/SO101/so101_new_calib.urdf + leader_kinematics_solver = RobotKinematics( + urdf_path="./SO101/so101_new_calib.urdf", + target_frame_name="gripper_frame_link", + joint_names=list(leader.bus.motors.keys()), + ) -# build pipeline to convert EE action to robot joints -ee_to_follower_joints = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( - [ - EEBoundsAndSafety( - end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, - max_ee_step_m=0.10, - ), - InverseKinematicsEEToJoints( - kinematics=follower_kinematics_solver, - motor_names=list(follower.bus.motors.keys()), - initial_guess_current_joints=False, - ), - ], - to_transition=robot_action_observation_to_transition, - to_output=transition_to_robot_action, -) + # Build pipeline to convert teleop joints to EE action + leader_to_ee = RobotProcessorPipeline[RobotAction, RobotAction]( + steps=[ + ForwardKinematicsJointsToEE( + kinematics=leader_kinematics_solver, motor_names=list(leader.bus.motors.keys()) + ), + ], + to_transition=robot_action_to_transition, + to_output=transition_to_robot_action, + ) -# Connect to the robot and teleoperator -follower.connect() -leader.connect() + # build pipeline to convert EE action to robot joints + ee_to_follower_joints = RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction]( + [ + EEBoundsAndSafety( + end_effector_bounds={"min": [-1.0, -1.0, -1.0], "max": [1.0, 1.0, 1.0]}, + max_ee_step_m=0.10, + ), + InverseKinematicsEEToJoints( + kinematics=follower_kinematics_solver, + motor_names=list(follower.bus.motors.keys()), + initial_guess_current_joints=False, + ), + ], + to_transition=robot_action_observation_to_transition, + to_output=transition_to_robot_action, + ) -# Init rerun viewer -init_rerun(session_name="so100_so100_EE_teleop") + # Connect to the robot and teleoperator + follower.connect() + leader.connect() -print("Starting teleop loop...") -while True: - t0 = time.perf_counter() + # Init rerun viewer + init_rerun(session_name="so100_so100_EE_teleop") - # Get robot observation - robot_obs = follower.get_observation() + print("Starting teleop loop...") + while True: + t0 = time.perf_counter() - # Get teleop observation - leader_joints_obs = leader.get_action() + # Get robot observation + robot_obs = follower.get_observation() - # teleop joints -> teleop EE action - leader_ee_act = leader_to_ee(leader_joints_obs) + # Get teleop observation + leader_joints_obs = leader.get_action() - # teleop EE -> robot joints - follower_joints_act = ee_to_follower_joints((leader_ee_act, robot_obs)) + # teleop joints -> teleop EE action + leader_ee_act = leader_to_ee(leader_joints_obs) - # Send action to robot - _ = follower.send_action(follower_joints_act) + # teleop EE -> robot joints + follower_joints_act = ee_to_follower_joints((leader_ee_act, robot_obs)) - # Visualize - log_rerun_data(observation=leader_ee_act, action=follower_joints_act) + # Send action to robot + _ = follower.send_action(follower_joints_act) - busy_wait(max(1.0 / FPS - (time.perf_counter() - t0), 0.0)) + # Visualize + log_rerun_data(observation=leader_ee_act, action=follower_joints_act) + + precise_sleep(max(1.0 / FPS - (time.perf_counter() - t0), 0.0)) + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/act/act_training_example.py b/examples/tutorial/act/act_training_example.py index 6db495ca2..fe70f3023 100644 --- a/examples/tutorial/act/act_training_example.py +++ b/examples/tutorial/act/act_training_example.py @@ -19,80 +19,86 @@ def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[flo return [i / fps for i in delta_indices] -output_directory = Path("outputs/robot_learning_tutorial/act") -output_directory.mkdir(parents=True, exist_ok=True) +def main(): + output_directory = Path("outputs/robot_learning_tutorial/act") + output_directory.mkdir(parents=True, exist_ok=True) -# Select your device -device = torch.device("mps") # or "cuda" or "cpu" + # Select your device + device = torch.device("mps") # or "cuda" or "cpu" -dataset_id = "lerobot/svla_so101_pickplace" + dataset_id = "lerobot/svla_so101_pickplace" -# This specifies the inputs the model will be expecting and the outputs it will produce -dataset_metadata = LeRobotDatasetMetadata(dataset_id) -features = dataset_to_policy_features(dataset_metadata.features) + # This specifies the inputs the model will be expecting and the outputs it will produce + dataset_metadata = LeRobotDatasetMetadata(dataset_id) + features = dataset_to_policy_features(dataset_metadata.features) -output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION} -input_features = {key: ft for key, ft in features.items() if key not in output_features} + output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION} + input_features = {key: ft for key, ft in features.items() if key not in output_features} -cfg = ACTConfig(input_features=input_features, output_features=output_features) -policy = ACTPolicy(cfg) -preprocessor, postprocessor = make_pre_post_processors(cfg, dataset_stats=dataset_metadata.stats) + cfg = ACTConfig(input_features=input_features, output_features=output_features) + policy = ACTPolicy(cfg) + preprocessor, postprocessor = make_pre_post_processors(cfg, dataset_stats=dataset_metadata.stats) -policy.train() -policy.to(device) + policy.train() + policy.to(device) -# To perform action chunking, ACT expects a given number of actions as targets -delta_timestamps = { - "action": make_delta_timestamps(cfg.action_delta_indices, dataset_metadata.fps), -} + # To perform action chunking, ACT expects a given number of actions as targets + delta_timestamps = { + "action": make_delta_timestamps(cfg.action_delta_indices, dataset_metadata.fps), + } -# add image features if they are present -delta_timestamps |= { - k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) for k in cfg.image_features -} + # add image features if they are present + delta_timestamps |= { + k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) + for k in cfg.image_features + } -# Instantiate the dataset -dataset = LeRobotDataset(dataset_id, delta_timestamps=delta_timestamps) + # Instantiate the dataset + dataset = LeRobotDataset(dataset_id, delta_timestamps=delta_timestamps) -# Create the optimizer and dataloader for offline training -optimizer = cfg.get_optimizer_preset().build(policy.parameters()) -batch_size = 32 -dataloader = torch.utils.data.DataLoader( - dataset, - batch_size=batch_size, - shuffle=True, - pin_memory=device.type != "cpu", - drop_last=True, -) + # Create the optimizer and dataloader for offline training + optimizer = cfg.get_optimizer_preset().build(policy.parameters()) + batch_size = 32 + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + shuffle=True, + pin_memory=device.type != "cpu", + drop_last=True, + ) -# Number of training steps and logging frequency -training_steps = 1 -log_freq = 1 + # Number of training steps and logging frequency + training_steps = 1 + log_freq = 1 -# Run training loop -step = 0 -done = False -while not done: - for batch in dataloader: - batch = preprocessor(batch) - loss, _ = policy.forward(batch) - loss.backward() - optimizer.step() - optimizer.zero_grad() + # Run training loop + step = 0 + done = False + while not done: + for batch in dataloader: + batch = preprocessor(batch) + loss, _ = policy.forward(batch) + loss.backward() + optimizer.step() + optimizer.zero_grad() - if step % log_freq == 0: - print(f"step: {step} loss: {loss.item():.3f}") - step += 1 - if step >= training_steps: - done = True - break + if step % log_freq == 0: + print(f"step: {step} loss: {loss.item():.3f}") + step += 1 + if step >= training_steps: + done = True + break -# Save the policy checkpoint, alongside the pre/post processors -policy.save_pretrained(output_directory) -preprocessor.save_pretrained(output_directory) -postprocessor.save_pretrained(output_directory) + # Save the policy checkpoint, alongside the pre/post processors + policy.save_pretrained(output_directory) + preprocessor.save_pretrained(output_directory) + postprocessor.save_pretrained(output_directory) -# Save all assets to the Hub -policy.push_to_hub("fracapuano/robot_learning_tutorial_act") -preprocessor.push_to_hub("fracapuano/robot_learning_tutorial_act") -postprocessor.push_to_hub("fracapuano/robot_learning_tutorial_act") + # Save all assets to the Hub + policy.push_to_hub("/robot_learning_tutorial_act") + preprocessor.push_to_hub("/robot_learning_tutorial_act") + postprocessor.push_to_hub("/robot_learning_tutorial_act") + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/act/act_using_example.py b/examples/tutorial/act/act_using_example.py index 8beef7654..b268e8790 100644 --- a/examples/tutorial/act/act_using_example.py +++ b/examples/tutorial/act/act_using_example.py @@ -8,50 +8,56 @@ from lerobot.policies.utils import build_inference_frame, make_robot_action from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig from lerobot.robots.so100_follower.so100_follower import SO100Follower -device = torch.device("mps") # or "cuda" or "cpu" -model_id = "fracapuano/robot_learning_tutorial_act" -model = ACTPolicy.from_pretrained(model_id) - -dataset_id = "lerobot/svla_so101_pickplace" -# This only downloads the metadata for the dataset, ~10s of MB even for large-scale datasets -dataset_metadata = LeRobotDatasetMetadata(dataset_id) -preprocess, postprocess = make_pre_post_processors(model.config, dataset_stats=dataset_metadata.stats) - -# # find ports using lerobot-find-port -follower_port = ... # something like "/dev/tty.usbmodem58760431631" - -# # the robot ids are used the load the right calibration files -follower_id = ... # something like "follower_so100" - MAX_EPISODES = 5 MAX_STEPS_PER_EPISODE = 20 -# Robot and environment configuration -# Camera keys must match the name and resolutions of the ones used for training! -# You can check the camera keys expected by a model in the info.json card on the model card on the Hub -camera_config = { - "side": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), - "up": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), -} -robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) -robot = SO100Follower(robot_cfg) -robot.connect() +def main(): + device = torch.device("mps") # or "cuda" or "cpu" + model_id = "/robot_learning_tutorial_act" + model = ACTPolicy.from_pretrained(model_id) -for _ in range(MAX_EPISODES): - for _ in range(MAX_STEPS_PER_EPISODE): - obs = robot.get_observation() - obs_frame = build_inference_frame( - observation=obs, ds_features=dataset_metadata.features, device=device - ) + dataset_id = "lerobot/svla_so101_pickplace" + # This only downloads the metadata for the dataset, ~10s of MB even for large-scale datasets + dataset_metadata = LeRobotDatasetMetadata(dataset_id) + preprocess, postprocess = make_pre_post_processors(model.config, dataset_stats=dataset_metadata.stats) - obs = preprocess(obs_frame) + # # find ports using lerobot-find-port + follower_port = ... # something like "/dev/tty.usbmodem58760431631" - action = model.select_action(obs) - action = postprocess(action) + # # the robot ids are used the load the right calibration files + follower_id = ... # something like "follower_so100" - action = make_robot_action(action, dataset_metadata.features) + # Robot and environment configuration + # Camera keys must match the name and resolutions of the ones used for training! + # You can check the camera keys expected by a model in the info.json card on the model card on the Hub + camera_config = { + "side": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "up": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), + } - robot.send_action(action) + robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) + robot = SO100Follower(robot_cfg) + robot.connect() - print("Episode finished! Starting new episode...") + for _ in range(MAX_EPISODES): + for _ in range(MAX_STEPS_PER_EPISODE): + obs = robot.get_observation() + obs_frame = build_inference_frame( + observation=obs, ds_features=dataset_metadata.features, device=device + ) + + obs = preprocess(obs_frame) + + action = model.select_action(obs) + action = postprocess(action) + + action = make_robot_action(action, dataset_metadata.features) + + robot.send_action(action) + + print("Episode finished! Starting new episode...") + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/async-inf/policy_server.py b/examples/tutorial/async-inf/policy_server.py index cd2b1f04c..244205bcf 100644 --- a/examples/tutorial/async-inf/policy_server.py +++ b/examples/tutorial/async-inf/policy_server.py @@ -1,11 +1,17 @@ from lerobot.async_inference.configs import PolicyServerConfig from lerobot.async_inference.policy_server import serve -host = ... # something like "127.0.0.1" if you're exposing to localhost -port = ... # something like 8080 -config = PolicyServerConfig( - host=host, - port=port, -) -serve(config) +def main(): + host = ... # something like "127.0.0.1" if you're exposing to localhost + port = ... # something like 8080 + + config = PolicyServerConfig( + host=host, + port=port, + ) + serve(config) + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/async-inf/robot_client.py b/examples/tutorial/async-inf/robot_client.py index 3e46e5e31..fff7b15b3 100644 --- a/examples/tutorial/async-inf/robot_client.py +++ b/examples/tutorial/async-inf/robot_client.py @@ -6,50 +6,56 @@ from lerobot.async_inference.robot_client import RobotClient from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.robots.so100_follower import SO100FollowerConfig -# these cameras must match the ones expected by the policy - find your cameras with lerobot-find-cameras -# check the config.json on the Hub for the policy you are using to see the expected camera specs -camera_cfg = { - "up": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), - "side": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), -} -# # find ports using lerobot-find-port -follower_port = ... # something like "/dev/tty.usbmodem58760431631" +def main(): + # these cameras must match the ones expected by the policy - find your cameras with lerobot-find-cameras + # check the config.json on the Hub for the policy you are using to see the expected camera specs + camera_cfg = { + "up": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "side": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), + } -# # the robot ids are used the load the right calibration files -follower_id = ... # something like "follower_so100" + # # find ports using lerobot-find-port + follower_port = ... # something like "/dev/tty.usbmodem58760431631" -robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_cfg) + # # the robot ids are used the load the right calibration files + follower_id = ... # something like "follower_so100" -server_address = ... # something like "127.0.0.1:8080" if using localhost + robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_cfg) -# 3. Create client configuration -client_cfg = RobotClientConfig( - robot=robot_cfg, - server_address=server_address, - policy_device="mps", - policy_type="act", - pretrained_name_or_path="fracapuano/robot_learning_tutorial_act", - chunk_size_threshold=0.5, # g - actions_per_chunk=50, # make sure this is less than the max actions of the policy -) + server_address = ... # something like "127.0.0.1:8080" if using localhost -# 4. Create and start client -client = RobotClient(client_cfg) + # 3. Create client configuration + client_cfg = RobotClientConfig( + robot=robot_cfg, + server_address=server_address, + policy_device="mps", + policy_type="act", + pretrained_name_or_path="/robot_learning_tutorial_act", + chunk_size_threshold=0.5, # g + actions_per_chunk=50, # make sure this is less than the max actions of the policy + ) -# 5. Provide a textual description of the task -task = ... + # 4. Create and start client + client = RobotClient(client_cfg) -if client.start(): - # Start action receiver thread - action_receiver_thread = threading.Thread(target=client.receive_actions, daemon=True) - action_receiver_thread.start() + # 5. Provide a textual description of the task + task = ... - try: - # Run the control loop - client.control_loop(task) - except KeyboardInterrupt: - client.stop() - action_receiver_thread.join() - # (Optionally) plot the action queue size - visualize_action_queue_size(client.action_queue_size) + if client.start(): + # Start action receiver thread + action_receiver_thread = threading.Thread(target=client.receive_actions, daemon=True) + action_receiver_thread.start() + + try: + # Run the control loop + client.control_loop(task) + except KeyboardInterrupt: + client.stop() + action_receiver_thread.join() + # (Optionally) plot the action queue size + visualize_action_queue_size(client.action_queue_size) + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/diffusion/diffusion_training_example.py b/examples/tutorial/diffusion/diffusion_training_example.py index 4c3fac09e..6db081450 100644 --- a/examples/tutorial/diffusion/diffusion_training_example.py +++ b/examples/tutorial/diffusion/diffusion_training_example.py @@ -19,81 +19,87 @@ def make_delta_timestamps(delta_indices: list[int] | None, fps: int) -> list[flo return [i / fps for i in delta_indices] -output_directory = Path("outputs/robot_learning_tutorial/diffusion") -output_directory.mkdir(parents=True, exist_ok=True) +def main(): + output_directory = Path("outputs/robot_learning_tutorial/diffusion") + output_directory.mkdir(parents=True, exist_ok=True) -# Select your device -device = torch.device("mps") # or "cuda" or "cpu" + # Select your device + device = torch.device("mps") # or "cuda" or "cpu" -dataset_id = "lerobot/svla_so101_pickplace" + dataset_id = "lerobot/svla_so101_pickplace" -# This specifies the inputs the model will be expecting and the outputs it will produce -dataset_metadata = LeRobotDatasetMetadata(dataset_id) -features = dataset_to_policy_features(dataset_metadata.features) + # This specifies the inputs the model will be expecting and the outputs it will produce + dataset_metadata = LeRobotDatasetMetadata(dataset_id) + features = dataset_to_policy_features(dataset_metadata.features) -output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION} -input_features = {key: ft for key, ft in features.items() if key not in output_features} + output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION} + input_features = {key: ft for key, ft in features.items() if key not in output_features} -cfg = DiffusionConfig(input_features=input_features, output_features=output_features) -policy = DiffusionPolicy(cfg) -preprocessor, postprocessor = make_pre_post_processors(cfg, dataset_stats=dataset_metadata.stats) + cfg = DiffusionConfig(input_features=input_features, output_features=output_features) + policy = DiffusionPolicy(cfg) + preprocessor, postprocessor = make_pre_post_processors(cfg, dataset_stats=dataset_metadata.stats) -policy.train() -policy.to(device) + policy.train() + policy.to(device) -# To perform action chunking, ACT expects a given number of actions as targets -delta_timestamps = { - "observation.state": make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps), - "action": make_delta_timestamps(cfg.action_delta_indices, dataset_metadata.fps), -} + # To perform action chunking, ACT expects a given number of actions as targets + delta_timestamps = { + "observation.state": make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps), + "action": make_delta_timestamps(cfg.action_delta_indices, dataset_metadata.fps), + } -# add image features if they are present -delta_timestamps |= { - k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) for k in cfg.image_features -} + # add image features if they are present + delta_timestamps |= { + k: make_delta_timestamps(cfg.observation_delta_indices, dataset_metadata.fps) + for k in cfg.image_features + } -# Instantiate the dataset -dataset = LeRobotDataset(dataset_id, delta_timestamps=delta_timestamps) + # Instantiate the dataset + dataset = LeRobotDataset(dataset_id, delta_timestamps=delta_timestamps) -# Create the optimizer and dataloader for offline training -optimizer = cfg.get_optimizer_preset().build(policy.parameters()) -batch_size = 32 -dataloader = torch.utils.data.DataLoader( - dataset, - batch_size=batch_size, - shuffle=True, - pin_memory=device.type != "cpu", - drop_last=True, -) + # Create the optimizer and dataloader for offline training + optimizer = cfg.get_optimizer_preset().build(policy.parameters()) + batch_size = 32 + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + shuffle=True, + pin_memory=device.type != "cpu", + drop_last=True, + ) -# Number of training steps and logging frequency -training_steps = 1 -log_freq = 1 + # Number of training steps and logging frequency + training_steps = 1 + log_freq = 1 -# Run training loop -step = 0 -done = False -while not done: - for batch in dataloader: - batch = preprocessor(batch) - loss, _ = policy.forward(batch) - loss.backward() - optimizer.step() - optimizer.zero_grad() + # Run training loop + step = 0 + done = False + while not done: + for batch in dataloader: + batch = preprocessor(batch) + loss, _ = policy.forward(batch) + loss.backward() + optimizer.step() + optimizer.zero_grad() - if step % log_freq == 0: - print(f"step: {step} loss: {loss.item():.3f}") - step += 1 - if step >= training_steps: - done = True - break + if step % log_freq == 0: + print(f"step: {step} loss: {loss.item():.3f}") + step += 1 + if step >= training_steps: + done = True + break -# Save the policy checkpoint, alongside the pre/post processors -policy.save_pretrained(output_directory) -preprocessor.save_pretrained(output_directory) -postprocessor.save_pretrained(output_directory) + # Save the policy checkpoint, alongside the pre/post processors + policy.save_pretrained(output_directory) + preprocessor.save_pretrained(output_directory) + postprocessor.save_pretrained(output_directory) -# Save all assets to the Hub -policy.push_to_hub("fracapuano/robot_learning_tutorial_diffusion") -preprocessor.push_to_hub("fracapuano/robot_learning_tutorial_diffusion") -postprocessor.push_to_hub("fracapuano/robot_learning_tutorial_diffusion") + # Save all assets to the Hub + policy.push_to_hub("/robot_learning_tutorial_diffusion") + preprocessor.push_to_hub("/robot_learning_tutorial_diffusion") + postprocessor.push_to_hub("/robot_learning_tutorial_diffusion") + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/diffusion/diffusion_using_example.py b/examples/tutorial/diffusion/diffusion_using_example.py index fe516383f..96cc607b6 100644 --- a/examples/tutorial/diffusion/diffusion_using_example.py +++ b/examples/tutorial/diffusion/diffusion_using_example.py @@ -8,53 +8,57 @@ from lerobot.policies.utils import build_inference_frame, make_robot_action from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig from lerobot.robots.so100_follower.so100_follower import SO100Follower -device = torch.device("mps") # or "cuda" or "cpu" -model_id = "fracapuano/robot_learning_tutorial_diffusion" - -model = DiffusionPolicy.from_pretrained(model_id) - -dataset_id = "lerobot/svla_so101_pickplace" -# This only downloads the metadata for the dataset, ~10s of MB even for large-scale datasets -dataset_metadata = LeRobotDatasetMetadata(dataset_id) -preprocess, postprocess = make_pre_post_processors( - model.config, model_id, dataset_stats=dataset_metadata.stats -) - MAX_EPISODES = 5 MAX_STEPS_PER_EPISODE = 20 -# # find ports using lerobot-find-port -follower_port = ... # something like "/dev/tty.usbmodem58760431631" +def main(): + device = torch.device("mps") # or "cuda" or "cpu" + model_id = "/robot_learning_tutorial_diffusion" -# # the robot ids are used the load the right calibration files -follower_id = ... # something like "follower_so100" + model = DiffusionPolicy.from_pretrained(model_id) -# Robot and environment configuration -# Camera keys must match the name and resolutions of the ones used for training! -# You can check the camera keys expected by a model in the info.json card on the model card on the Hub -camera_config = { - "side": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), - "up": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), -} + dataset_id = "lerobot/svla_so101_pickplace" + # This only downloads the metadata for the dataset, ~10s of MB even for large-scale datasets + dataset_metadata = LeRobotDatasetMetadata(dataset_id) + preprocess, postprocess = make_pre_post_processors( + model.config, model_id, dataset_stats=dataset_metadata.stats + ) -robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) -robot = SO100Follower(robot_cfg) -robot.connect() + # # find ports using lerobot-find-port + follower_port = ... # something like "/dev/tty.usbmodem58760431631" + + # # the robot ids are used the load the right calibration files + follower_id = ... # something like "follower_so100" + + # Robot and environment configuration + # Camera keys must match the name and resolutions of the ones used for training! + # You can check the camera keys expected by a model in the info.json card on the model card on the Hub + camera_config = { + "side": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "up": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), + } + + robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) + robot = SO100Follower(robot_cfg) + robot.connect() + + for _ in range(MAX_EPISODES): + for _ in range(MAX_STEPS_PER_EPISODE): + obs = robot.get_observation() + obs_frame = build_inference_frame( + observation=obs, ds_features=dataset_metadata.features, device=device + ) + + obs = preprocess(obs_frame) + + action = model.select_action(obs) + action = postprocess(action) + action = make_robot_action(action, dataset_metadata.features) + robot.send_action(action) + + print("Episode finished! Starting new episode...") -for _ in range(MAX_EPISODES): - for _ in range(MAX_STEPS_PER_EPISODE): - obs = robot.get_observation() - obs_frame = build_inference_frame( - observation=obs, ds_features=dataset_metadata.features, device=device - ) - - obs = preprocess(obs_frame) - - action = model.select_action(obs) - action = postprocess(action) - action = make_robot_action(action, dataset_metadata.features) - robot.send_action(action) - - print("Episode finished! Starting new episode...") +if __name__ == "__main__": + main() diff --git a/examples/tutorial/pi0/using_pi0_example.py b/examples/tutorial/pi0/using_pi0_example.py index 98fc4b4be..362092ccf 100644 --- a/examples/tutorial/pi0/using_pi0_example.py +++ b/examples/tutorial/pi0/using_pi0_example.py @@ -11,57 +11,63 @@ from lerobot.robots.so100_follower.so100_follower import SO100Follower MAX_EPISODES = 5 MAX_STEPS_PER_EPISODE = 20 -device = torch.device("mps") # or "cuda" or "cpu" -model_id = "lerobot/pi0_base" -model = PI0Policy.from_pretrained(model_id) +def main(): + device = torch.device("mps") # or "cuda" or "cpu" + model_id = "lerobot/pi0_base" -preprocess, postprocess = make_pre_post_processors( - model.config, - model_id, - # This overrides allows to run on MPS, otherwise defaults to CUDA (if available) - preprocessor_overrides={"device_processor": {"device": str(device)}}, -) + model = PI0Policy.from_pretrained(model_id) -# find ports using lerobot-find-port -follower_port = ... # something like "/dev/tty.usbmodem58760431631" + preprocess, postprocess = make_pre_post_processors( + model.config, + model_id, + # This overrides allows to run on MPS, otherwise defaults to CUDA (if available) + preprocessor_overrides={"device_processor": {"device": str(device)}}, + ) -# the robot ids are used the load the right calibration files -follower_id = ... # something like "follower_so100" + # find ports using lerobot-find-port + follower_port = ... # something like "/dev/tty.usbmodem58760431631" -# Robot and environment configuration -# Camera keys must match the name and resolutions of the ones used for training! -# You can check the camera keys expected by a model in the info.json card on the model card on the Hub -camera_config = { - "base_0_rgb": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), - "left_wrist_0_rgb": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), - "right_wrist_0_rgb": OpenCVCameraConfig(index_or_path=2, width=640, height=480, fps=30), -} + # the robot ids are used the load the right calibration files + follower_id = ... # something like "follower_so100" -robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) -robot = SO100Follower(robot_cfg) -robot.connect() + # Robot and environment configuration + # Camera keys must match the name and resolutions of the ones used for training! + # You can check the camera keys expected by a model in the info.json card on the model card on the Hub + camera_config = { + "base_0_rgb": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "left_wrist_0_rgb": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), + "right_wrist_0_rgb": OpenCVCameraConfig(index_or_path=2, width=640, height=480, fps=30), + } -task = "" # something like "pick the red block" -robot_type = "" # something like "so100_follower" for multi-embodiment datasets + robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) + robot = SO100Follower(robot_cfg) + robot.connect() -# This is used to match the raw observation keys to the keys expected by the policy -action_features = hw_to_dataset_features(robot.action_features, "action") -obs_features = hw_to_dataset_features(robot.observation_features, "observation") -dataset_features = {**action_features, **obs_features} + task = "" # something like "pick the red block" + robot_type = "" # something like "so100_follower" for multi-embodiment datasets -for _ in range(MAX_EPISODES): - for _ in range(MAX_STEPS_PER_EPISODE): - obs = robot.get_observation() - obs_frame = build_inference_frame( - observation=obs, ds_features=dataset_features, device=device, task=task, robot_type=robot_type - ) + # This is used to match the raw observation keys to the keys expected by the policy + action_features = hw_to_dataset_features(robot.action_features, "action") + obs_features = hw_to_dataset_features(robot.observation_features, "observation") + dataset_features = {**action_features, **obs_features} - obs = preprocess(obs_frame) + for _ in range(MAX_EPISODES): + for _ in range(MAX_STEPS_PER_EPISODE): + obs = robot.get_observation() + obs_frame = build_inference_frame( + observation=obs, ds_features=dataset_features, device=device, task=task, robot_type=robot_type + ) - action = model.select_action(obs) - action = postprocess(action) - action = make_robot_action(action, dataset_features) - robot.send_action(action) + obs = preprocess(obs_frame) - print("Episode finished! Starting new episode...") + action = model.select_action(obs) + action = postprocess(action) + action = make_robot_action(action, dataset_features) + robot.send_action(action) + + print("Episode finished! Starting new episode...") + + +if __name__ == "__main__": + main() diff --git a/examples/tutorial/rl/hilserl_example.py b/examples/tutorial/rl/hilserl_example.py index 865053d36..c49233ebb 100644 --- a/examples/tutorial/rl/hilserl_example.py +++ b/examples/tutorial/rl/hilserl_example.py @@ -20,6 +20,8 @@ from lerobot.teleoperators.utils import TeleopEvents LOG_EVERY = 10 SEND_EVERY = 10 +MAX_EPISODES = 5 +MAX_STEPS_PER_EPISODE = 20 def run_learner( @@ -223,123 +225,123 @@ def make_policy_obs(obs, device: torch.device = "cpu"): } -"""Main function - coordinates actor and learner processes.""" +def main(): + """Main function - coordinates actor and learner processes.""" -device = "mps" # or "cuda" or "cpu" -output_directory = Path("outputs/robot_learning_tutorial/hil_serl") -output_directory.mkdir(parents=True, exist_ok=True) + device = "mps" # or "cuda" or "cpu" + output_directory = Path("outputs/robot_learning_tutorial/hil_serl") + output_directory.mkdir(parents=True, exist_ok=True) -# find ports using lerobot-find-port -follower_port = ... -leader_port = ... + # find ports using lerobot-find-port + follower_port = ... + leader_port = ... -# the robot ids are used the load the right calibration files -follower_id = ... -leader_id = ... + # the robot ids are used the load the right calibration files + follower_id = ... + leader_id = ... -# A pretrained model (to be used in-distribution!) -reward_classifier_id = "fracapuano/reward_classifier_hil_serl_example" -reward_classifier = Classifier.from_pretrained(reward_classifier_id) + # A pretrained model (to be used in-distribution!) + reward_classifier_id = "/reward_classifier_hil_serl_example" + reward_classifier = Classifier.from_pretrained(reward_classifier_id) -reward_classifier.to(device) -reward_classifier.eval() + reward_classifier.to(device) + reward_classifier.eval() -MAX_EPISODES = 5 -MAX_STEPS_PER_EPISODE = 20 + # Robot and environment configuration + robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id) + teleop_cfg = SO100LeaderConfig(port=leader_port, id=leader_id) + processor_cfg = HILSerlProcessorConfig(control_mode="leader") -# Robot and environment configuration -robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id) -teleop_cfg = SO100LeaderConfig(port=leader_port, id=leader_id) -processor_cfg = HILSerlProcessorConfig(control_mode="leader") + env_cfg = HILSerlRobotEnvConfig(robot=robot_cfg, teleop=teleop_cfg, processor=processor_cfg) -env_cfg = HILSerlRobotEnvConfig(robot=robot_cfg, teleop=teleop_cfg, processor=processor_cfg) + # Create robot environment + env, teleop_device = make_robot_env(env_cfg) -# Create robot environment -env, teleop_device = make_robot_env(env_cfg) + obs_features = hw_to_dataset_features(env.robot.observation_features, "observation") + action_features = hw_to_dataset_features(env.robot.action_features, "action") -obs_features = hw_to_dataset_features(env.robot.observation_features, "observation") -action_features = hw_to_dataset_features(env.robot.action_features, "action") + # Create SAC policy for action selection + policy_cfg = SACConfig( + device=device, + input_features=obs_features, + output_features=action_features, + ) -# Create SAC policy for action selection -policy_cfg = SACConfig( - device=device, - input_features=obs_features, - output_features=action_features, -) + policy_actor = SACPolicy(policy_cfg) + policy_learner = SACPolicy(policy_cfg) -policy_actor = SACPolicy(policy_cfg) -policy_learner = SACPolicy(policy_cfg) + demonstrations_repo_id = "lerobot/example_hil_serl_dataset" + offline_dataset = LeRobotDataset(repo_id=demonstrations_repo_id) -demonstrations_repo_id = "lerobot/example_hil_serl_dataset" -offline_dataset = LeRobotDataset(repo_id=demonstrations_repo_id) + # Online buffer: initialized from scratch + online_replay_buffer = ReplayBuffer(device=device, state_keys=list(obs_features.keys())) + # Offline buffer: Created from dataset (pre-populated it with demonstrations) + offline_replay_buffer = ReplayBuffer.from_lerobot_dataset( + lerobot_dataset=offline_dataset, device=device, state_keys=list(obs_features.keys()) + ) -# Online buffer: initialized from scratch -online_replay_buffer = ReplayBuffer(device=device, state_keys=list(obs_features.keys())) -# Offline buffer: Created from dataset (pre-populated it with demonstrations) -offline_replay_buffer = ReplayBuffer.from_lerobot_dataset( - lerobot_dataset=offline_dataset, device=device, state_keys=list(obs_features.keys()) -) + # Create communication channels between learner and actor processes + transitions_queue = mp.Queue(maxsize=10) + parameters_queue = mp.Queue(maxsize=2) + shutdown_event = mp.Event() -# Create communication channels between learner and actor processes -transitions_queue = mp.Queue(maxsize=10) -parameters_queue = mp.Queue(maxsize=2) -shutdown_event = mp.Event() + # Signal handler for graceful shutdown + def signal_handler(sig): + print(f"\nSignal {sig} received, shutting down...") + shutdown_event.set() + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + # Create processes + learner_process = mp.Process( + target=run_learner, + args=( + transitions_queue, + parameters_queue, + shutdown_event, + policy_learner, + online_replay_buffer, + offline_replay_buffer, + ), + kwargs={"device": device}, # can run on accelerated hardware for training + ) + + actor_process = mp.Process( + target=run_actor, + args=( + transitions_queue, + parameters_queue, + shutdown_event, + policy_actor, + reward_classifier, + env_cfg, + output_directory, + ), + kwargs={"device": "cpu"}, # actor is frozen, can run on CPU or accelerate for inference + ) + + learner_process.start() + actor_process.start() + + try: + # Wait for actor to finish (it controls the episode loop) + actor_process.join() + shutdown_event.set() + learner_process.join(timeout=10) + + except KeyboardInterrupt: + print("Main process interrupted") + shutdown_event.set() + actor_process.join(timeout=5) + learner_process.join(timeout=10) + + finally: + if learner_process.is_alive(): + learner_process.terminate() + if actor_process.is_alive(): + actor_process.terminate() -# Signal handler for graceful shutdown -def signal_handler(sig): - print(f"\nSignal {sig} received, shutting down...") - shutdown_event.set() - - -signal.signal(signal.SIGINT, signal_handler) -signal.signal(signal.SIGTERM, signal_handler) - -# Create processes -learner_process = mp.Process( - target=run_learner, - args=( - transitions_queue, - parameters_queue, - shutdown_event, - policy_learner, - online_replay_buffer, - offline_replay_buffer, - ), - kwargs={"device": device}, # can run on accelerated hardware for training -) - -actor_process = mp.Process( - target=run_actor, - args=( - transitions_queue, - parameters_queue, - shutdown_event, - policy_actor, - reward_classifier, - env_cfg, - output_directory, - ), - kwargs={"device": "cpu"}, # actor is frozen, can run on CPU or accelerate for inference -) - -learner_process.start() -actor_process.start() - -try: - # Wait for actor to finish (it controls the episode loop) - actor_process.join() - shutdown_event.set() - learner_process.join(timeout=10) - -except KeyboardInterrupt: - print("Main process interrupted") - shutdown_event.set() - actor_process.join(timeout=5) - learner_process.join(timeout=10) - -finally: - if learner_process.is_alive(): - learner_process.terminate() - if actor_process.is_alive(): - actor_process.terminate() +if __name__ == "__main__": + main() diff --git a/examples/tutorial/rl/reward_classifier_example.py b/examples/tutorial/rl/reward_classifier_example.py index a3d852e30..4af6b899c 100644 --- a/examples/tutorial/rl/reward_classifier_example.py +++ b/examples/tutorial/rl/reward_classifier_example.py @@ -4,59 +4,64 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.policies.factory import make_policy, make_pre_post_processors from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig -# Device to use for training -device = "mps" # or "cuda", or "cpu" -# Load the dataset used for training -repo_id = "lerobot/example_hil_serl_dataset" -dataset = LeRobotDataset(repo_id) +def main(): + # Device to use for training + device = "mps" # or "cuda", or "cpu" -# Configure the policy to extract features from the image frames -camera_keys = dataset.meta.camera_keys + # Load the dataset used for training + repo_id = "lerobot/example_hil_serl_dataset" + dataset = LeRobotDataset(repo_id) -config = RewardClassifierConfig( - num_cameras=len(camera_keys), - device=device, - # backbone model to extract features from the image frames - model_name="microsoft/resnet-18", -) + # Configure the policy to extract features from the image frames + camera_keys = dataset.meta.camera_keys -# Make policy, preprocessor, and optimizer -policy = make_policy(config, ds_meta=dataset.meta) -optimizer = config.get_optimizer_preset().build(policy.parameters()) -preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats) + config = RewardClassifierConfig( + num_cameras=len(camera_keys), + device=device, + # backbone model to extract features from the image frames + model_name="microsoft/resnet-18", + ) + + # Make policy, preprocessor, and optimizer + policy = make_policy(config, ds_meta=dataset.meta) + optimizer = config.get_optimizer_preset().build(policy.parameters()) + preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats) + + classifier_id = "/reward_classifier_hil_serl_example" + + # Instantiate a dataloader + dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True) + + # Training loop + num_epochs = 5 + for epoch in range(num_epochs): + total_loss = 0 + total_accuracy = 0 + for batch in dataloader: + # Preprocess the batch and move it to the correct device. + batch = preprocessor(batch) + + # Forward pass + loss, output_dict = policy.forward(batch) + + # Backward pass and optimization + optimizer.zero_grad() + loss.backward() + optimizer.step() + + total_loss += loss.item() + total_accuracy += output_dict["accuracy"] + + avg_loss = total_loss / len(dataloader) + avg_accuracy = total_accuracy / len(dataloader) + print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%") + + print("Training finished!") + + # You can now save the trained policy. + policy.push_to_hub(classifier_id) -classifier_id = "fracapuano/reward_classifier_hil_serl_example" - -# Instantiate a dataloader -dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True) - -# Training loop -num_epochs = 5 -for epoch in range(num_epochs): - total_loss = 0 - total_accuracy = 0 - for batch in dataloader: - # Preprocess the batch and move it to the correct device. - batch = preprocessor(batch) - - # Forward pass - loss, output_dict = policy.forward(batch) - - # Backward pass and optimization - optimizer.zero_grad() - loss.backward() - optimizer.step() - - total_loss += loss.item() - total_accuracy += output_dict["accuracy"] - - avg_loss = total_loss / len(dataloader) - avg_accuracy = total_accuracy / len(dataloader) - print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%") - -print("Training finished!") - -# You can now save the trained policy. -policy.push_to_hub(classifier_id) +if __name__ == "__main__": + main() diff --git a/examples/tutorial/smolvla/using_smolvla_example.py b/examples/tutorial/smolvla/using_smolvla_example.py index 04c327833..d4219f316 100644 --- a/examples/tutorial/smolvla/using_smolvla_example.py +++ b/examples/tutorial/smolvla/using_smolvla_example.py @@ -11,56 +11,62 @@ from lerobot.robots.so100_follower.so100_follower import SO100Follower MAX_EPISODES = 5 MAX_STEPS_PER_EPISODE = 20 -device = torch.device("mps") # or "cuda" or "cpu" -model_id = "lerobot/smolvla_base" -model = SmolVLAPolicy.from_pretrained(model_id) +def main(): + device = torch.device("mps") # or "cuda" or "cpu" + model_id = "lerobot/smolvla_base" -preprocess, postprocess = make_pre_post_processors( - model.config, - model_id, - # This overrides allows to run on MPS, otherwise defaults to CUDA (if available) - preprocessor_overrides={"device_processor": {"device": str(device)}}, -) + model = SmolVLAPolicy.from_pretrained(model_id) -# find ports using lerobot-find-port -follower_port = ... # something like "/dev/tty.usbmodem58760431631" + preprocess, postprocess = make_pre_post_processors( + model.config, + model_id, + # This overrides allows to run on MPS, otherwise defaults to CUDA (if available) + preprocessor_overrides={"device_processor": {"device": str(device)}}, + ) -# the robot ids are used the load the right calibration files -follower_id = ... # something like "follower_so100" + # find ports using lerobot-find-port + follower_port = ... # something like "/dev/tty.usbmodem58760431631" -# Robot and environment configuration -# Camera keys must match the name and resolutions of the ones used for training! -# You can check the camera keys expected by a model in the info.json card on the model card on the Hub -camera_config = { - "camera1": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), - "camera2": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), -} + # the robot ids are used the load the right calibration files + follower_id = ... # something like "follower_so100" -robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) -robot = SO100Follower(robot_cfg) -robot.connect() + # Robot and environment configuration + # Camera keys must match the name and resolutions of the ones used for training! + # You can check the camera keys expected by a model in the info.json card on the model card on the Hub + camera_config = { + "camera1": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "camera2": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30), + } -task = "" # something like "pick the red block" -robot_type = "" # something like "so100_follower" for multi-embodiment datasets + robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id, cameras=camera_config) + robot = SO100Follower(robot_cfg) + robot.connect() -# This is used to match the raw observation keys to the keys expected by the policy -action_features = hw_to_dataset_features(robot.action_features, "action") -obs_features = hw_to_dataset_features(robot.observation_features, "observation") -dataset_features = {**action_features, **obs_features} + task = "" # something like "pick the red block" + robot_type = "" # something like "so100_follower" for multi-embodiment datasets -for _ in range(MAX_EPISODES): - for _ in range(MAX_STEPS_PER_EPISODE): - obs = robot.get_observation() - obs_frame = build_inference_frame( - observation=obs, ds_features=dataset_features, device=device, task=task, robot_type=robot_type - ) + # This is used to match the raw observation keys to the keys expected by the policy + action_features = hw_to_dataset_features(robot.action_features, "action") + obs_features = hw_to_dataset_features(robot.observation_features, "observation") + dataset_features = {**action_features, **obs_features} - obs = preprocess(obs_frame) + for _ in range(MAX_EPISODES): + for _ in range(MAX_STEPS_PER_EPISODE): + obs = robot.get_observation() + obs_frame = build_inference_frame( + observation=obs, ds_features=dataset_features, device=device, task=task, robot_type=robot_type + ) - action = model.select_action(obs) - action = postprocess(action) - action = make_robot_action(action, dataset_features) - robot.send_action(action) + obs = preprocess(obs_frame) - print("Episode finished! Starting new episode...") + action = model.select_action(obs) + action = postprocess(action) + action = make_robot_action(action, dataset_features) + robot.send_action(action) + + print("Episode finished! Starting new episode...") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 14d5d9053..f17f798b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ discord = "https://discord.gg/s3KuuzsPFb" [project] name = "lerobot" -version = "0.4.2" +version = "0.4.3" description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch" readme = "README.md" license = { text = "Apache-2.0" } diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 5ea18848b..d9d4b22d0 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -16,7 +16,6 @@ import concurrent.futures import contextlib import logging -import platform import shutil import tempfile from collections.abc import Callable @@ -1149,7 +1148,7 @@ class LeRobotDataset(torch.utils.data.Dataset): def save_episode( self, episode_data: dict | None = None, - parallel_encoding: bool = platform.system() == "Linux", + parallel_encoding: bool = True, ) -> None: """ This will save to disk the current episode in self.episode_buffer. diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index 54d0fba69..13fd66507 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -78,7 +78,7 @@ from lerobot.transport.utils import ( transitions_to_bytes, ) from lerobot.utils.random_utils import set_seed -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.transition import ( Transition, move_state_dict_to_device, @@ -398,7 +398,7 @@ def act_with_policy( if cfg.env.fps is not None: dt_time = time.perf_counter() - start_time - busy_wait(1 / cfg.env.fps - dt_time) + precise_sleep(1 / cfg.env.fps - dt_time) # Communication Functions - Group all gRPC/messaging functions diff --git a/src/lerobot/rl/gym_manipulator.py b/src/lerobot/rl/gym_manipulator.py index f9c9d0d7a..ad1fdf55f 100644 --- a/src/lerobot/rl/gym_manipulator.py +++ b/src/lerobot/rl/gym_manipulator.py @@ -74,7 +74,7 @@ from lerobot.teleoperators import ( from lerobot.teleoperators.teleoperator import Teleoperator from lerobot.teleoperators.utils import TeleopEvents from lerobot.utils.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, REWARD -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import log_say logging.basicConfig(level=logging.INFO) @@ -114,7 +114,7 @@ def reset_follower_position(robot_arm: Robot, target_position: np.ndarray) -> No for pose in trajectory: action_dict = dict(zip(current_position_dict, pose, strict=False)) robot_arm.bus.sync_write("Goal_Position", action_dict) - busy_wait(0.015) + precise_sleep(0.015) class RobotEnv(gym.Env): @@ -238,7 +238,7 @@ class RobotEnv(gym.Env): reset_follower_position(self.robot, np.array(self.reset_pose)) log_say("Reset the environment done.", play_sounds=True) - busy_wait(self.reset_time_s - (time.perf_counter() - start_time)) + precise_sleep(self.reset_time_s - (time.perf_counter() - start_time)) super().reset(seed=seed, options=options) @@ -713,7 +713,7 @@ def control_loop( transition = env_processor(transition) # Maintain fps timing - busy_wait(dt - (time.perf_counter() - step_start_time)) + precise_sleep(dt - (time.perf_counter() - step_start_time)) if dataset is not None and cfg.dataset.push_to_hub: logging.info("Pushing dataset to hub") @@ -745,7 +745,7 @@ def replay_trajectory( ) transition = action_processor(transition) env.step(transition[TransitionKey.ACTION]) - busy_wait(1 / cfg.env.fps - (time.perf_counter() - start_time)) + precise_sleep(1 / cfg.env.fps - (time.perf_counter() - start_time)) @parser.wrap() diff --git a/src/lerobot/scripts/lerobot_find_joint_limits.py b/src/lerobot/scripts/lerobot_find_joint_limits.py index 07d57a760..4ea83c976 100644 --- a/src/lerobot/scripts/lerobot_find_joint_limits.py +++ b/src/lerobot/scripts/lerobot_find_joint_limits.py @@ -50,7 +50,7 @@ from lerobot.teleoperators import ( # noqa: F401 make_teleoperator_from_config, so100_leader, ) -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep @dataclass @@ -114,7 +114,7 @@ def find_joint_and_ee_bounds(cfg: FindJointLimitsConfig): print(f"Min joint pos position {np.round(min_pos, 4).tolist()}") break - busy_wait(0.01) + precise_sleep(0.01) def main(): diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py index 6df92d893..600a73022 100644 --- a/src/lerobot/scripts/lerobot_record.py +++ b/src/lerobot/scripts/lerobot_record.py @@ -119,7 +119,7 @@ from lerobot.utils.control_utils import ( sanity_check_dataset_robot_compatibility, ) from lerobot.utils.import_utils import register_third_party_devices -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import ( get_safe_torch_device, init_logging, @@ -364,7 +364,7 @@ def record_loop( log_rerun_data(observation=obs_processed, action=action_values) dt_s = time.perf_counter() - start_loop_t - busy_wait(1 / fps - dt_s) + precise_sleep(1 / fps - dt_s) timestamp = time.perf_counter() - start_episode_t diff --git a/src/lerobot/scripts/lerobot_replay.py b/src/lerobot/scripts/lerobot_replay.py index ffd7b2b22..d25fcc3bd 100644 --- a/src/lerobot/scripts/lerobot_replay.py +++ b/src/lerobot/scripts/lerobot_replay.py @@ -62,7 +62,7 @@ from lerobot.robots import ( # noqa: F401 ) from lerobot.utils.constants import ACTION from lerobot.utils.import_utils import register_third_party_devices -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import ( init_logging, log_say, @@ -121,7 +121,7 @@ def replay(cfg: ReplayConfig): _ = robot.send_action(processed_action) dt_s = time.perf_counter() - start_episode_t - busy_wait(1 / dataset.fps - dt_s) + precise_sleep(1 / dataset.fps - dt_s) robot.disconnect() diff --git a/src/lerobot/scripts/lerobot_teleoperate.py b/src/lerobot/scripts/lerobot_teleoperate.py index 0a418f3bc..632419300 100644 --- a/src/lerobot/scripts/lerobot_teleoperate.py +++ b/src/lerobot/scripts/lerobot_teleoperate.py @@ -89,7 +89,7 @@ from lerobot.teleoperators import ( # noqa: F401 so101_leader, ) from lerobot.utils.import_utils import register_third_party_devices -from lerobot.utils.robot_utils import busy_wait +from lerobot.utils.robot_utils import precise_sleep from lerobot.utils.utils import init_logging, move_cursor_up from lerobot.utils.visualization_utils import init_rerun, log_rerun_data @@ -170,12 +170,13 @@ def teleop_loop( # Display the final robot action that was sent for motor, value in robot_action_to_send.items(): print(f"{motor:<{display_len}} | {value:>7.2f}") - move_cursor_up(len(robot_action_to_send) + 5) + move_cursor_up(len(robot_action_to_send) + 3) dt_s = time.perf_counter() - loop_start - busy_wait(1 / fps - dt_s) + precise_sleep(1 / fps - dt_s) loop_s = time.perf_counter() - loop_start - print(f"\ntime: {loop_s * 1e3:.2f}ms ({1 / loop_s:.0f} Hz)") + print(f"Teleop loop time: {loop_s * 1e3:.2f}ms ({1 / loop_s:.0f} Hz)") + move_cursor_up(1) if duration is not None and time.perf_counter() - start >= duration: return diff --git a/src/lerobot/utils/robot_utils.py b/src/lerobot/utils/robot_utils.py index 42abcdda4..28c8e7c49 100644 --- a/src/lerobot/utils/robot_utils.py +++ b/src/lerobot/utils/robot_utils.py @@ -16,14 +16,40 @@ import platform import time -def busy_wait(seconds): - if platform.system() == "Darwin" or platform.system() == "Windows": - # On Mac and Windows, `time.sleep` is not accurate and we need to use this while loop trick, - # but it consumes CPU cycles. +def precise_sleep(seconds: float, spin_threshold: float = 0.010, sleep_margin: float = 0.003): + """ + Wait for `seconds` with better precision than time.sleep alone at the expense of more CPU usage. + + Parameters: + - seconds: duration to wait + - spin_threshold: if remaining <= spin_threshold -> spin; otherwise sleep (seconds). Default 10ms + - sleep_margin: when sleeping leave this much time before deadline to avoid oversleep. Default 3ms + + Note: + The default parameters are chosen to prioritize timing accuracy over CPU usage for the common 30 FPS use case. + """ + if seconds <= 0: + return + + system = platform.system() + # On macOS and Windows the scheduler / sleep granularity can make + # short sleeps inaccurate. Instead of burning CPU for the whole + # duration, sleep for most of the time and spin for the final few + # milliseconds to achieve good accuracy with much lower CPU usage. + if system in ("Darwin", "Windows"): end_time = time.perf_counter() + seconds - while time.perf_counter() < end_time: - pass + while True: + remaining = end_time - time.perf_counter() + if remaining <= 0: + break + # If there's more than a couple milliseconds left, sleep most + # of the remaining time and leave a small margin for the final spin. + if remaining > spin_threshold: + # Sleep but avoid sleeping past the end by leaving a small margin. + time.sleep(max(remaining - sleep_margin, 0)) + else: + # Final short spin to hit precise timing without long sleeps. + pass else: - # On Linux time.sleep is accurate - if seconds > 0: - time.sleep(seconds) + # On Linux time.sleep is accurate enough for most uses + time.sleep(seconds)