From cf86b9300dc83fdad408cfe4787b7b09b55f12cf Mon Sep 17 00:00:00 2001 From: Caroline Pascal Date: Tue, 8 Jul 2025 18:59:13 +0200 Subject: [PATCH 01/20] fix(logging): Fixing logging levels (#1466) * fix(logging): Fixing logging levels, adding custom logging levels for console and file logging * clean(typing): Adding typing in logging formatter, use proper getter for logging message --- src/lerobot/utils/utils.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/lerobot/utils/utils.py b/src/lerobot/utils/utils.py index 6d9c0338b..2e94a9c93 100644 --- a/src/lerobot/utils/utils.py +++ b/src/lerobot/utils/utils.py @@ -111,35 +111,46 @@ def is_amp_available(device: str): raise ValueError(f"Unknown device '{device}.") -def init_logging(log_file: Path | None = None, display_pid: bool = False): - def custom_format(record): +def init_logging( + log_file: Path | None = None, + display_pid: bool = False, + console_level: str = "INFO", + file_level: str = "DEBUG", +): + def custom_format(record: logging.LogRecord) -> str: dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") fnameline = f"{record.pathname}:{record.lineno}" # NOTE: Display PID is useful for multi-process logging. 
if display_pid: pid_str = f"[PID: {os.getpid()}]" - message = f"{record.levelname} {pid_str} {dt} {fnameline[-15:]:>15} {record.msg}" + message = f"{record.levelname} {pid_str} {dt} {fnameline[-15:]:>15} {record.getMessage()}" else: - message = f"{record.levelname} {dt} {fnameline[-15:]:>15} {record.msg}" + message = f"{record.levelname} {dt} {fnameline[-15:]:>15} {record.getMessage()}" return message - logging.basicConfig(level=logging.INFO) - - for handler in logging.root.handlers[:]: - logging.root.removeHandler(handler) - formatter = logging.Formatter() formatter.format = custom_format + + logger = logging.getLogger() + logger.setLevel(logging.NOTSET) # Set the logger to the lowest level to capture all messages + + # Remove unused default handlers + for handler in logger.handlers[:]: + logger.removeHandler(handler) + + # Write logs to console console_handler = logging.StreamHandler() console_handler.setFormatter(formatter) - logging.getLogger().addHandler(console_handler) + console_handler.setLevel(console_level.upper()) + logger.addHandler(console_handler) + # Additionally write logs to file if log_file is not None: - # Additionally write logs to file file_handler = logging.FileHandler(log_file) file_handler.setFormatter(formatter) - logging.getLogger().addHandler(file_handler) + file_handler.setLevel(file_level.upper()) + logger.addHandler(file_handler) def format_big_number(num, precision=0): From ce2b9724bfe1b5a4c45e61b1890eef3f5ab0909c Mon Sep 17 00:00:00 2001 From: Adil Zouitine Date: Wed, 9 Jul 2025 16:22:40 +0200 Subject: [PATCH 02/20] fix(hil-serl): discrete critic send through network (#1468) Co-authored-by: Khalil Meftah Co-authored-by: jpizarrom --- pyproject.toml | 2 +- src/lerobot/scripts/rl/actor.py | 28 ++++++++++-- src/lerobot/scripts/rl/learner.py | 14 +++++- src/lerobot/transport/services.proto | 4 +- src/lerobot/transport/services_pb2.py | 32 ++++++------- src/lerobot/transport/services_pb2_grpc.py | 52 +++++++++++----------- 6 files 
changed, 81 insertions(+), 51 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 408e3b773..e13a9af01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ dependencies = [ [project.optional-dependencies] aloha = ["gym-aloha>=0.1.1 ; python_version < '4.0'"] docs = ["hf-doc-builder @ git+https://github.com/huggingface/doc-builder.git@main", "watchdog >= 6.0.0"] -dev = ["pre-commit>=3.7.0", "debugpy>=1.8.1"] +dev = ["pre-commit>=3.7.0", "debugpy>=1.8.1", "grpcio-tools==1.71.0"] dora = [ "gym-dora @ git+https://github.com/dora-rs/dora-lerobot.git#subdirectory=gym_dora ; python_version < '4.0'", ] diff --git a/src/lerobot/scripts/rl/actor.py b/src/lerobot/scripts/rl/actor.py index 0e96d3354..cd5e286c0 100644 --- a/src/lerobot/scripts/rl/actor.py +++ b/src/lerobot/scripts/rl/actor.py @@ -317,7 +317,7 @@ def act_with_policy( if done or truncated: logging.info(f"[ACTOR] Global step {interaction_step}: Episode reward: {sum_reward_episode}") - update_policy_parameters(policy=policy.actor, parameters_queue=parameters_queue, device=device) + update_policy_parameters(policy=policy, parameters_queue=parameters_queue, device=device) if len(list_transition_to_send_to_learner) > 0: push_transitions_to_transport_queue( @@ -642,9 +642,29 @@ def update_policy_parameters(policy: SACPolicy, parameters_queue: Queue, device) bytes_state_dict = get_last_item_from_queue(parameters_queue, block=False) if bytes_state_dict is not None: logging.info("[ACTOR] Load new parameters from Learner.") - state_dict = bytes_to_state_dict(bytes_state_dict) - state_dict = move_state_dict_to_device(state_dict, device=device) - policy.load_state_dict(state_dict) + state_dicts = bytes_to_state_dict(bytes_state_dict) + + # TODO: check encoder parameter synchronization possible issues: + # 1. When shared_encoder=True, we're loading stale encoder params from actor's state_dict + # instead of the updated encoder params from critic (which is optimized separately) + # 2. 
When freeze_vision_encoder=True, we waste bandwidth sending/loading frozen params + # 3. Need to handle encoder params correctly for both actor and discrete_critic + # Potential fixes: + # - Send critic's encoder state when shared_encoder=True + # - Skip encoder params entirely when freeze_vision_encoder=True + # - Ensure discrete_critic gets correct encoder state (currently uses encoder_critic) + + # Load actor state dict + actor_state_dict = move_state_dict_to_device(state_dicts["policy"], device=device) + policy.actor.load_state_dict(actor_state_dict) + + # Load discrete critic if present + if hasattr(policy, "discrete_critic") and "discrete_critic" in state_dicts: + discrete_critic_state_dict = move_state_dict_to_device( + state_dicts["discrete_critic"], device=device + ) + policy.discrete_critic.load_state_dict(discrete_critic_state_dict) + logging.info("[ACTOR] Loaded discrete critic parameters from Learner.") ################################################# diff --git a/src/lerobot/scripts/rl/learner.py b/src/lerobot/scripts/rl/learner.py index d8830d83e..edd2363b1 100644 --- a/src/lerobot/scripts/rl/learner.py +++ b/src/lerobot/scripts/rl/learner.py @@ -1109,8 +1109,18 @@ def check_nan_in_transition( def push_actor_policy_to_queue(parameters_queue: Queue, policy: nn.Module): logging.debug("[LEARNER] Pushing actor policy to the queue") - state_dict = move_state_dict_to_device(policy.actor.state_dict(), device="cpu") - state_bytes = state_to_bytes(state_dict) + + # Create a dictionary to hold all the state dicts + state_dicts = {"policy": move_state_dict_to_device(policy.actor.state_dict(), device="cpu")} + + # Add discrete critic if it exists + if hasattr(policy, "discrete_critic") and policy.discrete_critic is not None: + state_dicts["discrete_critic"] = move_state_dict_to_device( + policy.discrete_critic.state_dict(), device="cpu" + ) + logging.debug("[LEARNER] Including discrete critic in state dict push") + + state_bytes = state_to_bytes(state_dicts) 
parameters_queue.put(state_bytes) diff --git a/src/lerobot/transport/services.proto b/src/lerobot/transport/services.proto index 89bfc107a..70f39741f 100644 --- a/src/lerobot/transport/services.proto +++ b/src/lerobot/transport/services.proto @@ -11,11 +11,11 @@ // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and -// limitations under the License. +// limitations under the License. // To generate a classes for transport part (services_pb2.py and services_pb2_grpc.py) use the following command: // -// python -m grpc_tools.protoc -I . --python_out=. --grpc_python_out=. src/lerobot/transport/services.proto +// python -m grpc_tools.protoc -I src --python_out=src --grpc_python_out=src src/lerobot/transport/services.proto // // The command should be launched from the root of the project. diff --git a/src/lerobot/transport/services_pb2.py b/src/lerobot/transport/services_pb2.py index 8a2137687..9e66ae1e3 100644 --- a/src/lerobot/transport/services_pb2.py +++ b/src/lerobot/transport/services_pb2.py @@ -1,6 +1,6 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! 
# NO CHECKED-IN PROTOBUF GENCODE -# source: src/lerobot/transport/services.proto +# source: lerobot/transport/services.proto # Protobuf Python Version: 5.29.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor @@ -14,7 +14,7 @@ _runtime_version.ValidateProtobufRuntimeVersion( 29, 0, '', - 'src/lerobot/transport/services.proto' + 'lerobot/transport/services.proto' ) # @@protoc_insertion_point(imports) @@ -23,23 +23,23 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n$src/lerobot/transport/services.proto\x12\ttransport\"L\n\nTransition\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"L\n\nParameters\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"T\n\x12InteractionMessage\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"\x07\n\x05\x45mpty*`\n\rTransferState\x12\x14\n\x10TRANSFER_UNKNOWN\x10\x00\x12\x12\n\x0eTRANSFER_BEGIN\x10\x01\x12\x13\n\x0fTRANSFER_MIDDLE\x10\x02\x12\x10\n\x0cTRANSFER_END\x10\x03\x32\x81\x02\n\x0eLearnerService\x12=\n\x10StreamParameters\x12\x10.transport.Empty\x1a\x15.transport.Parameters0\x01\x12<\n\x0fSendTransitions\x12\x15.transport.Transition\x1a\x10.transport.Empty(\x01\x12\x45\n\x10SendInteractions\x12\x1d.transport.InteractionMessage\x1a\x10.transport.Empty(\x01\x12+\n\x05Ready\x12\x10.transport.Empty\x1a\x10.transport.Emptyb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n lerobot/transport/services.proto\x12\ttransport\"L\n\nTransition\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"L\n\nParameters\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 
\x01(\x0c\"T\n\x12InteractionMessage\x12\x30\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x18.transport.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"\x07\n\x05\x45mpty*`\n\rTransferState\x12\x14\n\x10TRANSFER_UNKNOWN\x10\x00\x12\x12\n\x0eTRANSFER_BEGIN\x10\x01\x12\x13\n\x0fTRANSFER_MIDDLE\x10\x02\x12\x10\n\x0cTRANSFER_END\x10\x03\x32\x81\x02\n\x0eLearnerService\x12=\n\x10StreamParameters\x12\x10.transport.Empty\x1a\x15.transport.Parameters0\x01\x12<\n\x0fSendTransitions\x12\x15.transport.Transition\x1a\x10.transport.Empty(\x01\x12\x45\n\x10SendInteractions\x12\x1d.transport.InteractionMessage\x1a\x10.transport.Empty(\x01\x12+\n\x05Ready\x12\x10.transport.Empty\x1a\x10.transport.Emptyb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'src.lerobot.transport.services_pb2', _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'lerobot.transport.services_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None - _globals['_TRANSFERSTATE']._serialized_start=302 - _globals['_TRANSFERSTATE']._serialized_end=398 - _globals['_TRANSITION']._serialized_start=51 - _globals['_TRANSITION']._serialized_end=127 - _globals['_PARAMETERS']._serialized_start=129 - _globals['_PARAMETERS']._serialized_end=205 - _globals['_INTERACTIONMESSAGE']._serialized_start=207 - _globals['_INTERACTIONMESSAGE']._serialized_end=291 - _globals['_EMPTY']._serialized_start=293 - _globals['_EMPTY']._serialized_end=300 - _globals['_LEARNERSERVICE']._serialized_start=401 - _globals['_LEARNERSERVICE']._serialized_end=658 + _globals['_TRANSFERSTATE']._serialized_start=298 + _globals['_TRANSFERSTATE']._serialized_end=394 + _globals['_TRANSITION']._serialized_start=47 + _globals['_TRANSITION']._serialized_end=123 + _globals['_PARAMETERS']._serialized_start=125 + _globals['_PARAMETERS']._serialized_end=201 + _globals['_INTERACTIONMESSAGE']._serialized_start=203 + 
_globals['_INTERACTIONMESSAGE']._serialized_end=287 + _globals['_EMPTY']._serialized_start=289 + _globals['_EMPTY']._serialized_end=296 + _globals['_LEARNERSERVICE']._serialized_start=397 + _globals['_LEARNERSERVICE']._serialized_end=654 # @@protoc_insertion_point(module_scope) diff --git a/src/lerobot/transport/services_pb2_grpc.py b/src/lerobot/transport/services_pb2_grpc.py index a4fe8c576..77801a340 100644 --- a/src/lerobot/transport/services_pb2_grpc.py +++ b/src/lerobot/transport/services_pb2_grpc.py @@ -3,7 +3,7 @@ import grpc import warnings -from src.lerobot.transport import services_pb2 as src_dot_lerobot_dot_transport_dot_services__pb2 +from lerobot.transport import services_pb2 as lerobot_dot_transport_dot_services__pb2 GRPC_GENERATED_VERSION = '1.71.0' GRPC_VERSION = grpc.__version__ @@ -18,7 +18,7 @@ except ImportError: if _version_not_supported: raise RuntimeError( f'The grpc package installed is at version {GRPC_VERSION},' - + f' but the generated code in src/lerobot/transport/services_pb2_grpc.py depends on' + + f' but the generated code in lerobot/transport/services_pb2_grpc.py depends on' + f' grpcio>={GRPC_GENERATED_VERSION}.' + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 
@@ -38,23 +38,23 @@ class LearnerServiceStub: """ self.StreamParameters = channel.unary_stream( '/transport.LearnerService/StreamParameters', - request_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, - response_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Parameters.FromString, + request_serializer=lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + response_deserializer=lerobot_dot_transport_dot_services__pb2.Parameters.FromString, _registered_method=True) self.SendTransitions = channel.stream_unary( '/transport.LearnerService/SendTransitions', - request_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Transition.SerializeToString, - response_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, + request_serializer=lerobot_dot_transport_dot_services__pb2.Transition.SerializeToString, + response_deserializer=lerobot_dot_transport_dot_services__pb2.Empty.FromString, _registered_method=True) self.SendInteractions = channel.stream_unary( '/transport.LearnerService/SendInteractions', - request_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.InteractionMessage.SerializeToString, - response_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, + request_serializer=lerobot_dot_transport_dot_services__pb2.InteractionMessage.SerializeToString, + response_deserializer=lerobot_dot_transport_dot_services__pb2.Empty.FromString, _registered_method=True) self.Ready = channel.unary_unary( '/transport.LearnerService/Ready', - request_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, - response_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, + request_serializer=lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + response_deserializer=lerobot_dot_transport_dot_services__pb2.Empty.FromString, _registered_method=True) @@ -93,23 +93,23 @@ def 
add_LearnerServiceServicer_to_server(servicer, server): rpc_method_handlers = { 'StreamParameters': grpc.unary_stream_rpc_method_handler( servicer.StreamParameters, - request_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, - response_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Parameters.SerializeToString, + request_deserializer=lerobot_dot_transport_dot_services__pb2.Empty.FromString, + response_serializer=lerobot_dot_transport_dot_services__pb2.Parameters.SerializeToString, ), 'SendTransitions': grpc.stream_unary_rpc_method_handler( servicer.SendTransitions, - request_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Transition.FromString, - response_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + request_deserializer=lerobot_dot_transport_dot_services__pb2.Transition.FromString, + response_serializer=lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, ), 'SendInteractions': grpc.stream_unary_rpc_method_handler( servicer.SendInteractions, - request_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.InteractionMessage.FromString, - response_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + request_deserializer=lerobot_dot_transport_dot_services__pb2.InteractionMessage.FromString, + response_serializer=lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, ), 'Ready': grpc.unary_unary_rpc_method_handler( servicer.Ready, - request_deserializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, - response_serializer=src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + request_deserializer=lerobot_dot_transport_dot_services__pb2.Empty.FromString, + response_serializer=lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -139,8 +139,8 @@ class LearnerService: request, target, 
'/transport.LearnerService/StreamParameters', - src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, - src_dot_lerobot_dot_transport_dot_services__pb2.Parameters.FromString, + lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + lerobot_dot_transport_dot_services__pb2.Parameters.FromString, options, channel_credentials, insecure, @@ -166,8 +166,8 @@ class LearnerService: request_iterator, target, '/transport.LearnerService/SendTransitions', - src_dot_lerobot_dot_transport_dot_services__pb2.Transition.SerializeToString, - src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, + lerobot_dot_transport_dot_services__pb2.Transition.SerializeToString, + lerobot_dot_transport_dot_services__pb2.Empty.FromString, options, channel_credentials, insecure, @@ -193,8 +193,8 @@ class LearnerService: request_iterator, target, '/transport.LearnerService/SendInteractions', - src_dot_lerobot_dot_transport_dot_services__pb2.InteractionMessage.SerializeToString, - src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, + lerobot_dot_transport_dot_services__pb2.InteractionMessage.SerializeToString, + lerobot_dot_transport_dot_services__pb2.Empty.FromString, options, channel_credentials, insecure, @@ -220,8 +220,8 @@ class LearnerService: request, target, '/transport.LearnerService/Ready', - src_dot_lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, - src_dot_lerobot_dot_transport_dot_services__pb2.Empty.FromString, + lerobot_dot_transport_dot_services__pb2.Empty.SerializeToString, + lerobot_dot_transport_dot_services__pb2.Empty.FromString, options, channel_credentials, insecure, From 30c161006d606700769fe8401b4f1bac3809ce9c Mon Sep 17 00:00:00 2001 From: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Date: Thu, 10 Jul 2025 10:39:11 +0200 Subject: [PATCH 03/20] Add Async Inference (#1196) Co-authored-by: Steven Palma Co-authored-by: Michel Aractingi --- docs/source/_toctree.yml | 2 + 
docs/source/async.mdx | 272 ++++++++++ pyproject.toml | 3 +- src/lerobot/scripts/server/configs.py | 197 +++++++ src/lerobot/scripts/server/constants.py | 29 + src/lerobot/scripts/server/helpers.py | 386 +++++++++++++ src/lerobot/scripts/server/policy_server.py | 403 ++++++++++++++ src/lerobot/scripts/server/robot_client.py | 509 ++++++++++++++++++ src/lerobot/transport/async_inference.proto | 59 ++ src/lerobot/transport/async_inference_pb2.py | 45 ++ .../transport/async_inference_pb2_grpc.py | 277 ++++++++++ tests/async_inference/test_e2e.py | 177 ++++++ tests/async_inference/test_helpers.py | 459 ++++++++++++++++ tests/async_inference/test_policy_server.py | 215 ++++++++ tests/async_inference/test_robot_client.py | 234 ++++++++ 15 files changed, 3266 insertions(+), 1 deletion(-) create mode 100644 docs/source/async.mdx create mode 100644 src/lerobot/scripts/server/configs.py create mode 100644 src/lerobot/scripts/server/constants.py create mode 100644 src/lerobot/scripts/server/helpers.py create mode 100644 src/lerobot/scripts/server/policy_server.py create mode 100644 src/lerobot/scripts/server/robot_client.py create mode 100644 src/lerobot/transport/async_inference.proto create mode 100644 src/lerobot/transport/async_inference_pb2.py create mode 100644 src/lerobot/transport/async_inference_pb2_grpc.py create mode 100644 tests/async_inference/test_e2e.py create mode 100644 tests/async_inference/test_helpers.py create mode 100644 tests/async_inference/test_policy_server.py create mode 100644 tests/async_inference/test_robot_client.py diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 83777a3c8..1af96d79d 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -17,6 +17,8 @@ title: Train a Robot with RL - local: hilserl_sim title: Train RL in Simulation + - local: async + title: Use Async Inference title: "Tutorials" - sections: - local: smolvla diff --git a/docs/source/async.mdx b/docs/source/async.mdx new file mode 100644 
index 000000000..0a0823cf0 --- /dev/null +++ b/docs/source/async.mdx @@ -0,0 +1,272 @@ +# Asynchronous Inference + +With our [SmolVLA](https://huggingface.co/papers/2506.01844) we introduced a new way to run inference on real-world robots, **decoupling action prediction from action execution**. +In this tutorial, we'll show how to use asynchronous inference (_async inference_) using a finetuned version of SmolVLA, and all the policies supported by LeRobot. +**Try async inference with all the policies** supported by LeRobot! + +**What you'll learn:** +1. Why asynchronous inference matters and how it compares to, more traditional, sequential inference. +2. How to spin-up a `PolicyServer` and connect a `RobotClient` from the same machine, and even over the network. +3. How to tune key parameters (`actions_per_chunk`, `chunk_size_threshold`) for your robot and policy. + +If you get stuck, hop into our [Discord community](https://discord.gg/s3KuuzsPFb)! + + +In a nutshell: with *async inference*, your robot keeps acting while the policy server is already busy computing the next chunk of actions---eliminating "wait-for-inference" lags and unlocking smoother, more reactive behaviours. +This is fundamentally different from synchronous inference (sync), where the robot stays idle while the policy computes the next chunk of actions. + +--- +## Getting started with async inference + +You can read more information on asynchronous inference in our [blogpost](NOTE:blogpost). Here, we report a getting started guide meant to help you setup and run asynchronous inference in your setup. + +First, install `lerobot` with the `async` tag, to install the extra dependencies required to run async inference. + +```shell +pip install -e ".[async]" +``` + +Then, spin up a policy server (in one terminal, or in a separate machine) specifying the host address and port for the client to connect to. 
+You can spin up a policy server running: + +```shell +python src/lerobot/scripts/server/policy_server.py \ + --host=127.0.0.1 \ + --port=8080 \ +``` + +This will start a policy server listening on `127.0.0.1:8080` (`localhost`, port 8080). At this stage, the policy server is empty, as all information related to which policy to run and with which parameters are specified during the first handshake with the client. Spin up a client with: + +```shell +python src/lerobot/scripts/server/robot_client.py \ + --server_address=127.0.0.1:8080 \ # SERVER: the host address and port of the policy server + --robot.type=so100_follower \ # ROBOT: your robot type + --robot.port=/dev/tty.usbmodem585A0076841 \ # ROBOT: your robot port + --robot.id=follower_so100 \ # ROBOT: your robot id, to load calibration file + --robot.cameras="{ laptop: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}, phone: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \ # POLICY: the cameras used to acquire frames, with keys matching the keys expected by the policy + --task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). 
Not necessarily defined for all policies, such as `act` + --policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc) + --pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base) + --policy_device=mps \ # POLICY: the device to run the policy on, on the server + --actions_per_chunk=50 \ # POLICY: the number of actions to output at once + --chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server + --aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions + --debug_visualize_queue_size=True # CLIENT: whether to visualize the queue size at runtime +``` +In summary, you need to specify instructions for: +- `SERVER`: the address and port of the policy server +- `ROBOT`: the type of robot to connect to, the port to connect to, and the local `id` of the robot +- `POLICY`: the type of policy to run, and the model name/path on server to the checkpoint to run. You also need to specify which device should the sever be using, and how many actions to output at once (capped at the policy max actions value). +- `CLIENT`: the threshold for the chunk size before sending a new observation to the server, and the function to aggregate actions on overlapping portions. Optionally, you can also visualize the queue size at runtime, to help you tune the `CLIENT` parameters. + +Importantly, +- `actions_per_chunk` and `chunk_size_threshold` are key parameters to tune for your setup. +- `aggregate_fn_name` is the function to aggregate actions on overlapping portions. You can either add a new one to a registry of functions, or add your own in `robot_client.py` (see [here](NOTE:addlinktoLOC)) +- `debug_visualize_queue_size` is a useful tool to tune the `CLIENT` parameters. + +Done! You should see your robot moving around by now 😉 +--- + +## Async vs. 
synchronous inference + +Synchronous inference relies on interleaving action chunk prediction and action execution. This inherently results in *idle frames*, frames where the robot awaits idle the policy's output: a new action chunk. +In turn, inference is plagued by evident real-time lags, where the robot simply stops acting due to the lack of available actions. +With robotics models increasing in size, this problem risks becoming only more severe. + +

+ +

+

Synchronous inference makes the robot idle while the policy is computing the next chunk of actions.

+ +To overcome this, we design async inference, a paradigm where action planning and execution are decoupled, resulting in (1) higher adaptability and, most importantly, (2) no idle frames. +Crucially, with async inference, the next action chunk is computed *before* the current one is exhausted, resulting in no idleness. +Higher adaptability is ensured by aggregating the different action chunks on overlapping portions, obtaining an up-to-date plan and a tighter control loop. + +

+ +

+

Asynchronous inference results in no idleness because the next chunk is computed before the current chunk is exhausted.

+ + +--- + +## Start the Policy Server + +Policy servers are wrappers around a `PreTrainedPolicy` interfacing them with observations coming from a robot client. +Policy servers are initialized as empty containers which are populated with the requested policy specified in the initial handshake between the robot client and the policy server. +As such, spinning up a policy server is as easy as specifying the host address and port. If you're running the policy server on the same machine as the robot client, you can use `localhost` as the host address. + + + +```bash +python -m lerobot.scripts.server.policy_server \ + --host="localhost" \ + --port=8080 +``` + + +```python +from lerobot.scripts.server.configs import PolicyServerConfig +from lerobot.scripts.server.policy_server import serve + +config = PolicyServerConfig( + host="localhost", + port=8080, +) +serve(config) +``` + + + +This listens on `localhost:8080` for an incoming connection from the associated`RobotClient`, which will communicate which policy to run during the first client-server handshake. + +--- + +## Launch the Robot Client + +`RobotClient` is a wrapper around a `Robot` instance, which `RobotClient` connects to the (possibly remote) `PolicyServer`. +The `RobotClient` streams observations to the `PolicyServer`, and receives action chunks obtained running inference on the server (which we assume to have better computational resources than the robot controller). 
+ + + +```bash +python src/lerobot/scripts/server/robot_client.py \ + --server_address=127.0.0.1:8080 \ # SERVER: the host address and port of the policy server + --robot.type=so100_follower \ # ROBOT: your robot type + --robot.port=/dev/tty.usbmodem585A0076841 \ # ROBOT: your robot port + --robot.id=follower_so100 \ # ROBOT: your robot id, to load calibration file + --robot.cameras="{ laptop: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}, phone: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \ # POLICY: the cameras used to acquire frames, with keys matching the keys expected by the policy + --task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act` + --policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc) + --pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base) + --policy_device=mps \ # POLICY: the device to run the policy on, on the server + --actions_per_chunk=50 \ # POLICY: the number of actions to output at once + --chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server + --aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions + --debug_visualize_queue_size=True # CLIENT: whether to visualize the queue size at runtime +``` + + +```python +import threading +from lerobot.robots.so100_follower import SO100FollowerConfig +from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.scripts.server.configs import RobotClientConfig +from lerobot.scripts.server.robot_client import RobotClient +from lerobot.scripts.server.helpers import visualize_action_queue_size + +# 1. 
Create the robot instance +"""Check out the cameras available in your setup by running `python lerobot/find_cameras.py`""" +# these cameras must match the ones expected by the policy +# check the config.json on the Hub for the policy you are using +camera_cfg = { + "top": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "side": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30) +} + +robot_cfg = SO100FollowerConfig( + port="/dev/tty.usbmodem585A0076841", + id="follower_so100", + cameras=camera_cfg +) + +# 3. Create client configuration +client_cfg = RobotClientConfig( + robot=robot_cfg, + server_address="localhost:8080", + policy_device="mps", + policy_type="smolvla", + pretrained_name_or_path="fracapuano/smolvla_async", + chunk_size_threshold=0.5, + actions_per_chunk=50, # make sure this is less than the max actions of the policy +) + +# 4. Create and start client +client = RobotClient(client_cfg) + +# 5. Specify the task +task = "Don't do anything, stay still" + +if client.start(): + # Start action receiver thread + action_receiver_thread = threading.Thread(target=client.receive_actions, daemon=True) + action_receiver_thread.start() + + try: + # Run the control loop + client.control_loop(task) + except KeyboardInterrupt: + client.stop() + action_receiver_thread.join() + # (Optionally) plot the action queue size + visualize_action_queue_size(client.action_queue_size) +``` + + + +The following two parameters are key in every setup: + + + + + + + + + + + + + + + + + + + + + +
HyperparameterDefaultWhat it does
actions_per_chunk50How many actions the policy outputs at once. Typical values: 10-50.
chunk_size_threshold0.5When the queue is ≤ 50% full, the client sends a fresh observation. Value in [0, 1].
+ + +Different values of `actions_per_chunk` and `chunk_size_threshold` do result in different behaviours. + + +On the one hand, increasing the value of `actions_per_chunk` will result in reducing the likelihood of ending up with no actions to execute, as more actions will be available when the new chunk is computed. +However, larger values of `actions_per_chunk` might also result in less precise actions, due to the compounding errors consequent to predicting actions over longer timespans. + +On the other hand, increasing the value of `chunk_size_threshold` will result in sending out to the `PolicyServer` observations for inference more often, resulting in a larger number of updated action chunks, overlapping on significant portions. This results in high adaptability, in the limit predicting one action chunk for each observation, which is in turn only marginally consumed while a new one is produced. +This option also puts more pressure on the inference pipeline, as a consequence of the many requests. Conversely, values of `chunk_size_threshold` close to 0.0 collapse to the synchronous edge case, whereby new observations are only sent out whenever the current chunk is exhausted. + +We found the default values of `actions_per_chunk` and `chunk_size_threshold` to work well in the experiments we developed for the [SmolVLA paper](https://huggingface.co/papers/2506.01844), but recommend experimenting with different values to find the best fit for your setup. + +### Tuning async inference for your setup + +1. **Choose your computational resources carefully.** [PI0](https://huggingface.co/lerobot/pi0) occupies 14GB of memory at inference time, while [SmolVLA](https://huggingface.co/lerobot/smolvla_base) requires only ~2GB. You should identify the best computational resource for your use case keeping in mind smaller policies require less computational resources. 
The combination of policy and device used (CPU-intensive, using MPS, or the number of CUDA cores on a given NVIDIA GPU) directly impacts the average inference latency you should expect. +2. **Adjust your `fps` based on inference latency.** While the server generates a new action chunk, the client is not idle and is stepping through its current action queue. If the two processes happen at fundamentally different speeds, the client might end up with an empty queue. As such, you should reduce your fps if you consistently run out of actions in queue. +3. **Adjust `chunk_size_threshold`**. + - Values closer to `0.0` result in almost sequential behavior. Values closer to `1.0` → send observation every step (more bandwidth, relies on good world-model). + - We found values around 0.5-0.6 to work well. If you want to tweak this, spin up a `RobotClient` setting the `--debug_visualize_queue_size` flag to `True`. This will plot the action queue size evolution at runtime, and you can use it to find the value of `chunk_size_threshold` that works best for your setup. + +

+ +

+

The action queue size is plotted at runtime when the `--debug_visualize_queue_size` flag is passed, for various levels of `chunk_size_threshold` (`g` in the SmolVLA paper).

+ + +--- + +## Conclusion + +Asynchronous inference represents a significant advancement in real-time robotics control, addressing the fundamental challenge of inference latency that has long plagued robotics applications. Through this tutorial, you've learned how to implement a complete async inference pipeline that eliminates idle frames and enables smoother, more reactive robot behaviors. + +**Key Takeaways:** + +- **Paradigm Shift**: Async inference decouples action prediction from execution, allowing robots to continue acting while new action chunks are computed in parallel +- **Performance Benefits**: Eliminates "wait-for-inference" lags that are inherent in synchronous approaches, becoming increasingly important as policy models grow larger +- **Flexible Architecture**: The server-client design enables distributed computing, where inference can run on powerful remote hardware while maintaining real-time robot control +- **Tunable Parameters**: Success depends on properly configuring `actions_per_chunk` and `chunk_size_threshold` for your specific hardware, policy, and task requirements +- **Universal Compatibility**: Works with all LeRobot-supported policies, from lightweight ACT models to vision-language models like SmolVLA + +Start experimenting with the default parameters, monitor your action queue sizes, and iteratively refine your setup to achieve optimal performance for your specific use case. +If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/lerobot/lerobot/issues). 
diff --git a/pyproject.toml b/pyproject.toml index e13a9af01..81cb22a21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ classifiers = [ ] dependencies = [ "cmake>=3.29.0.1", - "datasets>=2.19.0", + "datasets>=2.19.0,<=3.6.0", "deepdiff>=7.0.1", "diffusers>=0.27.2", "draccus==0.10.0", @@ -105,6 +105,7 @@ hilserl = ["transformers>=4.50.3", "gym-hil>=0.1.9", "protobuf>=5.29.3", "grpcio umi = ["imagecodecs>=2024.1.1"] video_benchmark = ["scikit-image>=0.23.2", "pandas>=2.2.2"] xarm = ["gym-xarm>=0.1.1 ; python_version < '4.0'"] +async = ["grpcio==1.71.0", "matplotlib>=3.10.3"] [tool.poetry] requires-poetry = ">=2.1" diff --git a/src/lerobot/scripts/server/configs.py b/src/lerobot/scripts/server/configs.py new file mode 100644 index 000000000..7058842ae --- /dev/null +++ b/src/lerobot/scripts/server/configs.py @@ -0,0 +1,197 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataclasses import dataclass, field +from typing import Callable + +import torch + +from lerobot.robots.config import RobotConfig +from lerobot.scripts.server.constants import ( + DEFAULT_FPS, + DEFAULT_INFERENCE_LATENCY, + DEFAULT_OBS_QUEUE_TIMEOUT, +) + +# Aggregate function registry for CLI usage +AGGREGATE_FUNCTIONS = { + "weighted_average": lambda old, new: 0.3 * old + 0.7 * new, + "latest_only": lambda old, new: new, + "average": lambda old, new: 0.5 * old + 0.5 * new, + "conservative": lambda old, new: 0.7 * old + 0.3 * new, +} + + +def get_aggregate_function(name: str) -> Callable[[torch.Tensor, torch.Tensor], torch.Tensor]: + """Get aggregate function by name from registry.""" + if name not in AGGREGATE_FUNCTIONS: + available = list(AGGREGATE_FUNCTIONS.keys()) + raise ValueError(f"Unknown aggregate function '{name}'. Available: {available}") + return AGGREGATE_FUNCTIONS[name] + + +@dataclass +class PolicyServerConfig: + """Configuration for PolicyServer. + + This class defines all configurable parameters for the PolicyServer, + including networking settings and action chunking specifications. 
+ """ + + # Networking configuration + host: str = field(default="localhost", metadata={"help": "Host address to bind the server to"}) + port: int = field(default=8080, metadata={"help": "Port number to bind the server to"}) + + # Timing configuration + fps: int = field(default=DEFAULT_FPS, metadata={"help": "Frames per second"}) + inference_latency: float = field( + default=DEFAULT_INFERENCE_LATENCY, metadata={"help": "Target inference latency in seconds"} + ) + + obs_queue_timeout: float = field( + default=DEFAULT_OBS_QUEUE_TIMEOUT, metadata={"help": "Timeout for observation queue in seconds"} + ) + + def __post_init__(self): + """Validate configuration after initialization.""" + if self.port < 1 or self.port > 65535: + raise ValueError(f"Port must be between 1 and 65535, got {self.port}") + + if self.environment_dt <= 0: + raise ValueError(f"environment_dt must be positive, got {self.environment_dt}") + + if self.inference_latency < 0: + raise ValueError(f"inference_latency must be non-negative, got {self.inference_latency}") + + if self.obs_queue_timeout < 0: + raise ValueError(f"obs_queue_timeout must be non-negative, got {self.obs_queue_timeout}") + + @classmethod + def from_dict(cls, config_dict: dict) -> "PolicyServerConfig": + """Create a PolicyServerConfig from a dictionary.""" + return cls(**config_dict) + + @property + def environment_dt(self) -> float: + """Environment time step, in seconds""" + return 1 / self.fps + + def to_dict(self) -> dict: + """Convert the configuration to a dictionary.""" + return { + "host": self.host, + "port": self.port, + "fps": self.fps, + "environment_dt": self.environment_dt, + "inference_latency": self.inference_latency, + } + + +@dataclass +class RobotClientConfig: + """Configuration for RobotClient. + + This class defines all configurable parameters for the RobotClient, + including network connection, policy settings, and control behavior. 
+ """ + + # Policy configuration + policy_type: str = field(metadata={"help": "Type of policy to use"}) + pretrained_name_or_path: str = field(metadata={"help": "Pretrained model name or path"}) + + # Robot configuration (for CLI usage - robot instance will be created from this) + robot: RobotConfig = field(metadata={"help": "Robot configuration"}) + + # Policies typically output K actions at max, but we can use less to avoid wasting bandwidth (as actions + # would be aggregated on the client side anyway, depending on the value of `chunk_size_threshold`) + actions_per_chunk: int = field(metadata={"help": "Number of actions per chunk"}) + + # Task instruction for the robot to execute (e.g., 'fold my tshirt') + task: str = field(default="", metadata={"help": "Task instruction for the robot to execute"}) + + # Network configuration + server_address: str = field(default="localhost:8080", metadata={"help": "Server address to connect to"}) + + # Device configuration + policy_device: str = field(default="cpu", metadata={"help": "Device for policy inference"}) + + # Control behavior configuration + chunk_size_threshold: float = field(default=0.5, metadata={"help": "Threshold for chunk size control"}) + fps: int = field(default=DEFAULT_FPS, metadata={"help": "Frames per second"}) + + # Aggregate function configuration (CLI-compatible) + aggregate_fn_name: str = field( + default="weighted_average", + metadata={"help": f"Name of aggregate function to use. 
Options: {list(AGGREGATE_FUNCTIONS.keys())}"}, + ) + + # Debug configuration + debug_visualize_queue_size: bool = field( + default=False, metadata={"help": "Visualize the action queue size"} + ) + + # Verification configuration + verify_robot_cameras: bool = field( + default=True, metadata={"help": "Verify that the robot cameras match the policy cameras"} + ) + + @property + def environment_dt(self) -> float: + """Environment time step, in seconds""" + return 1 / self.fps + + def __post_init__(self): + """Validate configuration after initialization.""" + if not self.server_address: + raise ValueError("server_address cannot be empty") + + if not self.policy_type: + raise ValueError("policy_type cannot be empty") + + if not self.pretrained_name_or_path: + raise ValueError("pretrained_name_or_path cannot be empty") + + if not self.policy_device: + raise ValueError("policy_device cannot be empty") + + if self.chunk_size_threshold < 0 or self.chunk_size_threshold > 1: + raise ValueError(f"chunk_size_threshold must be between 0 and 1, got {self.chunk_size_threshold}") + + if self.fps <= 0: + raise ValueError(f"fps must be positive, got {self.fps}") + + if self.actions_per_chunk <= 0: + raise ValueError(f"actions_per_chunk must be positive, got {self.actions_per_chunk}") + + self.aggregate_fn = get_aggregate_function(self.aggregate_fn_name) + + @classmethod + def from_dict(cls, config_dict: dict) -> "RobotClientConfig": + """Create a RobotClientConfig from a dictionary.""" + return cls(**config_dict) + + def to_dict(self) -> dict: + """Convert the configuration to a dictionary.""" + return { + "server_address": self.server_address, + "policy_type": self.policy_type, + "pretrained_name_or_path": self.pretrained_name_or_path, + "policy_device": self.policy_device, + "chunk_size_threshold": self.chunk_size_threshold, + "fps": self.fps, + "actions_per_chunk": self.actions_per_chunk, + "task": self.task, + "debug_visualize_queue_size": self.debug_visualize_queue_size, + 
"aggregate_fn_name": self.aggregate_fn_name, + } diff --git a/src/lerobot/scripts/server/constants.py b/src/lerobot/scripts/server/constants.py new file mode 100644 index 000000000..af983a800 --- /dev/null +++ b/src/lerobot/scripts/server/constants.py @@ -0,0 +1,29 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Client side: The environment evolves with a time resolution equal to 1/fps""" + +DEFAULT_FPS = 30 + +"""Server side: Running inference on (at most) 1/fps""" +DEFAULT_INFERENCE_LATENCY = 1 / DEFAULT_FPS + +"""Server side: Timeout for observation queue in seconds""" +DEFAULT_OBS_QUEUE_TIMEOUT = 2 + +# All action chunking policies +SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "pi0", "tdmpc", "vqbet"] + +# TODO: Add all other robots +SUPPORTED_ROBOTS = ["so100_follower", "so101_follower"] diff --git a/src/lerobot/scripts/server/helpers.py b/src/lerobot/scripts/server/helpers.py new file mode 100644 index 000000000..7fd56e693 --- /dev/null +++ b/src/lerobot/scripts/server/helpers.py @@ -0,0 +1,386 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import io +import logging +import logging.handlers +import os +import time +from dataclasses import dataclass +from pathlib import Path +from threading import Event +from typing import Any + +import torch + +from lerobot.configs.types import PolicyFeature +from lerobot.constants import OBS_IMAGES, OBS_STATE +from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features + +# NOTE: Configs need to be loaded for the client to be able to instantiate the policy config +from lerobot.policies import ACTConfig, DiffusionConfig, PI0Config, SmolVLAConfig, VQBeTConfig # noqa: F401 +from lerobot.robots.robot import Robot +from lerobot.transport import async_inference_pb2 +from lerobot.transport.utils import bytes_buffer_size +from lerobot.utils.utils import init_logging + +Action = torch.Tensor +ActionChunk = torch.Tensor + +# observation as received from the robot +RawObservation = dict[str, torch.Tensor] + +# observation as those recorded in LeRobot dataset (keys are different) +LeRobotObservation = dict[str, torch.Tensor] + +# observation, ready for policy inference (image keys resized) +Observation = dict[str, torch.Tensor] + + +def visualize_action_queue_size(action_queue_size: list[int]) -> None: + import matplotlib.pyplot as plt + + fig, ax = plt.subplots() + ax.set_title("Action Queue Size Over Time") + ax.set_xlabel("Environment steps") + ax.set_ylabel("Action Queue Size") + ax.set_ylim(0, max(action_queue_size) * 1.1) + ax.grid(True, alpha=0.3) + ax.plot(range(len(action_queue_size)), action_queue_size) + plt.show() + + +def 
validate_robot_cameras_for_policy( + lerobot_observation_features: dict[str, dict], policy_image_features: dict[str, PolicyFeature] +) -> None: + image_keys = list(filter(is_image_key, lerobot_observation_features)) + assert set(image_keys) == set(policy_image_features.keys()), ( + f"Policy image features must match robot cameras! Received {list(policy_image_features.keys())} != {image_keys}" + ) + + +def map_robot_keys_to_lerobot_features(robot: Robot) -> dict[str, dict]: + return hw_to_dataset_features(robot.observation_features, "observation", use_video=False) + + +def is_image_key(k: str) -> bool: + return k.startswith(OBS_IMAGES) + + +def resize_robot_observation_image(image: torch.tensor, resize_dims: tuple[int, int, int]) -> torch.tensor: + assert image.ndim == 3, f"Image must be (C, H, W)! Received {image.shape}" + # (H, W, C) -> (C, H, W) for resizing from robot obsevation resolution to policy image resolution + image = image.permute(2, 0, 1) + dims = (resize_dims[1], resize_dims[2]) + # Add batch dimension for interpolate: (C, H, W) -> (1, C, H, W) + image_batched = image.unsqueeze(0) + # Interpolate and remove batch dimension: (1, C, H, W) -> (C, H, W) + resized = torch.nn.functional.interpolate(image_batched, size=dims, mode="bilinear", align_corners=False) + + return resized.squeeze(0) + + +def raw_observation_to_observation( + raw_observation: RawObservation, + lerobot_features: dict[str, dict], + policy_image_features: dict[str, PolicyFeature], + device: str, +) -> Observation: + observation = {} + + observation = prepare_raw_observation(raw_observation, lerobot_features, policy_image_features) + for k, v in observation.items(): + if isinstance(v, torch.Tensor): # VLAs present natural-language instructions in observations + if "image" in k: + # Policy expects images in shape (B, C, H, W) + observation[k] = prepare_image(v).unsqueeze(0).to(device) + else: + observation[k] = v.to(device) + else: + observation[k] = v + + return observation + + +def 
prepare_image(image: torch.Tensor) -> torch.Tensor: + """Minimal preprocessing to turn int8 images to float32 in [0, 1], and create a memory-contiguous tensor""" + image = image.type(torch.float32) / 255 + image = image.contiguous() + + return image + + +def extract_state_from_raw_observation( + lerobot_obs: RawObservation, +) -> torch.Tensor: + """Extract the state from a raw observation.""" + state = torch.tensor(lerobot_obs[OBS_STATE]) + + if state.ndim == 1: + state = state.unsqueeze(0) + + return state + + +def extract_images_from_raw_observation( + lerobot_obs: RawObservation, + camera_key: str, +) -> dict[str, torch.Tensor]: + """Extract the images from a raw observation.""" + return torch.tensor(lerobot_obs[camera_key]) + + +def make_lerobot_observation( + robot_obs: RawObservation, + lerobot_features: dict[str, dict], +) -> LeRobotObservation: + """Make a lerobot observation from a raw observation.""" + return build_dataset_frame(lerobot_features, robot_obs, prefix="observation") + + +def prepare_raw_observation( + robot_obs: RawObservation, + lerobot_features: dict[str, dict], + policy_image_features: dict[str, PolicyFeature], +) -> Observation: + """Matches keys from the raw robot_obs dict to the keys expected by a given policy (passed as + policy_image_features).""" + # 1. {motor.pos1:value1, motor.pos2:value2, ..., laptop:np.ndarray} -> + # -> {observation.state:[value1,value2,...], observation.images.laptop:np.ndarray} + lerobot_obs = make_lerobot_observation(robot_obs, lerobot_features) + + # 2. Greps all observation.images.<> keys + image_keys = list(filter(is_image_key, lerobot_obs)) + # state's shape is expected as (B, state_dim) + state_dict = {OBS_STATE: extract_state_from_raw_observation(lerobot_obs)} + image_dict = { + image_k: extract_images_from_raw_observation(lerobot_obs, image_k) for image_k in image_keys + } + + # Turns the image features to (C, H, W) with H, W matching the policy image features. 
+ # This reduces the resolution of the images + image_dict = { + key: resize_robot_observation_image(torch.tensor(lerobot_obs[key]), policy_image_features[key].shape) + for key in image_keys + } + + if "task" in robot_obs: + state_dict["task"] = robot_obs["task"] + + return {**state_dict, **image_dict} + + +def get_logger(name: str, log_to_file: bool = True) -> logging.Logger: + """ + Get a logger using the standardized logging setup from utils.py. + + Args: + name: Logger name (e.g., 'policy_server', 'robot_client') + log_to_file: Whether to also log to a file + + Returns: + Configured logger instance + """ + # Create logs directory if logging to file + if log_to_file: + os.makedirs("logs", exist_ok=True) + log_file = Path(f"logs/{name}_{int(time.time())}.log") + else: + log_file = None + + # Initialize the standardized logging + init_logging(log_file=log_file, display_pid=False) + + # Return a named logger + return logging.getLogger(name) + + +@dataclass +class TimedData: + """A data object with timestamp and timestep information. + + Args: + timestamp: Unix timestamp relative to data's creation. + data: The actual data to wrap a timestamp around. + timestep: The timestep of the data. 
+ """ + + timestamp: float + timestep: int + + def get_timestamp(self): + return self.timestamp + + def get_timestep(self): + return self.timestep + + +@dataclass +class TimedAction(TimedData): + action: Action + + def get_action(self): + return self.action + + +@dataclass +class TimedObservation(TimedData): + observation: RawObservation + must_go: bool = False + + def get_observation(self): + return self.observation + + +@dataclass +class FPSTracker: + """Utility class to track FPS metrics over time.""" + + target_fps: float + first_timestamp: float = None + total_obs_count: int = 0 + + def calculate_fps_metrics(self, current_timestamp: float) -> dict[str, float]: + """Calculate average FPS vs target""" + self.total_obs_count += 1 + + # Initialize first observation time + if self.first_timestamp is None: + self.first_timestamp = current_timestamp + + # Calculate overall average FPS (since start) + total_duration = current_timestamp - self.first_timestamp + avg_fps = (self.total_obs_count - 1) / total_duration if total_duration > 1e-6 else 0.0 + + return {"avg_fps": avg_fps, "target_fps": self.target_fps} + + def reset(self): + """Reset the FPS tracker state""" + self.first_timestamp = None + self.total_obs_count = 0 + + +@dataclass +class RemotePolicyConfig: + policy_type: str + pretrained_name_or_path: str + lerobot_features: dict[str, PolicyFeature] + actions_per_chunk: int + device: str = "cpu" + + +def _compare_observation_states(obs1_state: torch.Tensor, obs2_state: torch.Tensor, atol: float) -> bool: + """Check if two observation states are similar, under a tolerance threshold""" + return bool(torch.linalg.norm(obs1_state - obs2_state) < atol) + + +def observations_similar( + obs1: TimedObservation, obs2: TimedObservation, lerobot_features: dict[str, dict], atol: float = 1 +) -> bool: + """Check if two observations are similar, under a tolerance threshold. Measures distance between + observations as the difference in joint-space between the two observations. 
+ + NOTE(fracapuano): This is a very simple check, and it is enough for the current use case. + An immediate next step is to use (fast) perceptual difference metrics comparing some camera views, + to surpass this joint-space similarity check. + """ + obs1_state = extract_state_from_raw_observation( + make_lerobot_observation(obs1.get_observation(), lerobot_features) + ) + obs2_state = extract_state_from_raw_observation( + make_lerobot_observation(obs2.get_observation(), lerobot_features) + ) + + return _compare_observation_states(obs1_state, obs2_state, atol=atol) + + +def send_bytes_in_chunks( + buffer: bytes, + message_class: Any, + log_prefix: str = "", + silent: bool = True, + chunk_size: int = 3 * 1024 * 1024, +): + # NOTE(fracapuano): Partially copied from lerobot.common.transport.utils.send_bytes_in_chunks. Duplication can't be avoided if we + # don't use a unique class for messages sent (due to the different transfer states sent). Also, I'd want more control over the + # chunk size as I am using it to send image observations. 
+ buffer = io.BytesIO(buffer) + size_in_bytes = bytes_buffer_size(buffer) + + sent_bytes = 0 + + logging_method = logging.info if not silent else logging.debug + + logging_method(f"{log_prefix} Buffer size {size_in_bytes / 1024 / 1024} MB with") + + while sent_bytes < size_in_bytes: + transfer_state = async_inference_pb2.TransferState.TRANSFER_MIDDLE + + if sent_bytes + chunk_size >= size_in_bytes: + transfer_state = async_inference_pb2.TransferState.TRANSFER_END + elif sent_bytes == 0: + transfer_state = async_inference_pb2.TransferState.TRANSFER_BEGIN + + size_to_read = min(chunk_size, size_in_bytes - sent_bytes) + chunk = buffer.read(size_to_read) + + yield message_class(transfer_state=transfer_state, data=chunk) + sent_bytes += size_to_read + logging_method(f"{log_prefix} Sent {sent_bytes}/{size_in_bytes} bytes with state {transfer_state}") + + logging_method(f"{log_prefix} Published {sent_bytes / 1024 / 1024} MB") + + +def receive_bytes_in_chunks( + iterator, continue_receiving: Event, logger: logging.Logger, log_prefix: str = "" +): # type: ignore + # NOTE(fracapuano): Partially copied from lerobot.common.transport.utils.receive_bytes_in_chunks. Duplication can't be avoided if we + # don't use a unique class for messages sent (due to the different transfer states sent). 
Also, on the server side the logic for receiving + # is opposite then the HIL-SERL design (my event showcases keeping on running instead of shutdown) + bytes_buffer = io.BytesIO() + step = 0 + + logger.info(f"{log_prefix} Starting receiver") + for item in iterator: + logger.debug(f"{log_prefix} Received item") + if not continue_receiving.is_set(): + logger.info(f"{log_prefix} Shutting down receiver") + return + + if item.transfer_state == async_inference_pb2.TransferState.TRANSFER_BEGIN: + bytes_buffer.seek(0) + bytes_buffer.truncate(0) + bytes_buffer.write(item.data) + logger.debug(f"{log_prefix} Received data at step 0") + + elif item.transfer_state == async_inference_pb2.TransferState.TRANSFER_MIDDLE: + bytes_buffer.write(item.data) + step += 1 + logger.debug(f"{log_prefix} Received data at step {step}") + + elif item.transfer_state == async_inference_pb2.TransferState.TRANSFER_END: + bytes_buffer.write(item.data) + logger.debug(f"{log_prefix} Received data at step end size {bytes_buffer_size(bytes_buffer)}") + + complete_bytes = bytes_buffer.getvalue() + + bytes_buffer.seek(0) + bytes_buffer.truncate(0) + + logger.debug(f"{log_prefix} Queue updated") + return complete_bytes + + else: + logger.warning(f"{log_prefix} Received unknown transfer state {item.transfer_state}") + raise ValueError(f"Received unknown transfer state {item.transfer_state}") diff --git a/src/lerobot/scripts/server/policy_server.py b/src/lerobot/scripts/server/policy_server.py new file mode 100644 index 000000000..669ccc58e --- /dev/null +++ b/src/lerobot/scripts/server/policy_server.py @@ -0,0 +1,403 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Example: +```shell +python src/lerobot/scripts/server/policy_server.py \ + --host=127.0.0.1 \ + --port=8080 \ + --fps=30 \ + --inference_latency=0.033 \ + --obs_queue_timeout=1 +``` +""" + +import logging +import pickle # nosec +import threading +import time +from concurrent import futures +from dataclasses import asdict +from pprint import pformat +from queue import Empty, Queue + +import draccus +import grpc +import torch + +from lerobot.policies.factory import get_policy_class +from lerobot.scripts.server.configs import PolicyServerConfig +from lerobot.scripts.server.constants import SUPPORTED_POLICIES +from lerobot.scripts.server.helpers import ( + FPSTracker, + Observation, + RemotePolicyConfig, + TimedAction, + TimedObservation, + get_logger, + observations_similar, + raw_observation_to_observation, + receive_bytes_in_chunks, +) +from lerobot.transport import ( + async_inference_pb2, # type: ignore + async_inference_pb2_grpc, # type: ignore +) + + +class PolicyServer(async_inference_pb2_grpc.AsyncInferenceServicer): + prefix = "policy_server" + logger = get_logger(prefix) + + def __init__(self, config: PolicyServerConfig): + self.config = config + self._running_event = threading.Event() + + # FPS measurement + self.fps_tracker = FPSTracker(target_fps=config.fps) + + self.observation_queue = Queue(maxsize=1) + + self._predicted_timesteps_lock = threading.Lock() + self._predicted_timesteps = set() + + self.last_processed_obs = None + + # Attributes will be set by SendPolicyInstructions + self.device = None + self.policy_type = None + 
self.lerobot_features = None + self.actions_per_chunk = None + self.policy = None + + @property + def running(self): + return self._running_event.is_set() + + @property + def policy_image_features(self): + return self.policy.config.image_features + + def _reset_server(self) -> None: + """Flushes server state when new client connects.""" + # only running inference on the latest observation received by the server + self._running_event.clear() + self.observation_queue = Queue(maxsize=1) + + with self._predicted_timesteps_lock: + self._predicted_timesteps = set() + + def Ready(self, request, context): # noqa: N802 + client_id = context.peer() + self.logger.info(f"Client {client_id} connected and ready") + self._reset_server() + self._running_event.set() + + return async_inference_pb2.Empty() + + def SendPolicyInstructions(self, request, context): # noqa: N802 + """Receive policy instructions from the robot client""" + + if not self.running: + self.logger.warning("Server is not running. Ignoring policy instructions.") + return async_inference_pb2.Empty() + + client_id = context.peer() + + policy_specs = pickle.loads(request.data) # nosec + + if not isinstance(policy_specs, RemotePolicyConfig): + raise TypeError(f"Policy specs must be a RemotePolicyConfig. Got {type(policy_specs)}") + + if policy_specs.policy_type not in SUPPORTED_POLICIES: + raise ValueError( + f"Policy type {policy_specs.policy_type} not supported. " + f"Supported policies: {SUPPORTED_POLICIES}" + ) + + self.logger.info( + f"Receiving policy instructions from {client_id} | " + f"Policy type: {policy_specs.policy_type} | " + f"Pretrained name or path: {policy_specs.pretrained_name_or_path} | " + f"Actions per chunk: {policy_specs.actions_per_chunk} | " + f"Device: {policy_specs.device}" + ) + + self.device = policy_specs.device + self.policy_type = policy_specs.policy_type # act, pi0, etc. 
+ self.lerobot_features = policy_specs.lerobot_features + self.actions_per_chunk = policy_specs.actions_per_chunk + + policy_class = get_policy_class(self.policy_type) + + start = time.perf_counter() + self.policy = policy_class.from_pretrained(policy_specs.pretrained_name_or_path) + self.policy.to(self.device) + end = time.perf_counter() + + self.logger.info(f"Time taken to put policy on {self.device}: {end - start:.4f} seconds") + + return async_inference_pb2.Empty() + + def SendObservations(self, request_iterator, context): # noqa: N802 + """Receive observations from the robot client""" + client_id = context.peer() + self.logger.debug(f"Receiving observations from {client_id}") + + receive_time = time.time() # comparing timestamps so need time.time() + start_deserialize = time.perf_counter() + received_bytes = receive_bytes_in_chunks( + request_iterator, self._running_event, self.logger + ) # blocking call while looping over request_iterator + timed_observation = pickle.loads(received_bytes) # nosec + deserialize_time = time.perf_counter() - start_deserialize + + self.logger.debug(f"Received observation #{timed_observation.get_timestep()}") + + obs_timestep = timed_observation.get_timestep() + obs_timestamp = timed_observation.get_timestamp() + + # Calculate FPS metrics + fps_metrics = self.fps_tracker.calculate_fps_metrics(obs_timestamp) + + self.logger.info( + f"Received observation #{obs_timestep} | " + f"Avg FPS: {fps_metrics['avg_fps']:.2f} | " # fps at which observations are received from client + f"Target: {fps_metrics['target_fps']:.2f} | " + f"One-way latency: {(receive_time - obs_timestamp) * 1000:.2f}ms" + ) + + self.logger.debug( + f"Server timestamp: {receive_time:.6f} | " + f"Client timestamp: {obs_timestamp:.6f} | " + f"Deserialization time: {deserialize_time:.6f}s" + ) + + if not self._enqueue_observation( + timed_observation # wrapping a RawObservation + ): + self.logger.info(f"Observation #{obs_timestep} has been filtered out") + + return 
def GetActions(self, request, context):  # noqa: N802
    """Return the next chunk of actions to the robot client.

    Blocks up to ``config.obs_queue_timeout`` waiting for an observation,
    runs inference on it and returns all predicted actions as a single
    serialized chunk. Returns an ``Empty`` message when no observation
    arrives in time or when inference fails.
    """
    client_id = context.peer()
    self.logger.debug(f"Client {client_id} connected for action streaming")

    # Generate action based on the most recent observation and its timestep
    try:
        getactions_starts = time.perf_counter()
        obs = self.observation_queue.get(timeout=self.config.obs_queue_timeout)
        self.logger.info(
            f"Running inference for observation #{obs.get_timestep()} (must_go: {obs.must_go})"
        )

        # Remember this timestep so duplicate observations are filtered out later.
        with self._predicted_timesteps_lock:
            self._predicted_timesteps.add(obs.get_timestep())

        start_time = time.perf_counter()
        action_chunk = self._predict_action_chunk(obs)
        inference_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        actions_bytes = pickle.dumps(action_chunk)  # nosec
        serialize_time = time.perf_counter() - start_time

        # Create and return the action chunk
        actions = async_inference_pb2.Actions(data=actions_bytes)

        self.logger.info(
            f"Action chunk #{obs.get_timestep()} generated | "
            f"Total time: {(inference_time + serialize_time) * 1000:.2f}ms"
        )

        self.logger.debug(
            f"Action chunk #{obs.get_timestep()} generated | "
            f"Inference time: {inference_time:.2f}s |"
            f"Serialize time: {serialize_time:.2f}s |"
            f"Total time: {inference_time + serialize_time:.2f}s"
        )

        # Sleep the remainder of the latency budget so calls return at a
        # roughly constant rate regardless of how fast inference ran.
        time.sleep(
            max(0, self.config.inference_latency - max(0, time.perf_counter() - getactions_starts))
        )

        return actions

    except Empty:  # no observation added to queue in obs_queue_timeout
        return async_inference_pb2.Empty()

    except Exception as e:
        # Fixed: this handler belongs to GetActions — the old message
        # blamed a non-existent "StreamActions" method.
        self.logger.error(f"Error in GetActions: {e}")

        return async_inference_pb2.Empty()
the policy""" + with self._predicted_timesteps_lock: + predicted_timesteps = self._predicted_timesteps + + if obs.get_timestep() in predicted_timesteps: + self.logger.debug(f"Skipping observation #{obs.get_timestep()} - Timestep predicted already!") + return False + + elif observations_similar(obs, previous_obs, lerobot_features=self.lerobot_features): + self.logger.debug( + f"Skipping observation #{obs.get_timestep()} - Observation too similar to last obs predicted!" + ) + return False + + else: + return True + + def _enqueue_observation(self, obs: TimedObservation) -> bool: + """Enqueue an observation if it must go through processing, otherwise skip it. + Observations not in queue are never run through the policy network""" + + if ( + obs.must_go + or self.last_processed_obs is None + or self._obs_sanity_checks(obs, self.last_processed_obs) + ): + last_obs = self.last_processed_obs.get_timestep() if self.last_processed_obs else "None" + self.logger.debug( + f"Enqueuing observation. Must go: {obs.must_go} | Last processed obs: {last_obs}" + ) + + # If queue is full, get the old observation to make room + if self.observation_queue.full(): + # pops from queue + _ = self.observation_queue.get_nowait() + self.logger.debug("Observation queue was full, removed oldest observation") + + # Now put the new observation (never blocks as queue is non-full here) + self.observation_queue.put(obs) + return True + + return False + + def _time_action_chunk(self, t_0: float, action_chunk: list[torch.Tensor], i_0: int) -> list[TimedAction]: + """Turn a chunk of actions into a list of TimedAction instances, + with the first action corresponding to t_0 and the rest corresponding to + t_0 + i*environment_dt for i in range(len(action_chunk)) + """ + return [ + TimedAction(timestamp=t_0 + i * self.config.environment_dt, timestep=i_0 + i, action=action) + for i, action in enumerate(action_chunk) + ] + + def _prepare_observation(self, observation_t: TimedObservation) -> Observation: + """ + 
Prepare observation, ready for policy inference. + E.g.: To keep observation sampling rate high (and network packet tiny) we send int8 [0,255] images from the + client and then convert them to float32 [0,1] images here, before running inference. + """ + # RawObservation from robot.get_observation() - wrong keys, wrong dtype, wrong image shape + observation: Observation = raw_observation_to_observation( + observation_t.get_observation(), + self.lerobot_features, + self.policy_image_features, + self.device, + ) + # processed Observation - right keys, right dtype, right image shape + + return observation + + def _get_action_chunk(self, observation: dict[str, torch.Tensor]) -> torch.Tensor: + """Get an action chunk from the policy. The chunk contains only""" + chunk = self.policy.predict_action_chunk(observation) + if chunk.ndim != 3: + chunk = chunk.unsqueeze(0) # adding batch dimension, now shape is (B, chunk_size, action_dim) + + return chunk[:, : self.actions_per_chunk, :] + torch.randn_like(chunk[:, : self.actions_per_chunk, :]) + + def _predict_action_chunk(self, observation_t: TimedObservation) -> list[TimedAction]: + """Predict an action chunk based on an observation""" + inference_starts = time.perf_counter() + + """1. Prepare observation""" + start_time = time.perf_counter() + observation = self._prepare_observation(observation_t) + preprocessing_time = time.perf_counter() - start_time + + self.last_processed_obs: TimedObservation = observation_t + + """2. Get action chunk""" + start_time = time.perf_counter() + action_tensor = self._get_action_chunk(observation) + inference_time = time.perf_counter() - start_time + + """3. 
Post-inference processing""" + start_time = time.perf_counter() + # Move to CPU before serializing + action_tensor = action_tensor.cpu().squeeze(0) + + action_chunk = self._time_action_chunk( + observation_t.get_timestamp(), list(action_tensor), observation_t.get_timestep() + ) + postprocessing_time = time.perf_counter() - start_time + inference_stops = time.perf_counter() + + self.logger.info( + f"Observation {observation_t.get_timestep()} |" + f"Inference time: {1000 * (inference_stops - inference_starts):.2f}ms" + ) + + # full-process latency breakdown for debugging purposes + self.logger.debug( + f"Observation {observation_t.get_timestep()} | " + f"Preprocessing time: {1000 * (preprocessing_time - inference_starts):.2f}ms | " + f"Inference time: {1000 * (inference_time - preprocessing_time):.2f}ms | " + f"Postprocessing time: {1000 * (postprocessing_time - inference_time):.2f}ms | " + f"Total time: {1000 * (postprocessing_time - inference_starts):.2f}ms" + ) + + return action_chunk + + def stop(self): + """Stop the server""" + self._reset_server() + self.logger.info("Server stopping...") + + +@draccus.wrap() +def serve(cfg: PolicyServerConfig): + """Start the PolicyServer with the given configuration. + + Args: + config: PolicyServerConfig instance. If None, uses default configuration. 
+ """ + logging.info(pformat(asdict(cfg))) + + # Create the server instance first + policy_server = PolicyServer(cfg) + + # Setup and start gRPC server + server = grpc.server(futures.ThreadPoolExecutor(max_workers=4)) + async_inference_pb2_grpc.add_AsyncInferenceServicer_to_server(policy_server, server) + server.add_insecure_port(f"{cfg.host}:{cfg.port}") + + policy_server.logger.info(f"PolicyServer started on {cfg.host}:{cfg.port}") + server.start() + + server.wait_for_termination() + + policy_server.logger.info("Server terminated") + + +if __name__ == "__main__": + serve() diff --git a/src/lerobot/scripts/server/robot_client.py b/src/lerobot/scripts/server/robot_client.py new file mode 100644 index 000000000..a6d7b7242 --- /dev/null +++ b/src/lerobot/scripts/server/robot_client.py @@ -0,0 +1,509 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Example command: +```shell +python src/lerobot/scripts/server/robot_client.py \ + --robot.type=so100_follower \ + --robot.port=/dev/tty.usbmodem58760431541 \ + --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \ + --robot.id=black \ + --task="dummy" \ + --server_address=127.0.0.1:8080 \ + --policy_type=act \ + --pretrained_name_or_path=user/model \ + --policy_device=mps \ + --actions_per_chunk=50 \ + --chunk_size_threshold=0.5 \ + --aggregate_fn_name=weighted_average \ + --debug_visualize_queue_size=True +``` +""" + +import logging +import pickle # nosec +import threading +import time +from dataclasses import asdict +from pprint import pformat +from queue import Queue +from typing import Any, Callable, Optional + +import draccus +import grpc +import torch + +from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401 +from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401 +from lerobot.configs.policies import PreTrainedConfig +from lerobot.robots import ( # noqa: F401 + Robot, + RobotConfig, + koch_follower, + make_robot_from_config, + so100_follower, + so101_follower, +) +from lerobot.scripts.server.configs import RobotClientConfig +from lerobot.scripts.server.constants import SUPPORTED_ROBOTS +from lerobot.scripts.server.helpers import ( + Action, + FPSTracker, + Observation, + RawObservation, + RemotePolicyConfig, + TimedAction, + TimedObservation, + get_logger, + map_robot_keys_to_lerobot_features, + send_bytes_in_chunks, + validate_robot_cameras_for_policy, + visualize_action_queue_size, +) +from lerobot.transport import ( + async_inference_pb2, # type: ignore + async_inference_pb2_grpc, # type: ignore +) + + +class RobotClient: + prefix = "robot_client" + logger = get_logger(prefix) + + def __init__(self, config: RobotClientConfig): + """Initialize RobotClient with unified configuration. 
+ + Args: + config: RobotClientConfig containing all configuration parameters + """ + # Store configuration + self.config = config + self.robot = make_robot_from_config(config.robot) + self.robot.connect() + + lerobot_features = map_robot_keys_to_lerobot_features(self.robot) + + if config.verify_robot_cameras: + # Load policy config for validation + policy_config = PreTrainedConfig.from_pretrained(config.pretrained_name_or_path) + policy_image_features = policy_config.image_features + + # The cameras specified for inference must match the one supported by the policy chosen + validate_robot_cameras_for_policy(lerobot_features, policy_image_features) + + # Use environment variable if server_address is not provided in config + self.server_address = config.server_address + + self.policy_config = RemotePolicyConfig( + config.policy_type, + config.pretrained_name_or_path, + lerobot_features, + config.actions_per_chunk, + config.policy_device, + ) + self.channel = grpc.insecure_channel(self.server_address) + self.stub = async_inference_pb2_grpc.AsyncInferenceStub(self.channel) + self.logger.info(f"Initializing client to connect to server at {self.server_address}") + + self._running_event = threading.Event() + + # Initialize client side variables + self.latest_action_lock = threading.Lock() + self.latest_action = -1 + self.action_chunk_size = -1 + + self._chunk_size_threshold = config.chunk_size_threshold + + self.action_queue = Queue() + self.action_queue_lock = threading.Lock() # Protect queue operations + self.action_queue_size = [] + self.start_barrier = threading.Barrier(2) # 2 threads: action receiver, control loop + + # FPS measurement + self.fps_tracker = FPSTracker(target_fps=self.config.fps) + + self.logger.info("Robot connected and ready") + + # Use an event for thread-safe coordination + self.must_go = threading.Event() + self.must_go.set() # Initially set - observations qualify for direct processing + + @property + def running(self): + return 
def start(self):
    """Handshake with the policy server and push the policy configuration."""
    try:
        # Handshake: tell the server we are ready and time the round trip.
        handshake_begin = time.perf_counter()
        self.stub.Ready(async_inference_pb2.Empty())
        handshake_end = time.perf_counter()
        self.logger.debug(f"Connected to policy server in {handshake_end - handshake_begin:.4f}s")

        # Ship the serialized policy specification to the server.
        policy_setup = async_inference_pb2.PolicySetup(data=pickle.dumps(self.policy_config))

        self.logger.info("Sending policy instructions to policy server")
        self.logger.debug(
            f"Policy type: {self.policy_config.policy_type} | "
            f"Pretrained name or path: {self.policy_config.pretrained_name_or_path} | "
            f"Device: {self.policy_config.device}"
        )

        self.stub.SendPolicyInstructions(policy_setup)
        self._running_event.set()
        return True

    except grpc.RpcError as e:
        self.logger.error(f"Failed to connect to policy server: {e}")
        return False

def stop(self):
    """Stop the robot client: halt loops, release the robot and the channel."""
    self._running_event.clear()

    self.robot.disconnect()
    self.logger.debug("Robot disconnected")

    self.channel.close()
    self.logger.debug("Client stopped, channel closed")
Run RobotClient.start() before sending observations.") + + if not isinstance(obs, TimedObservation): + raise ValueError("Input observation needs to be a TimedObservation!") + + start_time = time.perf_counter() + observation_bytes = pickle.dumps(obs) + serialize_time = time.perf_counter() - start_time + self.logger.debug(f"Observation serialization time: {serialize_time:.6f}s") + + try: + observation_iterator = send_bytes_in_chunks( + observation_bytes, + async_inference_pb2.Observation, + log_prefix="[CLIENT] Observation", + silent=True, + ) + _ = self.stub.SendObservations(observation_iterator) + obs_timestep = obs.get_timestep() + self.logger.info(f"Sent observation #{obs_timestep} | ") + + return True + + except grpc.RpcError as e: + self.logger.error(f"Error sending observation #{obs.get_timestep()}: {e}") + return False + + def _inspect_action_queue(self): + with self.action_queue_lock: + queue_size = self.action_queue.qsize() + timestamps = sorted([action.get_timestep() for action in self.action_queue.queue]) + self.logger.debug(f"Queue size: {queue_size}, Queue contents: {timestamps}") + return queue_size, timestamps + + def _aggregate_action_queues( + self, + incoming_actions: list[TimedAction], + aggregate_fn: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + ): + """Finds the same timestep actions in the queue and aggregates them using the aggregate_fn""" + if aggregate_fn is None: + # default aggregate function: take the latest action + def aggregate_fn(x1, x2): + return x2 + + future_action_queue = Queue() + with self.action_queue_lock: + internal_queue = self.action_queue.queue + + current_action_queue = {action.get_timestep(): action.get_action() for action in internal_queue} + + for new_action in incoming_actions: + with self.latest_action_lock: + latest_action = self.latest_action + + # New action is older than the latest action in the queue, skip it + if new_action.get_timestep() <= latest_action: + continue + + # If the new 
action's timestep is not in the current action queue, add it directly + elif new_action.get_timestep() not in current_action_queue: + future_action_queue.put(new_action) + continue + + # If the new action's timestep is in the current action queue, aggregate it + # TODO: There is probably a way to do this with broadcasting of the two action tensors + future_action_queue.put( + TimedAction( + timestamp=new_action.get_timestamp(), + timestep=new_action.get_timestep(), + action=aggregate_fn( + current_action_queue[new_action.get_timestep()], new_action.get_action() + ), + ) + ) + + with self.action_queue_lock: + self.action_queue = future_action_queue + + def receive_actions(self, verbose: bool = False): + """Receive actions from the policy server""" + # Wait at barrier for synchronized start + self.start_barrier.wait() + self.logger.info("Action receiving thread starting") + + while self.running: + try: + # Use StreamActions to get a stream of actions from the server + actions_chunk = self.stub.GetActions(async_inference_pb2.Empty()) + if len(actions_chunk.data) == 0: + continue # received `Empty` from server, wait for next call + + receive_time = time.time() + + # Deserialize bytes back into list[TimedAction] + deserialize_start = time.perf_counter() + timed_actions = pickle.loads(actions_chunk.data) # nosec + deserialize_time = time.perf_counter() - deserialize_start + + self.action_chunk_size = max(self.action_chunk_size, len(timed_actions)) + + # Calculate network latency if we have matching observations + if len(timed_actions) > 0 and verbose: + with self.latest_action_lock: + latest_action = self.latest_action + + self.logger.debug(f"Current latest action: {latest_action}") + + # Get queue state before changes + old_size, old_timesteps = self._inspect_action_queue() + if not old_timesteps: + old_timesteps = [latest_action] # queue was empty + + # Get queue state before changes + old_size, old_timesteps = self._inspect_action_queue() + if not old_timesteps: + 
old_timesteps = [latest_action] # queue was empty + + # Log incoming actions + incoming_timesteps = [a.get_timestep() for a in timed_actions] + + first_action_timestep = timed_actions[0].get_timestep() + server_to_client_latency = (receive_time - timed_actions[0].get_timestamp()) * 1000 + + self.logger.info( + f"Received action chunk for step #{first_action_timestep} | " + f"Latest action: #{latest_action} | " + f"Incoming actions: {incoming_timesteps[0]}:{incoming_timesteps[-1]} | " + f"Network latency (server->client): {server_to_client_latency:.2f}ms | " + f"Deserialization time: {deserialize_time * 1000:.2f}ms" + ) + + # Update action queue + start_time = time.perf_counter() + self._aggregate_action_queues(timed_actions, self.config.aggregate_fn) + queue_update_time = time.perf_counter() - start_time + + self.must_go.set() # after receiving actions, next empty queue triggers must-go processing! + + if verbose: + # Get queue state after changes + new_size, new_timesteps = self._inspect_action_queue() + + with self.latest_action_lock: + latest_action = self.latest_action + + self.logger.info( + f"Latest action: {latest_action} | " + f"Old action steps: {old_timesteps[0]}:{old_timesteps[-1]} | " + f"Incoming action steps: {incoming_timesteps[0]}:{incoming_timesteps[-1]} | " + f"Updated action steps: {new_timesteps[0]}:{new_timesteps[-1]}" + ) + self.logger.debug( + f"Queue update complete ({queue_update_time:.6f}s) | " + f"Before: {old_size} items | " + f"After: {new_size} items | " + ) + + except grpc.RpcError as e: + self.logger.error(f"Error receiving actions: {e}") + + def actions_available(self): + """Check if there are actions available in the queue""" + with self.action_queue_lock: + return not self.action_queue.empty() + + def _action_tensor_to_action_dict(self, action_tensor: torch.Tensor) -> dict[str, float]: + action = {key: action_tensor[i].item() for i, key in enumerate(self.robot.action_features)} + return action + + def control_loop_action(self, 
verbose: bool = False) -> dict[str, Any]: + """Reading and performing actions in local queue""" + + # Lock only for queue operations + get_start = time.perf_counter() + with self.action_queue_lock: + self.action_queue_size.append(self.action_queue.qsize()) + # Get action from queue + timed_action = self.action_queue.get_nowait() + get_end = time.perf_counter() - get_start + + _performed_action = self.robot.send_action( + self._action_tensor_to_action_dict(timed_action.get_action()) + ) + with self.latest_action_lock: + self.latest_action = timed_action.get_timestep() + + if verbose: + with self.action_queue_lock: + current_queue_size = self.action_queue.qsize() + + self.logger.debug( + f"Ts={timed_action.get_timestamp()} | " + f"Action #{timed_action.get_timestep()} performed | " + f"Queue size: {current_queue_size}" + ) + + self.logger.debug( + f"Popping action from queue to perform took {get_end:.6f}s | Queue size: {current_queue_size}" + ) + + return _performed_action + + def _ready_to_send_observation(self): + """Flags when the client is ready to send an observation""" + with self.action_queue_lock: + return self.action_queue.qsize() / self.action_chunk_size <= self._chunk_size_threshold + + def control_loop_observation(self, task: str, verbose: bool = False) -> RawObservation: + try: + # Get serialized observation bytes from the function + start_time = time.perf_counter() + + raw_observation: RawObservation = self.robot.get_observation() + raw_observation["task"] = task + + with self.latest_action_lock: + latest_action = self.latest_action + + observation = TimedObservation( + timestamp=time.time(), # need time.time() to compare timestamps across client and server + observation=raw_observation, + timestep=max(latest_action, 0), + ) + + obs_capture_time = time.perf_counter() - start_time + + # If there are no actions left in the queue, the observation must go through processing! 
+ with self.action_queue_lock: + observation.must_go = self.must_go.is_set() and self.action_queue.empty() + current_queue_size = self.action_queue.qsize() + + _ = self.send_observation(observation) + + self.logger.debug(f"QUEUE SIZE: {current_queue_size} (Must go: {observation.must_go})") + if observation.must_go: + # must-go event will be set again after receiving actions + self.must_go.clear() + + if verbose: + # Calculate comprehensive FPS metrics + fps_metrics = self.fps_tracker.calculate_fps_metrics(observation.get_timestamp()) + + self.logger.info( + f"Obs #{observation.get_timestep()} | " + f"Avg FPS: {fps_metrics['avg_fps']:.2f} | " + f"Target: {fps_metrics['target_fps']:.2f}" + ) + + self.logger.debug( + f"Ts={observation.get_timestamp():.6f} | Capturing observation took {obs_capture_time:.6f}s" + ) + + return raw_observation + + except Exception as e: + self.logger.error(f"Error in observation sender: {e}") + + def control_loop(self, task: str, verbose: bool = False) -> tuple[Observation, Action]: + """Combined function for executing actions and streaming observations""" + # Wait at barrier for synchronized start + self.start_barrier.wait() + self.logger.info("Control loop thread starting") + + _performed_action = None + _captured_observation = None + + while self.running: + control_loop_start = time.perf_counter() + """Control loop: (1) Performing actions, when available""" + if self.actions_available(): + _performed_action = self.control_loop_action(verbose) + + """Control loop: (2) Streaming observations to the remote policy server""" + if self._ready_to_send_observation(): + _captured_observation = self.control_loop_observation(task, verbose) + + self.logger.info(f"Control loop (ms): {(time.perf_counter() - control_loop_start) * 1000:.2f}") + # Dynamically adjust sleep time to maintain the desired control frequency + time.sleep(max(0, self.config.environment_dt - (time.perf_counter() - control_loop_start))) + + return _captured_observation, 
_performed_action + + +@draccus.wrap() +def async_client(cfg: RobotClientConfig): + logging.info(pformat(asdict(cfg))) + + if cfg.robot.type not in SUPPORTED_ROBOTS: + raise ValueError(f"Robot {cfg.robot.type} not yet supported!") + + client = RobotClient(cfg) + + if client.start(): + client.logger.info("Starting action receiver thread...") + + # Create and start action receiver thread + action_receiver_thread = threading.Thread(target=client.receive_actions, daemon=True) + + # Start action receiver thread + action_receiver_thread.start() + + try: + # The main thread runs the control loop + client.control_loop(task=cfg.task) + + finally: + client.stop() + action_receiver_thread.join() + if cfg.debug_visualize_queue_size: + visualize_action_queue_size(client.action_queue_size) + client.logger.info("Client stopped") + + +if __name__ == "__main__": + async_client() # run the client diff --git a/src/lerobot/transport/async_inference.proto b/src/lerobot/transport/async_inference.proto new file mode 100644 index 000000000..434f3142b --- /dev/null +++ b/src/lerobot/transport/async_inference.proto @@ -0,0 +1,59 @@ +// fmt: off +// flake8: noqa +// !/usr/bin/env python + +// Copyright 2024 The HuggingFace Inc. team. +// All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+syntax = "proto3"; + +package async_inference; + +// AsyncInference: from Robot perspective +// Robot send observations to & executes action received from a remote Policy server +service AsyncInference { + // Robot -> Policy to share observations with a remote inference server + // Policy -> Robot to share actions predicted for given observations + rpc SendObservations(stream Observation) returns (Empty); + rpc GetActions(Empty) returns (Actions); + rpc SendPolicyInstructions(PolicySetup) returns (Empty); + rpc Ready(Empty) returns (Empty); + rpc Stop(Empty) returns (Empty); +} + +enum TransferState { + TRANSFER_UNKNOWN = 0; + TRANSFER_BEGIN = 1; + TRANSFER_MIDDLE = 2; + TRANSFER_END = 3; +} + +// Messages +message Observation { + // sent by Robot, to remote Policy + TransferState transfer_state = 1; // Observations can be streamed exceeding 4MB of size + bytes data = 2; +} + +message Actions { + // sent by remote Policy, to Robot + bytes data = 1; +} + +message PolicySetup { + // sent by Robot to remote server, to init Policy + bytes data = 1; +} + +message Empty {} diff --git a/src/lerobot/transport/async_inference_pb2.py b/src/lerobot/transport/async_inference_pb2.py new file mode 100644 index 000000000..59c8eb488 --- /dev/null +++ b/src/lerobot/transport/async_inference_pb2.py @@ -0,0 +1,45 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: async_inference.proto +# Protobuf Python Version: 5.29.0 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 5, + 29, + 0, + '', + 'async_inference.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x61sync_inference.proto\x12\x0f\x61sync_inference\"S\n\x0bObservation\x12\x36\n\x0etransfer_state\x18\x01 \x01(\x0e\x32\x1e.async_inference.TransferState\x12\x0c\n\x04\x64\x61ta\x18\x02 \x01(\x0c\"\x17\n\x07\x41\x63tions\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\x0c\"\x1b\n\x0bPolicySetup\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\x0c\"\x07\n\x05\x45mpty*`\n\rTransferState\x12\x14\n\x10TRANSFER_UNKNOWN\x10\x00\x12\x12\n\x0eTRANSFER_BEGIN\x10\x01\x12\x13\n\x0fTRANSFER_MIDDLE\x10\x02\x12\x10\n\x0cTRANSFER_END\x10\x03\x32\xdd\x02\n\x0e\x41syncInference\x12J\n\x10SendObservations\x12\x1c.async_inference.Observation\x1a\x16.async_inference.Empty(\x01\x12>\n\nGetActions\x12\x16.async_inference.Empty\x1a\x18.async_inference.Actions\x12N\n\x16SendPolicyInstructions\x12\x1c.async_inference.PolicySetup\x1a\x16.async_inference.Empty\x12\x37\n\x05Ready\x12\x16.async_inference.Empty\x1a\x16.async_inference.Empty\x12\x36\n\x04Stop\x12\x16.async_inference.Empty\x1a\x16.async_inference.Emptyb\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'async_inference_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None + 
_globals['_TRANSFERSTATE']._serialized_start=190 + _globals['_TRANSFERSTATE']._serialized_end=286 + _globals['_OBSERVATION']._serialized_start=42 + _globals['_OBSERVATION']._serialized_end=125 + _globals['_ACTIONS']._serialized_start=127 + _globals['_ACTIONS']._serialized_end=150 + _globals['_POLICYSETUP']._serialized_start=152 + _globals['_POLICYSETUP']._serialized_end=179 + _globals['_EMPTY']._serialized_start=181 + _globals['_EMPTY']._serialized_end=188 + _globals['_ASYNCINFERENCE']._serialized_start=289 + _globals['_ASYNCINFERENCE']._serialized_end=638 +# @@protoc_insertion_point(module_scope) diff --git a/src/lerobot/transport/async_inference_pb2_grpc.py b/src/lerobot/transport/async_inference_pb2_grpc.py new file mode 100644 index 000000000..3042db0db --- /dev/null +++ b/src/lerobot/transport/async_inference_pb2_grpc.py @@ -0,0 +1,277 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + +from lerobot.transport import async_inference_pb2 as async__inference__pb2 + +GRPC_GENERATED_VERSION = '1.71.0' +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f'The grpc package installed is at version {GRPC_VERSION},' + + f' but the generated code in async_inference_pb2_grpc.py depends on' + + f' grpcio>={GRPC_GENERATED_VERSION}.' + + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 
+ ) + + +class AsyncInferenceStub: + """AsyncInference: from Robot perspective + Robot send observations to & executes action received from a remote Policy server + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.SendObservations = channel.stream_unary( + '/async_inference.AsyncInference/SendObservations', + request_serializer=async__inference__pb2.Observation.SerializeToString, + response_deserializer=async__inference__pb2.Empty.FromString, + _registered_method=True) + self.GetActions = channel.unary_unary( + '/async_inference.AsyncInference/GetActions', + request_serializer=async__inference__pb2.Empty.SerializeToString, + response_deserializer=async__inference__pb2.Actions.FromString, + _registered_method=True) + self.SendPolicyInstructions = channel.unary_unary( + '/async_inference.AsyncInference/SendPolicyInstructions', + request_serializer=async__inference__pb2.PolicySetup.SerializeToString, + response_deserializer=async__inference__pb2.Empty.FromString, + _registered_method=True) + self.Ready = channel.unary_unary( + '/async_inference.AsyncInference/Ready', + request_serializer=async__inference__pb2.Empty.SerializeToString, + response_deserializer=async__inference__pb2.Empty.FromString, + _registered_method=True) + self.Stop = channel.unary_unary( + '/async_inference.AsyncInference/Stop', + request_serializer=async__inference__pb2.Empty.SerializeToString, + response_deserializer=async__inference__pb2.Empty.FromString, + _registered_method=True) + + +class AsyncInferenceServicer: + """AsyncInference: from Robot perspective + Robot send observations to & executes action received from a remote Policy server + """ + + def SendObservations(self, request_iterator, context): + """Robot -> Policy to share observations with a remote inference server + Policy -> Robot to share actions predicted for given observations + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not 
implemented!') + raise NotImplementedError('Method not implemented!') + + def GetActions(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SendPolicyInstructions(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Ready(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Stop(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_AsyncInferenceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'SendObservations': grpc.stream_unary_rpc_method_handler( + servicer.SendObservations, + request_deserializer=async__inference__pb2.Observation.FromString, + response_serializer=async__inference__pb2.Empty.SerializeToString, + ), + 'GetActions': grpc.unary_unary_rpc_method_handler( + servicer.GetActions, + request_deserializer=async__inference__pb2.Empty.FromString, + response_serializer=async__inference__pb2.Actions.SerializeToString, + ), + 'SendPolicyInstructions': grpc.unary_unary_rpc_method_handler( + servicer.SendPolicyInstructions, + request_deserializer=async__inference__pb2.PolicySetup.FromString, + response_serializer=async__inference__pb2.Empty.SerializeToString, + ), + 'Ready': grpc.unary_unary_rpc_method_handler( + servicer.Ready, + 
request_deserializer=async__inference__pb2.Empty.FromString, + response_serializer=async__inference__pb2.Empty.SerializeToString, + ), + 'Stop': grpc.unary_unary_rpc_method_handler( + servicer.Stop, + request_deserializer=async__inference__pb2.Empty.FromString, + response_serializer=async__inference__pb2.Empty.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'async_inference.AsyncInference', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + server.add_registered_method_handlers('async_inference.AsyncInference', rpc_method_handlers) + + + # This class is part of an EXPERIMENTAL API. +class AsyncInference: + """AsyncInference: from Robot perspective + Robot send observations to & executes action received from a remote Policy server + """ + + @staticmethod + def SendObservations(request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.stream_unary( + request_iterator, + target, + '/async_inference.AsyncInference/SendObservations', + async__inference__pb2.Observation.SerializeToString, + async__inference__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GetActions(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/async_inference.AsyncInference/GetActions', + async__inference__pb2.Empty.SerializeToString, + async__inference__pb2.Actions.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod 
+ def SendPolicyInstructions(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/async_inference.AsyncInference/SendPolicyInstructions', + async__inference__pb2.PolicySetup.SerializeToString, + async__inference__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def Ready(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/async_inference.AsyncInference/Ready', + async__inference__pb2.Empty.SerializeToString, + async__inference__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def Stop(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/async_inference.AsyncInference/Stop', + async__inference__pb2.Empty.SerializeToString, + async__inference__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) diff --git a/tests/async_inference/test_e2e.py b/tests/async_inference/test_e2e.py new file mode 100644 index 000000000..d7b68e66b --- /dev/null +++ b/tests/async_inference/test_e2e.py @@ -0,0 +1,177 @@ +# Copyright 2025 The HuggingFace Inc. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""End-to-end test of the asynchronous inference stack (client ↔ server). + +This test spins up a lightweight gRPC `PolicyServer` instance with a stubbed +policy network and launches a `RobotClient` that uses a `MockRobot`. The goal +is to exercise the full communication loop: + +1. Client sends policy specification → Server +2. Client streams observations → Server +3. Server streams action chunks → Client +4. Client executes received actions + +The test succeeds if at least one action is executed and the server records at +least one predicted timestep - demonstrating that the gRPC round-trip works +end-to-end using real (but lightweight) protocol messages. 
+""" + +from __future__ import annotations + +import threading +from concurrent import futures + +import pytest +import torch + +# Skip entire module if grpc is not available +pytest.importorskip("grpc") + +# ----------------------------------------------------------------------------- +# End-to-end test +# ----------------------------------------------------------------------------- + + +def test_async_inference_e2e(monkeypatch): + """Tests the full asynchronous inference pipeline.""" + # Import grpc-dependent modules inside the test function + import grpc + + from lerobot.robots.utils import make_robot_from_config + from lerobot.scripts.server.configs import PolicyServerConfig, RobotClientConfig + from lerobot.scripts.server.helpers import map_robot_keys_to_lerobot_features + from lerobot.scripts.server.policy_server import PolicyServer + from lerobot.scripts.server.robot_client import RobotClient + from lerobot.transport import ( + async_inference_pb2, # type: ignore + async_inference_pb2_grpc, # type: ignore + ) + from tests.mocks.mock_robot import MockRobotConfig + + # Create a stub policy similar to test_policy_server.py + class MockPolicy: + """A minimal mock for an actual policy, returning zeros.""" + + class _Config: + robot_type = "dummy_robot" + + @property + def image_features(self): + """Empty image features since this test doesn't use images.""" + return {} + + def __init__(self): + self.config = self._Config() + + def to(self, *args, **kwargs): + return self + + def model(self, batch): + # Return a chunk of 20 dummy actions. + batch_size = len(batch["robot_type"]) + return torch.zeros(batch_size, 20, 6) + + # ------------------------------------------------------------------ + # 1. 
Create PolicyServer instance with mock policy + # ------------------------------------------------------------------ + policy_server_config = PolicyServerConfig(host="localhost", port=9999) + policy_server = PolicyServer(policy_server_config) + # Replace the real policy with our fast, deterministic stub. + policy_server.policy = MockPolicy() + policy_server.actions_per_chunk = 20 + policy_server.device = "cpu" + + # Set up robot config and features + robot_config = MockRobotConfig() + mock_robot = make_robot_from_config(robot_config) + + lerobot_features = map_robot_keys_to_lerobot_features(mock_robot) + policy_server.lerobot_features = lerobot_features + + # Force server to produce deterministic action chunks in test mode + policy_server.policy_type = "act" + + def _fake_get_action_chunk(_self, _obs, _type="test"): + action_dim = 6 + batch_size = 1 + actions_per_chunk = policy_server.actions_per_chunk + + return torch.zeros(batch_size, actions_per_chunk, action_dim) + + monkeypatch.setattr(PolicyServer, "_get_action_chunk", _fake_get_action_chunk, raising=True) + + # Bypass potentially heavy model loading inside SendPolicyInstructions + def _fake_send_policy_instructions(self, request, context): # noqa: N802 + return async_inference_pb2.Empty() + + monkeypatch.setattr(PolicyServer, "SendPolicyInstructions", _fake_send_policy_instructions, raising=True) + + # Build gRPC server running a PolicyServer + server = grpc.server(futures.ThreadPoolExecutor(max_workers=1, thread_name_prefix="policy_server")) + async_inference_pb2_grpc.add_AsyncInferenceServicer_to_server(policy_server, server) + + # Use the host/port specified in the fixture's config + server_address = f"{policy_server.config.host}:{policy_server.config.port}" + server.add_insecure_port(server_address) + server.start() + + # ------------------------------------------------------------------ + # 2. 
Create a RobotClient around the MockRobot
+    # ------------------------------------------------------------------
+    client_config = RobotClientConfig(
+        server_address=server_address,
+        robot=robot_config,
+        chunk_size_threshold=0.0,
+        policy_type="test",
+        pretrained_name_or_path="test",
+        actions_per_chunk=20,
+        verify_robot_cameras=False,
+    )
+
+    client = RobotClient(client_config)
+    assert client.start(), "Client failed initial handshake with the server"
+
+    # Track action chunks received without modifying RobotClient
+    action_chunks_received = {"count": 0}
+    original_aggregate = client._aggregate_action_queues
+
+    def counting_aggregate(*args, **kwargs):
+        action_chunks_received["count"] += 1
+        return original_aggregate(*args, **kwargs)
+
+    monkeypatch.setattr(client, "_aggregate_action_queues", counting_aggregate)
+
+    # Start client threads
+    action_thread = threading.Thread(target=client.receive_actions, daemon=True)
+    control_thread = threading.Thread(target=client.control_loop, args=({"task": ""},), daemon=True)
+    action_thread.start()
+    control_thread.start()
+
+    # ------------------------------------------------------------------
+    # 3. System exchanges a few messages
+    # ------------------------------------------------------------------
+    # Wait for 5 seconds
+    server.wait_for_termination(timeout=5)
+
+    assert action_chunks_received["count"] > 0, "Client did not receive any action chunks"
+    assert len(policy_server._predicted_timesteps) > 0, "Server did not record any predicted timesteps"
+
+    # ------------------------------------------------------------------
+    # 4.
Stop the system + # ------------------------------------------------------------------ + client.stop() + action_thread.join() + control_thread.join() + policy_server.stop() + server.stop(grace=None) diff --git a/tests/async_inference/test_helpers.py b/tests/async_inference/test_helpers.py new file mode 100644 index 000000000..e0b797371 --- /dev/null +++ b/tests/async_inference/test_helpers.py @@ -0,0 +1,459 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math +import pickle +import time + +import numpy as np +import torch + +from lerobot.configs.types import FeatureType, PolicyFeature +from lerobot.scripts.server.helpers import ( + FPSTracker, + TimedAction, + TimedObservation, + observations_similar, + prepare_image, + prepare_raw_observation, + raw_observation_to_observation, + resize_robot_observation_image, +) + +# --------------------------------------------------------------------- +# FPSTracker +# --------------------------------------------------------------------- + + +def test_fps_tracker_first_observation(): + """First observation should initialize timestamp and return 0 FPS.""" + tracker = FPSTracker(target_fps=30.0) + timestamp = 1000.0 + + metrics = tracker.calculate_fps_metrics(timestamp) + + assert tracker.first_timestamp == timestamp + assert tracker.total_obs_count == 1 + assert metrics["avg_fps"] == 0.0 + assert metrics["target_fps"] == 30.0 + + +def test_fps_tracker_single_interval(): + """Two observations 1 second apart should give 1 FPS.""" + tracker = FPSTracker(target_fps=30.0) + + # First observation at t=0 + metrics1 = tracker.calculate_fps_metrics(0.0) + assert metrics1["avg_fps"] == 0.0 + + # Second observation at t=1 (1 second later) + metrics2 = tracker.calculate_fps_metrics(1.0) + expected_fps = 1.0 # (2-1) observations / 1.0 seconds = 1 FPS + assert math.isclose(metrics2["avg_fps"], expected_fps, rel_tol=1e-6) + + +def test_fps_tracker_multiple_intervals(): + """Multiple observations should calculate correct average FPS.""" + tracker = FPSTracker(target_fps=30.0) + + # Simulate 5 observations over 2 seconds (should be 2 FPS average) + timestamps = [0.0, 0.5, 1.0, 1.5, 2.0] + + for i, ts in enumerate(timestamps): + metrics = tracker.calculate_fps_metrics(ts) + + if i == 0: + assert metrics["avg_fps"] == 0.0 + elif i == len(timestamps) - 1: + # After 5 observations over 2 seconds: (5-1)/2 = 2 FPS + expected_fps = 2.0 + assert math.isclose(metrics["avg_fps"], expected_fps, 
rel_tol=1e-6) + + +def test_fps_tracker_irregular_intervals(): + """FPS calculation should work with irregular time intervals.""" + tracker = FPSTracker(target_fps=30.0) + + # Irregular timestamps: 0, 0.1, 0.5, 2.0, 3.0 seconds + timestamps = [0.0, 0.1, 0.5, 2.0, 3.0] + + for ts in timestamps: + metrics = tracker.calculate_fps_metrics(ts) + + # 5 observations over 3 seconds: (5-1)/3 = 1.333... FPS + expected_fps = 4.0 / 3.0 + assert math.isclose(metrics["avg_fps"], expected_fps, rel_tol=1e-6) + + +# --------------------------------------------------------------------- +# TimedData helpers +# --------------------------------------------------------------------- + + +def test_timed_action_getters(): + """TimedAction stores & returns timestamp, action tensor and timestep.""" + ts = time.time() + action = torch.arange(10) + ta = TimedAction(timestamp=ts, action=action, timestep=0) + + assert math.isclose(ta.get_timestamp(), ts, rel_tol=0, abs_tol=1e-6) + torch.testing.assert_close(ta.get_action(), action) + assert ta.get_timestep() == 0 + + +def test_timed_observation_getters(): + """TimedObservation stores & returns timestamp, dict and timestep.""" + ts = time.time() + obs_dict = {"observation.state": torch.ones(6)} + to = TimedObservation(timestamp=ts, observation=obs_dict, timestep=0) + + assert math.isclose(to.get_timestamp(), ts, rel_tol=0, abs_tol=1e-6) + assert to.get_observation() is obs_dict + assert to.get_timestep() == 0 + + +def test_timed_data_deserialization_data_getters(): + """TimedAction / TimedObservation survive a round-trip through ``pickle``. + + The async-inference stack uses ``pickle.dumps`` to move these objects across + the gRPC boundary (see RobotClient.send_observation and PolicyServer.StreamActions). + This test ensures that the payload keeps its content intact after + the (de)serialization round-trip. 
+ """ + ts = time.time() + + # ------------------------------------------------------------------ + # TimedAction + # ------------------------------------------------------------------ + original_action = torch.randn(6) + ta_in = TimedAction(timestamp=ts, action=original_action, timestep=13) + + # Serialize → bytes → deserialize + ta_bytes = pickle.dumps(ta_in) # nosec + ta_out: TimedAction = pickle.loads(ta_bytes) # nosec B301 + + # Identity & content checks + assert math.isclose(ta_out.get_timestamp(), ts, rel_tol=0, abs_tol=1e-6) + assert ta_out.get_timestep() == 13 + torch.testing.assert_close(ta_out.get_action(), original_action) + + # ------------------------------------------------------------------ + # TimedObservation + # ------------------------------------------------------------------ + obs_dict = {"observation.state": torch.arange(4).float()} + to_in = TimedObservation(timestamp=ts, observation=obs_dict, timestep=7, must_go=True) + + to_bytes = pickle.dumps(to_in) # nosec + to_out: TimedObservation = pickle.loads(to_bytes) # nosec B301 + + assert math.isclose(to_out.get_timestamp(), ts, rel_tol=0, abs_tol=1e-6) + assert to_out.get_timestep() == 7 + assert to_out.must_go is True + assert to_out.get_observation().keys() == obs_dict.keys() + torch.testing.assert_close(to_out.get_observation()["observation.state"], obs_dict["observation.state"]) + + +# --------------------------------------------------------------------- +# observations_similar() +# --------------------------------------------------------------------- + + +def _make_obs(state: torch.Tensor) -> TimedObservation: + """Create a TimedObservation with raw robot observation format.""" + return TimedObservation( + timestamp=time.time(), + observation={ + "shoulder": state[0].item() if len(state) > 0 else 0.0, + "elbow": state[1].item() if len(state) > 1 else 0.0, + "wrist": state[2].item() if len(state) > 2 else 0.0, + "gripper": state[3].item() if len(state) > 3 else 0.0, + }, + timestep=0, + ) 
+ + +def test_observations_similar_true(): + """Distance below atol → observations considered similar.""" + # Create mock lerobot features for the similarity check + lerobot_features = { + "observation.state": { + "dtype": "float32", + "shape": [4], + "names": ["shoulder", "elbow", "wrist", "gripper"], + } + } + + obs1 = _make_obs(torch.zeros(4)) + obs2 = _make_obs(0.5 * torch.ones(4)) + assert observations_similar(obs1, obs2, lerobot_features, atol=2.0) + + obs3 = _make_obs(2.0 * torch.ones(4)) + assert not observations_similar(obs1, obs3, lerobot_features, atol=2.0) + + +# --------------------------------------------------------------------- +# raw_observation_to_observation and helpers +# --------------------------------------------------------------------- + + +def _create_mock_robot_observation(): + """Create a mock robot observation with motor positions and camera images.""" + return { + "shoulder": 1.0, + "elbow": 2.0, + "wrist": 3.0, + "gripper": 0.5, + "laptop": np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8), + "phone": np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8), + } + + +def _create_mock_lerobot_features(): + """Create mock lerobot features mapping similar to what hw_to_dataset_features returns.""" + return { + "observation.state": { + "dtype": "float32", + "shape": [4], + "names": ["shoulder", "elbow", "wrist", "gripper"], + }, + "observation.images.laptop": { + "dtype": "image", + "shape": [480, 640, 3], + "names": ["height", "width", "channels"], + }, + "observation.images.phone": { + "dtype": "image", + "shape": [480, 640, 3], + "names": ["height", "width", "channels"], + }, + } + + +def _create_mock_policy_image_features(): + """Create mock policy image features with different resolutions.""" + return { + "observation.images.laptop": PolicyFeature( + type=FeatureType.VISUAL, + shape=(3, 224, 224), # Policy expects smaller resolution + ), + "observation.images.phone": PolicyFeature( + type=FeatureType.VISUAL, + 
shape=(3, 160, 160), # Different resolution for second camera + ), + } + + +def test_prepare_image(): + """Test image preprocessing: int8 → float32, normalization to [0,1].""" + # Create mock int8 image data + image_int8 = torch.randint(0, 256, size=(3, 224, 224), dtype=torch.uint8) + + processed = prepare_image(image_int8) + + # Check dtype conversion + assert processed.dtype == torch.float32 + + # Check normalization range + assert processed.min() >= 0.0 + assert processed.max() <= 1.0 + + # Check that values are scaled correctly (255 → 1.0, 0 → 0.0) + if image_int8.max() == 255: + assert torch.isclose(processed.max(), torch.tensor(1.0), atol=1e-6) + if image_int8.min() == 0: + assert torch.isclose(processed.min(), torch.tensor(0.0), atol=1e-6) + + # Check memory contiguity + assert processed.is_contiguous() + + +def test_resize_robot_observation_image(): + """Test image resizing from robot resolution to policy resolution.""" + # Create mock image: (H=480, W=640, C=3) + original_image = torch.randint(0, 256, size=(480, 640, 3), dtype=torch.uint8) + target_shape = (3, 224, 224) # (C, H, W) + + resized = resize_robot_observation_image(original_image, target_shape) + + # Check output shape matches target + assert resized.shape == target_shape + + # Check that original image had different dimensions + assert original_image.shape != resized.shape + + # Check that resizing preserves value range + assert resized.min() >= 0 + assert resized.max() <= 255 + + +def test_prepare_raw_observation(): + """Test the preparation of raw robot observation to lerobot format.""" + robot_obs = _create_mock_robot_observation() + lerobot_features = _create_mock_lerobot_features() + policy_image_features = _create_mock_policy_image_features() + + prepared = prepare_raw_observation(robot_obs, lerobot_features, policy_image_features) + + # Check that state is properly extracted and batched + assert "observation.state" in prepared + state = prepared["observation.state"] + assert 
isinstance(state, torch.Tensor) + assert state.shape == (1, 4) # Batched state + + # Check that images are processed and resized + assert "observation.images.laptop" in prepared + assert "observation.images.phone" in prepared + + laptop_img = prepared["observation.images.laptop"] + phone_img = prepared["observation.images.phone"] + + # Check image shapes match policy requirements + assert laptop_img.shape == policy_image_features["observation.images.laptop"].shape + assert phone_img.shape == policy_image_features["observation.images.phone"].shape + + # Check that images are tensors + assert isinstance(laptop_img, torch.Tensor) + assert isinstance(phone_img, torch.Tensor) + + +def test_raw_observation_to_observation_basic(): + """Test the main raw_observation_to_observation function.""" + robot_obs = _create_mock_robot_observation() + lerobot_features = _create_mock_lerobot_features() + policy_image_features = _create_mock_policy_image_features() + device = "cpu" + + observation = raw_observation_to_observation(robot_obs, lerobot_features, policy_image_features, device) + + # Check that all expected keys are present + assert "observation.state" in observation + assert "observation.images.laptop" in observation + assert "observation.images.phone" in observation + + # Check state processing + state = observation["observation.state"] + assert isinstance(state, torch.Tensor) + assert state.device.type == device + assert state.shape == (1, 4) # Batched + + # Check image processing + laptop_img = observation["observation.images.laptop"] + phone_img = observation["observation.images.phone"] + + # Images should have batch dimension: (B, C, H, W) + assert laptop_img.shape == (1, 3, 224, 224) + assert phone_img.shape == (1, 3, 160, 160) + + # Check device placement + assert laptop_img.device.type == device + assert phone_img.device.type == device + + # Check image dtype and range (should be float32 in [0, 1]) + assert laptop_img.dtype == torch.float32 + assert phone_img.dtype 
== torch.float32 + assert laptop_img.min() >= 0.0 and laptop_img.max() <= 1.0 + assert phone_img.min() >= 0.0 and phone_img.max() <= 1.0 + + +def test_raw_observation_to_observation_with_non_tensor_data(): + """Test that non-tensor data (like task strings) is preserved.""" + robot_obs = _create_mock_robot_observation() + robot_obs["task"] = "pick up the red cube" # Add string instruction + + lerobot_features = _create_mock_lerobot_features() + policy_image_features = _create_mock_policy_image_features() + device = "cpu" + + observation = raw_observation_to_observation(robot_obs, lerobot_features, policy_image_features, device) + + # Check that task string is preserved + assert "task" in observation + assert observation["task"] == "pick up the red cube" + assert isinstance(observation["task"], str) + + +@torch.no_grad() +def test_raw_observation_to_observation_device_handling(): + """Test that tensors are properly moved to the specified device.""" + device = "mps" if torch.backends.mps.is_available() else "cpu" + + robot_obs = _create_mock_robot_observation() + lerobot_features = _create_mock_lerobot_features() + policy_image_features = _create_mock_policy_image_features() + + observation = raw_observation_to_observation(robot_obs, lerobot_features, policy_image_features, device) + + # Check that all tensors are on the correct device + for key, value in observation.items(): + if isinstance(value, torch.Tensor): + assert value.device.type == device, f"Tensor {key} not on {device}" + + +def test_raw_observation_to_observation_deterministic(): + """Test that the function produces consistent results for the same input.""" + robot_obs = _create_mock_robot_observation() + lerobot_features = _create_mock_lerobot_features() + policy_image_features = _create_mock_policy_image_features() + device = "cpu" + + # Run twice with same input + obs1 = raw_observation_to_observation(robot_obs, lerobot_features, policy_image_features, device) + obs2 = 
raw_observation_to_observation(robot_obs, lerobot_features, policy_image_features, device) + + # Results should be identical + assert set(obs1.keys()) == set(obs2.keys()) + + for key in obs1: + if isinstance(obs1[key], torch.Tensor): + torch.testing.assert_close(obs1[key], obs2[key]) + else: + assert obs1[key] == obs2[key] + + +def test_image_processing_pipeline_preserves_content(): + """Test that the image processing pipeline preserves recognizable patterns.""" + # Create an image with a specific pattern + original_img = np.zeros((100, 100, 3), dtype=np.uint8) + original_img[25:75, 25:75, :] = 255 # White square in center + + robot_obs = {"shoulder": 1.0, "elbow": 1.0, "wrist": 1.0, "gripper": 1.0, "laptop": original_img} + lerobot_features = { + "observation.state": { + "dtype": "float32", + "shape": [4], + "names": ["shoulder", "elbow", "wrist", "gripper"], + }, + "observation.images.laptop": { + "dtype": "image", + "shape": [100, 100, 3], + "names": ["height", "width", "channels"], + }, + } + policy_image_features = { + "observation.images.laptop": PolicyFeature( + type=FeatureType.VISUAL, + shape=(3, 50, 50), # Downsamples from 100x100 + ) + } + + observation = raw_observation_to_observation(robot_obs, lerobot_features, policy_image_features, "cpu") + + processed_img = observation["observation.images.laptop"].squeeze(0) # Remove batch dim + + # Check that the center region has higher values than corners + # Due to bilinear interpolation, exact values will change but pattern should remain + center_val = processed_img[:, 25, 25].mean() # Center of 50x50 image + corner_val = processed_img[:, 5, 5].mean() # Corner + + assert center_val > corner_val, "Image processing should preserve recognizable patterns" diff --git a/tests/async_inference/test_policy_server.py b/tests/async_inference/test_policy_server.py new file mode 100644 index 000000000..5c795e7ec --- /dev/null +++ b/tests/async_inference/test_policy_server.py @@ -0,0 +1,215 @@ +# Copyright 2025 The 
HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit-tests for the `PolicyServer` core logic. +Monkey-patch the `policy` attribute with a stub so that no real model inference is performed. +""" + +from __future__ import annotations + +import time + +import pytest +import torch + +from lerobot.configs.types import PolicyFeature +from tests.utils import require_package + +# ----------------------------------------------------------------------------- +# Test fixtures +# ----------------------------------------------------------------------------- + + +class MockPolicy: + """A minimal mock for an actual policy, returning zeros. + Refer to tests/policies for tests of the individual policies supported.""" + + class _Config: + robot_type = "dummy_robot" + + @property + def image_features(self) -> dict[str, PolicyFeature]: + """Empty image features since this test doesn't use images.""" + return {} + + def predict_action_chunk(self, observation: dict[str, torch.Tensor]) -> torch.Tensor: + """Return a chunk of 20 dummy actions.""" + batch_size = len(observation["observation.state"]) + return torch.zeros(batch_size, 20, 6) + + def __init__(self): + self.config = self._Config() + + def to(self, *args, **kwargs): + # The server calls `policy.to(device)`. This stub ignores it. + return self + + def model(self, batch: dict) -> torch.Tensor: + # Return a chunk of 20 dummy actions. 
+ batch_size = len(batch["robot_type"]) + return torch.zeros(batch_size, 20, 6) + + +@pytest.fixture +@require_package("grpc") +def policy_server(): + """Fresh `PolicyServer` instance with a stubbed-out policy model.""" + # Import only when the test actually runs (after decorator check) + from lerobot.scripts.server.configs import PolicyServerConfig + from lerobot.scripts.server.policy_server import PolicyServer + + test_config = PolicyServerConfig(host="localhost", port=9999) + server = PolicyServer(test_config) + # Replace the real policy with our fast, deterministic stub. + server.policy = MockPolicy() + server.actions_per_chunk = 20 + server.device = "cpu" + + # Add mock lerobot_features that the observation similarity functions need + server.lerobot_features = { + "observation.state": { + "dtype": "float32", + "shape": [6], + "names": ["joint1", "joint2", "joint3", "joint4", "joint5", "joint6"], + } + } + + return server + + +# ----------------------------------------------------------------------------- +# Helper utilities for tests +# ----------------------------------------------------------------------------- + + +def _make_obs(state: torch.Tensor, timestep: int = 0, must_go: bool = False): + """Create a TimedObservation with a given state vector.""" + # Import only when needed + from lerobot.scripts.server.helpers import TimedObservation + + return TimedObservation( + observation={ + "joint1": state[0].item() if len(state) > 0 else 0.0, + "joint2": state[1].item() if len(state) > 1 else 0.0, + "joint3": state[2].item() if len(state) > 2 else 0.0, + "joint4": state[3].item() if len(state) > 3 else 0.0, + "joint5": state[4].item() if len(state) > 4 else 0.0, + "joint6": state[5].item() if len(state) > 5 else 0.0, + }, + timestamp=time.time(), + timestep=timestep, + must_go=must_go, + ) + + +# ----------------------------------------------------------------------------- +# Tests +# ----------------------------------------------------------------------------- 
+ + +def test_time_action_chunk(policy_server): + """Verify that `_time_action_chunk` assigns correct timestamps and timesteps.""" + start_ts = time.time() + start_t = 10 + # A chunk of 3 action tensors. + action_tensors = [torch.randn(6) for _ in range(3)] + + timed_actions = policy_server._time_action_chunk(start_ts, action_tensors, start_t) + + assert len(timed_actions) == 3 + # Check timesteps + assert [ta.get_timestep() for ta in timed_actions] == [10, 11, 12] + # Check timestamps + expected_timestamps = [ + start_ts, + start_ts + policy_server.config.environment_dt, + start_ts + 2 * policy_server.config.environment_dt, + ] + for ta, expected_ts in zip(timed_actions, expected_timestamps, strict=True): + assert abs(ta.get_timestamp() - expected_ts) < 1e-6 + + +def test_maybe_enqueue_observation_must_go(policy_server): + """An observation with `must_go=True` is always enqueued.""" + obs = _make_obs(torch.zeros(6), must_go=True) + assert policy_server._enqueue_observation(obs) is True + assert policy_server.observation_queue.qsize() == 1 + assert policy_server.observation_queue.get_nowait() is obs + + +def test_maybe_enqueue_observation_dissimilar(policy_server): + """A dissimilar observation (not `must_go`) is enqueued.""" + # Set a last predicted observation. + policy_server.last_processed_obs = _make_obs(torch.zeros(6)) + # Create a new, dissimilar observation. + new_obs = _make_obs(torch.ones(6) * 5) # High norm difference + + assert policy_server._enqueue_observation(new_obs) is True + assert policy_server.observation_queue.qsize() == 1 + + +def test_maybe_enqueue_observation_is_skipped(policy_server): + """A similar observation (not `must_go`) is skipped.""" + # Set a last predicted observation. + policy_server.last_processed_obs = _make_obs(torch.zeros(6)) + # Create a new, very similar observation. 
+ new_obs = _make_obs(torch.zeros(6) + 1e-4) + + assert policy_server._enqueue_observation(new_obs) is False + assert policy_server.observation_queue.empty() is True + + +def test_obs_sanity_checks(policy_server): + """Unit-test the private `_obs_sanity_checks` helper.""" + prev = _make_obs(torch.zeros(6), timestep=0) + + # Case 1 – timestep already predicted + policy_server._predicted_timesteps.add(1) + obs_same_ts = _make_obs(torch.ones(6), timestep=1) + assert policy_server._obs_sanity_checks(obs_same_ts, prev) is False + + # Case 2 – observation too similar + policy_server._predicted_timesteps.clear() + obs_similar = _make_obs(torch.zeros(6) + 1e-4, timestep=2) + assert policy_server._obs_sanity_checks(obs_similar, prev) is False + + # Case 3 – genuinely new & dissimilar observation passes + obs_ok = _make_obs(torch.ones(6) * 5, timestep=3) + assert policy_server._obs_sanity_checks(obs_ok, prev) is True + + +def test_predict_action_chunk(monkeypatch, policy_server): + """End-to-end test of `_predict_action_chunk` with a stubbed _get_action_chunk.""" + # Import only when needed + from lerobot.scripts.server.policy_server import PolicyServer + + # Force server to act-style policy; patch method to return deterministic tensor + policy_server.policy_type = "act" + action_dim = 6 + batch_size = 1 + actions_per_chunk = policy_server.actions_per_chunk + + def _fake_get_action_chunk(_self, _obs, _type="act"): + return torch.zeros(batch_size, actions_per_chunk, action_dim) + + monkeypatch.setattr(PolicyServer, "_get_action_chunk", _fake_get_action_chunk, raising=True) + + obs = _make_obs(torch.zeros(6), timestep=5) + timed_actions = policy_server._predict_action_chunk(obs) + + assert len(timed_actions) == actions_per_chunk + assert [ta.get_timestep() for ta in timed_actions] == list(range(5, 5 + actions_per_chunk)) + + for i, ta in enumerate(timed_actions): + expected_ts = obs.get_timestamp() + i * policy_server.config.environment_dt + assert abs(ta.get_timestamp() - 
expected_ts) < 1e-6 diff --git a/tests/async_inference/test_robot_client.py b/tests/async_inference/test_robot_client.py new file mode 100644 index 000000000..d1273ae63 --- /dev/null +++ b/tests/async_inference/test_robot_client.py @@ -0,0 +1,234 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit-tests for the `RobotClient` action-queue logic (pure Python, no gRPC). + +We monkey-patch `lerobot.common.robot_devices.robots.utils.make_robot` so that +no real hardware is accessed. Only the queue-update mechanism is verified. +""" + +from __future__ import annotations + +import time +from queue import Queue + +import pytest +import torch + +# Skip entire module if grpc is not available +pytest.importorskip("grpc") + +# ----------------------------------------------------------------------------- +# Test fixtures +# ----------------------------------------------------------------------------- + + +@pytest.fixture() +def robot_client(): + """Fresh `RobotClient` instance for each test case (no threads started). 
+ Uses DummyRobot.""" + # Import only when the test actually runs (after decorator check) + from lerobot.scripts.server.configs import RobotClientConfig + from lerobot.scripts.server.robot_client import RobotClient + from tests.mocks.mock_robot import MockRobotConfig + + test_config = MockRobotConfig() + + # gRPC channel is not actually used in tests, so using a dummy address + test_config = RobotClientConfig( + robot=test_config, + server_address="localhost:9999", + policy_type="test", + pretrained_name_or_path="test", + actions_per_chunk=20, + verify_robot_cameras=False, + ) + + client = RobotClient(test_config) + + # Initialize attributes that are normally set in start() method + client.chunks_received = 0 + client.available_actions_size = [] + + yield client + + if client.robot.is_connected: + client.stop() + + +# ----------------------------------------------------------------------------- +# Helper utilities for tests +# ----------------------------------------------------------------------------- + + +def _make_actions(start_ts: float, start_t: int, count: int): + """Generate `count` consecutive TimedAction objects starting at timestep `start_t`.""" + from lerobot.scripts.server.helpers import TimedAction + + fps = 30 # emulates most common frame-rate + actions = [] + for i in range(count): + timestep = start_t + i + timestamp = start_ts + i * (1 / fps) + action_tensor = torch.full((6,), timestep, dtype=torch.float32) + actions.append(TimedAction(action=action_tensor, timestep=timestep, timestamp=timestamp)) + return actions + + +# ----------------------------------------------------------------------------- +# Tests +# ----------------------------------------------------------------------------- + + +def test_update_action_queue_discards_stale(robot_client): + """`_update_action_queue` must drop actions with `timestep` <= `latest_action`.""" + + # Pretend we already executed up to action #4 + robot_client.latest_action = 4 + + # Incoming chunk contains 
timesteps 3..7 -> expect 5,6,7 kept. + incoming = _make_actions(start_ts=time.time(), start_t=3, count=5) # 3,4,5,6,7 + + robot_client._aggregate_action_queues(incoming) + + # Extract timesteps from queue + resulting_timesteps = [a.get_timestep() for a in robot_client.action_queue.queue] + + assert resulting_timesteps == [5, 6, 7] + + +@pytest.mark.parametrize( + "weight_old, weight_new", + [ + (1.0, 0.0), + (0.0, 1.0), + (0.5, 0.5), + (0.2, 0.8), + (0.8, 0.2), + (0.1, 0.9), + (0.9, 0.1), + ], +) +def test_aggregate_action_queues_combines_actions_in_overlap( + robot_client, weight_old: float, weight_new: float +): + """`_aggregate_action_queues` must combine actions on overlapping timesteps according + to the provided aggregate_fn, here tested with multiple coefficients.""" + from lerobot.scripts.server.helpers import TimedAction + + robot_client.chunks_received = 0 + + # Pretend we already executed up to action #4, and queue contains actions for timesteps 5..6 + robot_client.latest_action = 4 + current_actions = _make_actions( + start_ts=time.time(), start_t=5, count=2 + ) # actions are [torch.ones(6), torch.ones(6), ...] + current_actions = [ + TimedAction(action=10 * a.get_action(), timestep=a.get_timestep(), timestamp=a.get_timestamp()) + for a in current_actions + ] + + for a in current_actions: + robot_client.action_queue.put(a) + + # Incoming chunk contains timesteps 3..7 -> expect 5,6,7 kept. 
+ incoming = _make_actions(start_ts=time.time(), start_t=3, count=5) # 3,4,5,6,7 + + overlap_timesteps = [5, 6] # properly tested in test_aggregate_action_queues_discards_stale + nonoverlap_timesteps = [7] + + robot_client._aggregate_action_queues( + incoming, aggregate_fn=lambda x1, x2: weight_old * x1 + weight_new * x2 + ) + + queue_overlap_actions = [] + queue_non_overlap_actions = [] + for a in robot_client.action_queue.queue: + if a.get_timestep() in overlap_timesteps: + queue_overlap_actions.append(a) + elif a.get_timestep() in nonoverlap_timesteps: + queue_non_overlap_actions.append(a) + + queue_overlap_actions = sorted(queue_overlap_actions, key=lambda x: x.get_timestep()) + queue_non_overlap_actions = sorted(queue_non_overlap_actions, key=lambda x: x.get_timestep()) + + assert torch.allclose( + queue_overlap_actions[0].get_action(), + weight_old * current_actions[0].get_action() + weight_new * incoming[-3].get_action(), + ) + assert torch.allclose( + queue_overlap_actions[1].get_action(), + weight_old * current_actions[1].get_action() + weight_new * incoming[-2].get_action(), + ) + assert torch.allclose(queue_non_overlap_actions[0].get_action(), incoming[-1].get_action()) + + +@pytest.mark.parametrize( + "chunk_size, queue_len, expected", + [ + (20, 12, False), # 12 / 20 = 0.6 > g=0.5 threshold, not ready to send + (20, 8, True), # 8 / 20 = 0.4 <= g=0.5, ready to send + (10, 5, True), + (10, 6, False), + ], +) +def test_ready_to_send_observation(robot_client, chunk_size: int, queue_len: int, expected: bool): + """Validate `_ready_to_send_observation` ratio logic for various sizes.""" + + robot_client.action_chunk_size = chunk_size + + # Clear any existing actions then fill with `queue_len` dummy entries ---- + robot_client.action_queue = Queue() + + dummy_actions = _make_actions(start_ts=time.time(), start_t=0, count=queue_len) + for act in dummy_actions: + robot_client.action_queue.put(act) + + assert robot_client._ready_to_send_observation() is expected 
+ + +@pytest.mark.parametrize( + "g_threshold, expected", + [ + # The condition is `queue_size / chunk_size <= g`. + # Here, ratio = 6 / 10 = 0.6. + (0.0, False), # 0.6 <= 0.0 is False + (0.1, False), + (0.2, False), + (0.3, False), + (0.4, False), + (0.5, False), + (0.6, True), # 0.6 <= 0.6 is True + (0.7, True), + (0.8, True), + (0.9, True), + (1.0, True), + ], +) +def test_ready_to_send_observation_with_varying_threshold(robot_client, g_threshold: float, expected: bool): + """Validate `_ready_to_send_observation` with fixed sizes and varying `g`.""" + # Fixed sizes for this test: ratio = 6 / 10 = 0.6 + chunk_size = 10 + queue_len = 6 + + robot_client.action_chunk_size = chunk_size + # This is the parameter we are testing + robot_client._chunk_size_threshold = g_threshold + + # Fill queue with dummy actions + robot_client.action_queue = Queue() + dummy_actions = _make_actions(start_ts=time.time(), start_t=0, count=queue_len) + for act in dummy_actions: + robot_client.action_queue.put(act) + + assert robot_client._ready_to_send_observation() is expected From abe51eeba3ad7fde864147c187fcf71ad7ae469c Mon Sep 17 00:00:00 2001 From: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Date: Thu, 10 Jul 2025 12:24:40 +0200 Subject: [PATCH 04/20] Update async docs with blogpost (#1479) Co-authored-by: Michel Aractingi --- docs/source/async.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/async.mdx b/docs/source/async.mdx index 0a0823cf0..6ff05a88a 100644 --- a/docs/source/async.mdx +++ b/docs/source/async.mdx @@ -18,7 +18,7 @@ This is fundamentally different from synchronous inference (sync), where the rob --- ## Getting started with async inference -You can read more information on asynchronous inference in our [blogpost](NOTE:blogpost). Here, we report a getting started guide meant to help you setup and run asynchronous inference in your setup. 
+You can read more information on asynchronous inference in our [blogpost](https://huggingface.co/blog/async-robot-inference). This guide is designed to help you quickly set up and run asynchronous inference in your environment. First, install `lerobot` with the `async` tag, to install the extra dependencies required to run async inference. From d2645cb19fc521e5b117fe03d90a84f698d3d3f6 Mon Sep 17 00:00:00 2001 From: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Date: Thu, 10 Jul 2025 20:13:56 +0200 Subject: [PATCH 05/20] fix(docs): Record-Upload failed? Don't panic! (#1478) * fix: add instruction to manually upload dataset Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> * fix: repo type is explicited --------- Signed-off-by: Francesco Capuano <74058581+fracapuano@users.noreply.github.com> Co-authored-by: Michel Aractingi --- docs/source/il_robots.mdx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index cfa0a2809..2e8ac3619 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -282,6 +282,12 @@ Your dataset will be automatically tagged with `LeRobot` for the community to fi You can look for other LeRobot datasets on the hub by searching for `LeRobot` [tags](https://huggingface.co/datasets?other=LeRobot). 
+You can also push your local dataset to the Hub manually, running: +```bash +huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset +``` + + #### Record function The `record` function provides a suite of tools for capturing and managing data during robot operation: From 519b76110efeea55a4f919895d0029dc0df41e8b Mon Sep 17 00:00:00 2001 From: Ben Zhang <5977478+ben-z@users.noreply.github.com> Date: Sun, 13 Jul 2025 12:58:05 -0700 Subject: [PATCH 06/20] Remove random noise injected by policy server (#1496) --- src/lerobot/scripts/server/policy_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lerobot/scripts/server/policy_server.py b/src/lerobot/scripts/server/policy_server.py index 669ccc58e..13ba976e2 100644 --- a/src/lerobot/scripts/server/policy_server.py +++ b/src/lerobot/scripts/server/policy_server.py @@ -323,7 +323,7 @@ class PolicyServer(async_inference_pb2_grpc.AsyncInferenceServicer): if chunk.ndim != 3: chunk = chunk.unsqueeze(0) # adding batch dimension, now shape is (B, chunk_size, action_dim) - return chunk[:, : self.actions_per_chunk, :] + torch.randn_like(chunk[:, : self.actions_per_chunk, :]) + return chunk[:, : self.actions_per_chunk, :] def _predict_action_chunk(self, observation_t: TimedObservation) -> list[TimedAction]: """Predict an action chunk based on an observation""" From 91b110d8063afdf7f5086aad0be8eea0ac939892 Mon Sep 17 00:00:00 2001 From: Adil Zouitine Date: Tue, 15 Jul 2025 10:28:19 +0200 Subject: [PATCH 07/20] fix(mps): gradient exploding and nan loss issues with ACT (#1490) Co-authored-by: Michel Aractingi --- src/lerobot/policies/act/modeling_act.py | 15 ++++++--------- src/lerobot/scripts/train.py | 4 ++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/lerobot/policies/act/modeling_act.py b/src/lerobot/policies/act/modeling_act.py index f66c8ae82..aa81d3cd2 100644 --- a/src/lerobot/policies/act/modeling_act.py +++ 
b/src/lerobot/policies/act/modeling_act.py @@ -485,12 +485,10 @@ class ACT(nn.Module): self.encoder_env_state_input_proj(batch["observation.environment_state"]) ) - # Camera observation features and positional embeddings. if self.config.image_features: - all_cam_features = [] - all_cam_pos_embeds = [] - # For a list of images, the H and W may vary but H*W is constant. + # NOTE: If modifying this section, verify on MPS devices that + # gradients remain stable (no explosions or NaNs). for img in batch["observation.images"]: cam_features = self.backbone(img)["feature_map"] cam_pos_embed = self.encoder_cam_feat_pos_embed(cam_features).to(dtype=cam_features.dtype) @@ -500,11 +498,10 @@ class ACT(nn.Module): cam_features = einops.rearrange(cam_features, "b c h w -> (h w) b c") cam_pos_embed = einops.rearrange(cam_pos_embed, "b c h w -> (h w) b c") - all_cam_features.append(cam_features) - all_cam_pos_embeds.append(cam_pos_embed) - - encoder_in_tokens.extend(torch.cat(all_cam_features, axis=0)) - encoder_in_pos_embed.extend(torch.cat(all_cam_pos_embeds, axis=0)) + # Extend immediately instead of accumulating and concatenating + # Convert to list to extend properly + encoder_in_tokens.extend(list(cam_features)) + encoder_in_pos_embed.extend(list(cam_pos_embed)) # Stack all tokens along the sequence dimension. 
encoder_in_tokens = torch.stack(encoder_in_tokens, axis=0) diff --git a/src/lerobot/scripts/train.py b/src/lerobot/scripts/train.py index 2f2e88de6..f09d231a8 100644 --- a/src/lerobot/scripts/train.py +++ b/src/lerobot/scripts/train.py @@ -180,7 +180,7 @@ def train(cfg: TrainPipelineConfig): batch_size=cfg.batch_size, shuffle=shuffle, sampler=sampler, - pin_memory=device.type != "cpu", + pin_memory=device.type == "cuda", drop_last=False, ) dl_iter = cycle(dataloader) @@ -207,7 +207,7 @@ def train(cfg: TrainPipelineConfig): for key in batch: if isinstance(batch[key], torch.Tensor): - batch[key] = batch[key].to(device, non_blocking=True) + batch[key] = batch[key].to(device, non_blocking=device.type == "cuda") train_tracker, output_dict = update_policy( train_tracker, From 724874e063ecfb892bbcbc4a5e16fde5860cb28c Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Tue, 15 Jul 2025 11:27:01 +0200 Subject: [PATCH 08/20] Fix tests (#1510) --- tests/motors/test_feetech.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/motors/test_feetech.py b/tests/motors/test_feetech.py index 7f9e5dd7b..c5a170dd9 100644 --- a/tests/motors/test_feetech.py +++ b/tests/motors/test_feetech.py @@ -219,7 +219,7 @@ def test__write(addr, length, id_, value, mock_motors, dummy_motors): comm, error = bus._write(addr, length, id_, value) - assert mock_motors.stubs[stub].called + assert mock_motors.stubs[stub].wait_called() assert comm == scs.COMM_SUCCESS assert error == 0 @@ -371,9 +371,9 @@ def test_reset_calibration(mock_motors, dummy_motors): bus.reset_calibration() - assert all(mock_motors.stubs[stub].called for stub in write_homing_stubs) - assert all(mock_motors.stubs[stub].called for stub in write_mins_stubs) - assert all(mock_motors.stubs[stub].called for stub in write_maxes_stubs) + assert all(mock_motors.stubs[stub].wait_called() for stub in write_homing_stubs) + assert 
all(mock_motors.stubs[stub].wait_called() for stub in write_mins_stubs) + assert all(mock_motors.stubs[stub].wait_called() for stub in write_maxes_stubs) def test_set_half_turn_homings(mock_motors, dummy_motors): @@ -410,7 +410,7 @@ def test_set_half_turn_homings(mock_motors, dummy_motors): bus.reset_calibration.assert_called_once() assert mock_motors.stubs[read_pos_stub].called - assert all(mock_motors.stubs[stub].called for stub in write_homing_stubs) + assert all(mock_motors.stubs[stub].wait_called() for stub in write_homing_stubs) def test_record_ranges_of_motion(mock_motors, dummy_motors): From 1b878c9155d7ff9783929d55035c942dd8bb7933 Mon Sep 17 00:00:00 2001 From: aka <47563398+todateman@users.noreply.github.com> Date: Tue, 15 Jul 2025 18:33:02 +0900 Subject: [PATCH 09/20] fix(record): Improve OpenCV backend handling for Windows systems (#1495) * fix(record): Improve OpenCV backend handling for Windows systems * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Resolved ruff's E402 error (import statements not at the beginning of the file): - Moved all import statements to the beginning of the file - Defined _fix_opencv_backend() as a function - Adjusted the timing of the fix call - Code structure conforming to ruff * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix(record): Correct OpenCV backend for Windows systems * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix(opencv): Set OpenCV environment variable for Windows systems * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix(opencv): Refactor MSMF hardware transform environment variable setting for Windows * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/lerobot/cameras/opencv/camera_opencv.py | 4 ++++ src/lerobot/cameras/utils.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lerobot/cameras/opencv/camera_opencv.py b/src/lerobot/cameras/opencv/camera_opencv.py index fd99922a4..1d7a1645d 100644 --- a/src/lerobot/cameras/opencv/camera_opencv.py +++ b/src/lerobot/cameras/opencv/camera_opencv.py @@ -18,12 +18,16 @@ Provides the OpenCVCamera class for capturing frames from cameras using OpenCV. import logging import math +import os import platform import time from pathlib import Path from threading import Event, Lock, Thread from typing import Any, Dict, List +# Fix MSMF hardware transform compatibility for Windows before importing cv2 +if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS" not in os.environ: + os.environ["OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS"] = "0" import cv2 import numpy as np diff --git a/src/lerobot/cameras/utils.py b/src/lerobot/cameras/utils.py index f8bbd6e70..1eb69840b 100644 --- a/src/lerobot/cameras/utils.py +++ b/src/lerobot/cameras/utils.py @@ -60,6 +60,8 @@ def get_cv2_backend() -> int: import cv2 if platform.system() == "Windows": - return cv2.CAP_AVFOUNDATION - else: + return cv2.CAP_MSMF # Use MSMF for Windows instead of AVFOUNDATION + # elif platform.system() == "Darwin": # macOS + # return cv2.CAP_AVFOUNDATION + else: # Linux and others return cv2.CAP_ANY From c4c0105a474008e6bfd0bfd4e6b4c8fa94684e82 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 12:28:22 +0200 Subject: [PATCH 10/20] [pre-commit.ci] pre-commit autoupdate (#1327) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/adhtruong/mirrors-typos: v1.33.1 → 
v1.34.0](https://github.com/adhtruong/mirrors-typos/compare/v1.33.1...v1.34.0) - [github.com/astral-sh/ruff-pre-commit: v0.11.13 → v0.12.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.11.13...v0.12.3) - [github.com/woodruffw/zizmor-pre-commit: v1.9.0 → v1.11.0](https://github.com/woodruffw/zizmor-pre-commit/compare/v1.9.0...v1.11.0) - [github.com/PyCQA/bandit: 1.8.3 → 1.8.6](https://github.com/PyCQA/bandit/compare/1.8.3...1.8.6) * Ignore B615 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> --- .pre-commit-config.yaml | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e1f971d39..e25f33ee0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/adhtruong/mirrors-typos - rev: v1.33.1 + rev: v1.34.0 hooks: - id: typos args: [--force-exclude] @@ -48,7 +48,7 @@ repos: - id: pyupgrade - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.13 + rev: v0.12.3 hooks: - id: ruff args: [--fix] @@ -62,12 +62,12 @@ repos: - id: gitleaks - repo: https://github.com/woodruffw/zizmor-pre-commit - rev: v1.9.0 + rev: v1.11.0 hooks: - id: zizmor - repo: https://github.com/PyCQA/bandit - rev: 1.8.3 + rev: 1.8.6 hooks: - id: bandit args: ["-c", "pyproject.toml"] diff --git a/pyproject.toml b/pyproject.toml index 81cb22a21..878a36dbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -133,7 +133,7 @@ exclude_dirs = [ "src/lerobot/policies/pi0/conversion_scripts", "src/lerobot/scripts/push_dataset_to_hub.py", ] -skips = ["B101", "B311", "B404", "B603"] +skips = ["B101", "B311", "B404", "B603", "B615"] [tool.typos] default.extend-ignore-re = [ From 1c0ac8e3415adff7411846c73c6e9dfb94941eb1 Mon Sep 17 00:00:00 2001 From: Ben 
Zhang <5977478+ben-z@users.noreply.github.com> Date: Tue, 15 Jul 2025 03:29:07 -0700 Subject: [PATCH 11/20] Parse draccus subclass overrides when using `--policy.path` (#1501) * Parse draccus subclass overrides when using --policy.path * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> --- src/lerobot/configs/policies.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/lerobot/configs/policies.py b/src/lerobot/configs/policies.py index 36e6ea2e5..05f3296b8 100644 --- a/src/lerobot/configs/policies.py +++ b/src/lerobot/configs/policies.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import abc +import json import logging import os +import tempfile from dataclasses import dataclass, field from pathlib import Path from typing import Type, TypeVar @@ -183,8 +185,22 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): f"{CONFIG_NAME} not found on the HuggingFace Hub in {model_id}" ) from e - # HACK: this is very ugly, ideally we'd like to be able to do that natively with draccus + # HACK: Parse the original config to get the config subclass, so that we can + # apply cli overrides. 
+ # This is very ugly, ideally we'd like to be able to do that natively with draccus # something like --policy.path (in addition to --policy.type) - cli_overrides = policy_kwargs.pop("cli_overrides", []) with draccus.config_type("json"): - return draccus.parse(cls, config_file, args=cli_overrides) + orig_config = draccus.parse(cls, config_file, args=[]) + + with open(config_file) as f: + config = json.load(f) + + config.pop("type") + with tempfile.NamedTemporaryFile("w+") as f: + json.dump(config, f) + config_file = f.name + f.flush() + + cli_overrides = policy_kwargs.pop("cli_overrides", []) + with draccus.config_type("json"): + return draccus.parse(orig_config.__class__, config_file, args=cli_overrides) From 3efb4410f10c0b21f6e07456b35a9a3b07a1399d Mon Sep 17 00:00:00 2001 From: Eugene Mironov Date: Wed, 16 Jul 2025 02:23:00 +0700 Subject: [PATCH 12/20] Fix logging for mps in auto_select_torch_device (#1513) --- src/lerobot/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lerobot/utils/utils.py b/src/lerobot/utils/utils.py index 2e94a9c93..7a9717dce 100644 --- a/src/lerobot/utils/utils.py +++ b/src/lerobot/utils/utils.py @@ -48,7 +48,7 @@ def auto_select_torch_device() -> torch.device: logging.info("Cuda backend detected, using cuda.") return torch.device("cuda") elif torch.backends.mps.is_available(): - logging.info("Metal backend detected, using cuda.") + logging.info("Metal backend detected, using mps.") return torch.device("mps") else: logging.warning("No accelerated backend detected. 
Using default cpu, this will be slow.") From dfb1571bcf3a84d19ac84855942e72ac2fe04431 Mon Sep 17 00:00:00 2001 From: Michel Aractingi Date: Wed, 16 Jul 2025 11:31:25 +0200 Subject: [PATCH 13/20] Added missing licenses (#1517) * Added missing liscenses --- src/lerobot/motors/dynamixel/__init__.py | 16 ++++++++++++++++ src/lerobot/motors/feetech/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/hope_jr/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/koch_follower/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/lekiwi/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/so100_follower/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/so101_follower/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/stretch3/__init__.py | 16 ++++++++++++++++ src/lerobot/robots/viperx/__init__.py | 16 ++++++++++++++++ src/lerobot/teleoperators/__init__.py | 16 ++++++++++++++++ src/lerobot/teleoperators/homunculus/__init__.py | 16 ++++++++++++++++ src/lerobot/teleoperators/keyboard/__init__.py | 16 ++++++++++++++++ .../teleoperators/koch_leader/__init__.py | 16 ++++++++++++++++ .../teleoperators/so100_leader/__init__.py | 16 ++++++++++++++++ .../teleoperators/so101_leader/__init__.py | 16 ++++++++++++++++ .../teleoperators/stretch3_gamepad/__init__.py | 16 ++++++++++++++++ src/lerobot/teleoperators/widowx/__init__.py | 16 ++++++++++++++++ tests/configs/test_plugin_loading.py | 16 ++++++++++++++++ tests/mocks/mock_dynamixel.py | 16 ++++++++++++++++ tests/mocks/mock_feetech.py | 16 ++++++++++++++++ tests/mocks/mock_motors_bus.py | 14 ++++++++++++++ tests/mocks/mock_robot.py | 16 ++++++++++++++++ tests/mocks/mock_serial_patch.py | 16 ++++++++++++++++ tests/mocks/mock_teleop.py | 16 ++++++++++++++++ tests/motors/test_dynamixel.py | 16 ++++++++++++++++ tests/motors/test_feetech.py | 16 ++++++++++++++++ tests/motors/test_motors_bus.py | 16 ++++++++++++++++ tests/robots/test_so100_follower.py | 16 ++++++++++++++++ tests/test_control_robot.py | 16 ++++++++++++++++ 
tests/utils/test_encoding_utils.py | 16 ++++++++++++++++ 30 files changed, 478 insertions(+) diff --git a/src/lerobot/motors/dynamixel/__init__.py b/src/lerobot/motors/dynamixel/__init__.py index 3e414557e..425f8538a 100644 --- a/src/lerobot/motors/dynamixel/__init__.py +++ b/src/lerobot/motors/dynamixel/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .dynamixel import DriveMode, DynamixelMotorsBus, OperatingMode, TorqueMode from .tables import * diff --git a/src/lerobot/motors/feetech/__init__.py b/src/lerobot/motors/feetech/__init__.py index 911d1d19f..75da2d221 100644 --- a/src/lerobot/motors/feetech/__init__.py +++ b/src/lerobot/motors/feetech/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .feetech import DriveMode, FeetechMotorsBus, OperatingMode, TorqueMode from .tables import * diff --git a/src/lerobot/robots/hope_jr/__init__.py b/src/lerobot/robots/hope_jr/__init__.py index 324e3c8e8..26603ebb0 100644 --- a/src/lerobot/robots/hope_jr/__init__.py +++ b/src/lerobot/robots/hope_jr/__init__.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_hope_jr import HopeJrArmConfig, HopeJrHandConfig from .hope_jr_arm import HopeJrArm from .hope_jr_hand import HopeJrHand diff --git a/src/lerobot/robots/koch_follower/__init__.py b/src/lerobot/robots/koch_follower/__init__.py index ae98a2c38..6271c4e55 100644 --- a/src/lerobot/robots/koch_follower/__init__.py +++ b/src/lerobot/robots/koch_follower/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + from .config_koch_follower import KochFollowerConfig from .koch_follower import KochFollower diff --git a/src/lerobot/robots/lekiwi/__init__.py b/src/lerobot/robots/lekiwi/__init__.py index e3d10c5c1..ada2ff368 100644 --- a/src/lerobot/robots/lekiwi/__init__.py +++ b/src/lerobot/robots/lekiwi/__init__.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_lekiwi import LeKiwiClientConfig, LeKiwiConfig from .lekiwi import LeKiwi from .lekiwi_client import LeKiwiClient diff --git a/src/lerobot/robots/so100_follower/__init__.py b/src/lerobot/robots/so100_follower/__init__.py index 63c3e1c17..b995aab13 100644 --- a/src/lerobot/robots/so100_follower/__init__.py +++ b/src/lerobot/robots/so100_follower/__init__.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_so100_follower import SO100FollowerConfig, SO100FollowerEndEffectorConfig from .so100_follower import SO100Follower from .so100_follower_end_effector import SO100FollowerEndEffector diff --git a/src/lerobot/robots/so101_follower/__init__.py b/src/lerobot/robots/so101_follower/__init__.py index f6615b15b..9ff2baf45 100644 --- a/src/lerobot/robots/so101_follower/__init__.py +++ b/src/lerobot/robots/so101_follower/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_so101_follower import SO101FollowerConfig from .so101_follower import SO101Follower diff --git a/src/lerobot/robots/stretch3/__init__.py b/src/lerobot/robots/stretch3/__init__.py index e2a859cde..b3070bbd6 100644 --- a/src/lerobot/robots/stretch3/__init__.py +++ b/src/lerobot/robots/stretch3/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .configuration_stretch3 import Stretch3RobotConfig from .robot_stretch3 import Stretch3Robot diff --git a/src/lerobot/robots/viperx/__init__.py b/src/lerobot/robots/viperx/__init__.py index 522d02f1c..bfba07fc7 100644 --- a/src/lerobot/robots/viperx/__init__.py +++ b/src/lerobot/robots/viperx/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_viperx import ViperXConfig from .viperx import ViperX diff --git a/src/lerobot/teleoperators/__init__.py b/src/lerobot/teleoperators/__init__.py index ec93547f7..56f48af7e 100644 --- a/src/lerobot/teleoperators/__init__.py +++ b/src/lerobot/teleoperators/__init__.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config import TeleoperatorConfig from .teleoperator import Teleoperator from .utils import make_teleoperator_from_config diff --git a/src/lerobot/teleoperators/homunculus/__init__.py b/src/lerobot/teleoperators/homunculus/__init__.py index 04b5c0f2b..b3c6c0bf5 100644 --- a/src/lerobot/teleoperators/homunculus/__init__.py +++ b/src/lerobot/teleoperators/homunculus/__init__.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .config_homunculus import HomunculusArmConfig, HomunculusGloveConfig from .homunculus_arm import HomunculusArm from .homunculus_glove import HomunculusGlove diff --git a/src/lerobot/teleoperators/keyboard/__init__.py b/src/lerobot/teleoperators/keyboard/__init__.py index 5761bf788..72d01003a 100644 --- a/src/lerobot/teleoperators/keyboard/__init__.py +++ b/src/lerobot/teleoperators/keyboard/__init__.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .configuration_keyboard import KeyboardEndEffectorTeleopConfig, KeyboardTeleopConfig from .teleop_keyboard import KeyboardEndEffectorTeleop, KeyboardTeleop diff --git a/src/lerobot/teleoperators/koch_leader/__init__.py b/src/lerobot/teleoperators/koch_leader/__init__.py index ad2d6a0e4..1bf9d51db 100644 --- a/src/lerobot/teleoperators/koch_leader/__init__.py +++ b/src/lerobot/teleoperators/koch_leader/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_koch_leader import KochLeaderConfig from .koch_leader import KochLeader diff --git a/src/lerobot/teleoperators/so100_leader/__init__.py b/src/lerobot/teleoperators/so100_leader/__init__.py index 63c877e60..747416be2 100644 --- a/src/lerobot/teleoperators/so100_leader/__init__.py +++ b/src/lerobot/teleoperators/so100_leader/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_so100_leader import SO100LeaderConfig from .so100_leader import SO100Leader diff --git a/src/lerobot/teleoperators/so101_leader/__init__.py b/src/lerobot/teleoperators/so101_leader/__init__.py index 1f45170e9..11e277c91 100644 --- a/src/lerobot/teleoperators/so101_leader/__init__.py +++ b/src/lerobot/teleoperators/so101_leader/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_so101_leader import SO101LeaderConfig from .so101_leader import SO101Leader diff --git a/src/lerobot/teleoperators/stretch3_gamepad/__init__.py b/src/lerobot/teleoperators/stretch3_gamepad/__init__.py index ac45b6dd4..fa5a19974 100644 --- a/src/lerobot/teleoperators/stretch3_gamepad/__init__.py +++ b/src/lerobot/teleoperators/stretch3_gamepad/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .configuration_stretch3 import Stretch3GamePadConfig from .stretch3_gamepad import Stretch3GamePad diff --git a/src/lerobot/teleoperators/widowx/__init__.py b/src/lerobot/teleoperators/widowx/__init__.py index 122ee3290..42e312f49 100644 --- a/src/lerobot/teleoperators/widowx/__init__.py +++ b/src/lerobot/teleoperators/widowx/__init__.py @@ -1,2 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .config_widowx import WidowXConfig from .widowx import WidowX diff --git a/tests/configs/test_plugin_loading.py b/tests/configs/test_plugin_loading.py index 957574eb4..e81057c95 100644 --- a/tests/configs/test_plugin_loading.py +++ b/tests/configs/test_plugin_loading.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys from dataclasses import dataclass from pathlib import Path diff --git a/tests/mocks/mock_dynamixel.py b/tests/mocks/mock_dynamixel.py index 64592439f..00403d146 100644 --- a/tests/mocks/mock_dynamixel.py +++ b/tests/mocks/mock_dynamixel.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import abc from typing import Callable diff --git a/tests/mocks/mock_feetech.py b/tests/mocks/mock_feetech.py index e0b677d57..041c09421 100644 --- a/tests/mocks/mock_feetech.py +++ b/tests/mocks/mock_feetech.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import abc from typing import Callable diff --git a/tests/mocks/mock_motors_bus.py b/tests/mocks/mock_motors_bus.py index 91e33473d..a499dbfee 100644 --- a/tests/mocks/mock_motors_bus.py +++ b/tests/mocks/mock_motors_bus.py @@ -1,3 +1,17 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # ruff: noqa: N802 from lerobot.motors.motors_bus import ( diff --git a/tests/mocks/mock_robot.py b/tests/mocks/mock_robot.py index 971fc00ad..8108c7c25 100644 --- a/tests/mocks/mock_robot.py +++ b/tests/mocks/mock_robot.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import random from dataclasses import dataclass, field from functools import cached_property diff --git a/tests/mocks/mock_serial_patch.py b/tests/mocks/mock_serial_patch.py index e39923188..bde0efae2 100644 --- a/tests/mocks/mock_serial_patch.py +++ b/tests/mocks/mock_serial_patch.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import threading import time diff --git a/tests/mocks/mock_teleop.py b/tests/mocks/mock_teleop.py index c29cc9219..e37d4a2c5 100644 --- a/tests/mocks/mock_teleop.py +++ b/tests/mocks/mock_teleop.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import random from dataclasses import dataclass from functools import cached_property diff --git a/tests/motors/test_dynamixel.py b/tests/motors/test_dynamixel.py index a54e49056..d990b5b0f 100644 --- a/tests/motors/test_dynamixel.py +++ b/tests/motors/test_dynamixel.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import re import sys from typing import Generator diff --git a/tests/motors/test_feetech.py b/tests/motors/test_feetech.py index c5a170dd9..d6ea1db20 100644 --- a/tests/motors/test_feetech.py +++ b/tests/motors/test_feetech.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import re import sys from typing import Generator diff --git a/tests/motors/test_motors_bus.py b/tests/motors/test_motors_bus.py index 966af3fb0..27650ef1b 100644 --- a/tests/motors/test_motors_bus.py +++ b/tests/motors/test_motors_bus.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import re from unittest.mock import patch diff --git a/tests/robots/test_so100_follower.py b/tests/robots/test_so100_follower.py index 498eec94b..d76b9591a 100644 --- a/tests/robots/test_so100_follower.py +++ b/tests/robots/test_so100_follower.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from contextlib import contextmanager from unittest.mock import MagicMock, patch diff --git a/tests/test_control_robot.py b/tests/test_control_robot.py index 2ec6a2905..e45688c14 100644 --- a/tests/test_control_robot.py +++ b/tests/test_control_robot.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from lerobot.calibrate import CalibrateConfig, calibrate from lerobot.record import DatasetRecordConfig, RecordConfig, record from lerobot.replay import DatasetReplayConfig, ReplayConfig, replay diff --git a/tests/utils/test_encoding_utils.py b/tests/utils/test_encoding_utils.py index 9c9796762..813942862 100644 --- a/tests/utils/test_encoding_utils.py +++ b/tests/utils/test_encoding_utils.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import pytest from lerobot.utils.encoding_utils import ( From 816034948af0fcd2cdfb2ae51ea41eb42d98101a Mon Sep 17 00:00:00 2001 From: Eugene Mironov Date: Wed, 16 Jul 2025 21:13:01 +0700 Subject: [PATCH 14/20] [Async Inference] Add gRPC retry mechanism to Async client (#1485) Co-authored-by: Michel Aractingi --- src/lerobot/scripts/rl/actor.py | 31 ++-------------- src/lerobot/scripts/rl/learner.py | 5 +-- src/lerobot/scripts/rl/learner_service.py | 1 - src/lerobot/scripts/server/robot_client.py | 5 ++- src/lerobot/transport/utils.py | 41 ++++++++++++++++++++++ 5 files changed, 50 insertions(+), 33 deletions(-) diff --git a/src/lerobot/scripts/rl/actor.py b/src/lerobot/scripts/rl/actor.py index cd5e286c0..1c8f9286b 100644 --- a/src/lerobot/scripts/rl/actor.py +++ b/src/lerobot/scripts/rl/actor.py @@ -63,12 +63,12 @@ from lerobot.configs.train import TrainRLServerPipelineConfig from lerobot.policies.factory import make_policy from lerobot.policies.sac.modeling_sac import SACPolicy from lerobot.robots import so100_follower # noqa: F401 -from lerobot.scripts.rl import learner_service from lerobot.scripts.rl.gym_manipulator import make_robot_env from lerobot.teleoperators import gamepad, so101_leader # noqa: F401 from lerobot.transport import services_pb2, services_pb2_grpc from lerobot.transport.utils import ( bytes_to_state_dict, + grpc_channel_options, python_object_to_bytes, receive_bytes_in_chunks, send_bytes_in_chunks, @@ -399,8 +399,6 @@ def learner_service_client( host: str = "127.0.0.1", port: int = 50051, ) -> tuple[services_pb2_grpc.LearnerServiceStub, grpc.Channel]: - import json - """ Returns a client for the learner service. @@ -408,34 +406,9 @@ def learner_service_client( So we need to create only one client and reuse it. 
""" - service_config = { - "methodConfig": [ - { - "name": [{}], # Applies to ALL methods in ALL services - "retryPolicy": { - "maxAttempts": 5, # Max retries (total attempts = 5) - "initialBackoff": "0.1s", # First retry after 0.1s - "maxBackoff": "2s", # Max wait time between retries - "backoffMultiplier": 2, # Exponential backoff factor - "retryableStatusCodes": [ - "UNAVAILABLE", - "DEADLINE_EXCEEDED", - ], # Retries on network failures - }, - } - ] - } - - service_config_json = json.dumps(service_config) - channel = grpc.insecure_channel( f"{host}:{port}", - options=[ - ("grpc.max_receive_message_length", learner_service.MAX_MESSAGE_SIZE), - ("grpc.max_send_message_length", learner_service.MAX_MESSAGE_SIZE), - ("grpc.enable_retries", 1), - ("grpc.service_config", service_config_json), - ], + grpc_channel_options(), ) stub = services_pb2_grpc.LearnerServiceStub(channel) logging.info("[ACTOR] Learner service client created") diff --git a/src/lerobot/scripts/rl/learner.py b/src/lerobot/scripts/rl/learner.py index edd2363b1..cb88895cf 100644 --- a/src/lerobot/scripts/rl/learner.py +++ b/src/lerobot/scripts/rl/learner.py @@ -77,6 +77,7 @@ from lerobot.scripts.rl import learner_service from lerobot.teleoperators import gamepad, so101_leader # noqa: F401 from lerobot.transport import services_pb2_grpc from lerobot.transport.utils import ( + MAX_MESSAGE_SIZE, bytes_to_python_object, bytes_to_transitions, state_to_bytes, @@ -658,8 +659,8 @@ def start_learner( server = grpc.server( ThreadPoolExecutor(max_workers=learner_service.MAX_WORKERS), options=[ - ("grpc.max_receive_message_length", learner_service.MAX_MESSAGE_SIZE), - ("grpc.max_send_message_length", learner_service.MAX_MESSAGE_SIZE), + ("grpc.max_receive_message_length", MAX_MESSAGE_SIZE), + ("grpc.max_send_message_length", MAX_MESSAGE_SIZE), ], ) diff --git a/src/lerobot/scripts/rl/learner_service.py b/src/lerobot/scripts/rl/learner_service.py index 198e52945..b07c296e6 100644 --- 
a/src/lerobot/scripts/rl/learner_service.py +++ b/src/lerobot/scripts/rl/learner_service.py @@ -23,7 +23,6 @@ from lerobot.transport import services_pb2, services_pb2_grpc from lerobot.transport.utils import receive_bytes_in_chunks, send_bytes_in_chunks from lerobot.utils.queue import get_last_item_from_queue -MAX_MESSAGE_SIZE = 4 * 1024 * 1024 # 4 MB MAX_WORKERS = 3 # Stream parameters, send transitions and interactions SHUTDOWN_TIMEOUT = 10 diff --git a/src/lerobot/scripts/server/robot_client.py b/src/lerobot/scripts/server/robot_client.py index a6d7b7242..44d9cdf77 100644 --- a/src/lerobot/scripts/server/robot_client.py +++ b/src/lerobot/scripts/server/robot_client.py @@ -76,6 +76,7 @@ from lerobot.transport import ( async_inference_pb2, # type: ignore async_inference_pb2_grpc, # type: ignore ) +from lerobot.transport.utils import grpc_channel_options class RobotClient: @@ -113,7 +114,9 @@ class RobotClient: config.actions_per_chunk, config.policy_device, ) - self.channel = grpc.insecure_channel(self.server_address) + self.channel = grpc.insecure_channel( + self.server_address, grpc_channel_options(initial_backoff=f"{config.environment_dt:.4f}s") + ) self.stub = async_inference_pb2_grpc.AsyncInferenceStub(self.channel) self.logger.info(f"Initializing client to connect to server at {self.server_address}") diff --git a/src/lerobot/transport/utils.py b/src/lerobot/transport/utils.py index 1c6683262..bf1aab755 100644 --- a/src/lerobot/transport/utils.py +++ b/src/lerobot/transport/utils.py @@ -16,6 +16,7 @@ # limitations under the License. 
import io +import json import logging import pickle # nosec B403: Safe usage for internal serialization only from multiprocessing import Event, Queue @@ -27,6 +28,7 @@ from lerobot.transport import services_pb2 from lerobot.utils.transition import Transition CHUNK_SIZE = 2 * 1024 * 1024 # 2 MB +MAX_MESSAGE_SIZE = 4 * 1024 * 1024 # 4 MB def bytes_buffer_size(buffer: io.BytesIO) -> int: @@ -139,3 +141,42 @@ def transitions_to_bytes(transitions: list[Transition]) -> bytes: buffer = io.BytesIO() torch.save(transitions, buffer) return buffer.getvalue() + + +def grpc_channel_options( + max_receive_message_length: int = MAX_MESSAGE_SIZE, + max_send_message_length: int = MAX_MESSAGE_SIZE, + enable_retries: bool = True, + initial_backoff: str = "0.1s", + max_attempts: int = 5, + backoff_multiplier: float = 2, + max_backoff: str = "2s", +): + service_config = { + "methodConfig": [ + { + "name": [{}], # Applies to ALL methods in ALL services + "retryPolicy": { + "maxAttempts": max_attempts, # Max retries (total attempts = 5) + "initialBackoff": initial_backoff, # First retry after 0.1s + "maxBackoff": max_backoff, # Max wait time between retries + "backoffMultiplier": backoff_multiplier, # Exponential backoff factor + "retryableStatusCodes": [ + "UNAVAILABLE", + "DEADLINE_EXCEEDED", + ], # Retries on network failures + }, + } + ] + } + + service_config_json = json.dumps(service_config) + + retries_option = 1 if enable_retries else 0 + + return [ + ("grpc.max_receive_message_length", max_receive_message_length), + ("grpc.max_send_message_length", max_send_message_length), + ("grpc.enable_retries", retries_option), + ("grpc.service_config", service_config_json), + ] From 0938a1d816c70d689ab6df3298a71572798c15a6 Mon Sep 17 00:00:00 2001 From: Pepijn <138571049+pkooij@users.noreply.github.com> Date: Wed, 16 Jul 2025 17:50:36 +0200 Subject: [PATCH 15/20] Feat/add bimanual so100 robot (#1509) --- src/lerobot/record.py | 24 +++ src/lerobot/replay.py | 15 +- 
.../robots/bi_so100_follower/__init__.py | 18 ++ .../bi_so100_follower/bi_so100_follower.py | 163 ++++++++++++++++++ .../config_bi_so100_follower.py | 39 +++++ .../so100_follower/config_so100_follower.py | 4 +- src/lerobot/robots/utils.py | 4 + src/lerobot/teleoperate.py | 23 +++ .../teleoperators/bi_so100_leader/__init__.py | 18 ++ .../bi_so100_leader/bi_so100_leader.py | 121 +++++++++++++ .../bi_so100_leader/config_bi_so100_leader.py | 26 +++ src/lerobot/teleoperators/utils.py | 4 + 12 files changed, 457 insertions(+), 2 deletions(-) create mode 100644 src/lerobot/robots/bi_so100_follower/__init__.py create mode 100644 src/lerobot/robots/bi_so100_follower/bi_so100_follower.py create mode 100644 src/lerobot/robots/bi_so100_follower/config_bi_so100_follower.py create mode 100644 src/lerobot/teleoperators/bi_so100_leader/__init__.py create mode 100644 src/lerobot/teleoperators/bi_so100_leader/bi_so100_leader.py create mode 100644 src/lerobot/teleoperators/bi_so100_leader/config_bi_so100_leader.py diff --git a/src/lerobot/record.py b/src/lerobot/record.py index 9fc0dc7ed..c8184d40b 100644 --- a/src/lerobot/record.py +++ b/src/lerobot/record.py @@ -33,6 +33,28 @@ python -m lerobot.record \ # <- Policy optional if you want to record with a policy \ # --policy.path=${HF_USER}/my_policy \ ``` + +Example recording with bimanual so100: +```shell +python -m lerobot.record \ + --robot.type=bi_so100_follower \ + --robot.left_arm_port=/dev/tty.usbmodem5A460851411 \ + --robot.right_arm_port=/dev/tty.usbmodem5A460812391 \ + --robot.id=bimanual_follower \ + --robot.cameras='{ + left: {"type": "opencv", "index_or_path": 0, "width": 640, "height": 480, "fps": 30}, + top: {"type": "opencv", "index_or_path": 1, "width": 640, "height": 480, "fps": 30}, + right: {"type": "opencv", "index_or_path": 2, "width": 640, "height": 480, "fps": 30} + }' \ + --teleop.type=bi_so100_leader \ + --teleop.left_arm_port=/dev/tty.usbmodem5A460828611 \ + 
--teleop.right_arm_port=/dev/tty.usbmodem5A460826981 \ + --teleop.id=bimanual_leader \ + --display_data=true \ + --dataset.repo_id=${HF_USER}/bimanual-so100-handover-cube \ + --dataset.num_episodes=25 \ + --dataset.single_task="Grab and handover the red cube to the other arm" +``` """ import logging @@ -57,6 +79,7 @@ from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.robots import ( # noqa: F401 Robot, RobotConfig, + bi_so100_follower, hope_jr, koch_follower, make_robot_from_config, @@ -66,6 +89,7 @@ from lerobot.robots import ( # noqa: F401 from lerobot.teleoperators import ( # noqa: F401 Teleoperator, TeleoperatorConfig, + bi_so100_leader, homunculus, koch_leader, make_teleoperator_from_config, diff --git a/src/lerobot/replay.py b/src/lerobot/replay.py index c51c55cee..afe54d90f 100644 --- a/src/lerobot/replay.py +++ b/src/lerobot/replay.py @@ -15,7 +15,7 @@ """ Replays the actions of an episode from a dataset on a robot. -Example: +Examples: ```shell python -m lerobot.replay \ @@ -25,6 +25,18 @@ python -m lerobot.replay \ --dataset.repo_id=aliberts/record-test \ --dataset.episode=2 ``` + +Example replay with bimanual so100: +```shell +python -m lerobot.replay \ + --robot.type=bi_so100_follower \ + --robot.left_arm_port=/dev/tty.usbmodem5A460851411 \ + --robot.right_arm_port=/dev/tty.usbmodem5A460812391 \ + --robot.id=bimanual_follower \ + --dataset.repo_id=${HF_USER}/bimanual-so100-handover-cube \ + --dataset.episode=0 +``` + """ import logging @@ -39,6 +51,7 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.robots import ( # noqa: F401 Robot, RobotConfig, + bi_so100_follower, hope_jr, koch_follower, make_robot_from_config, diff --git a/src/lerobot/robots/bi_so100_follower/__init__.py b/src/lerobot/robots/bi_so100_follower/__init__.py new file mode 100644 index 000000000..90f56516b --- /dev/null +++ b/src/lerobot/robots/bi_so100_follower/__init__.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The 
HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .bi_so100_follower import BiSO100Follower +from .config_bi_so100_follower import BiSO100FollowerConfig diff --git a/src/lerobot/robots/bi_so100_follower/bi_so100_follower.py b/src/lerobot/robots/bi_so100_follower/bi_so100_follower.py new file mode 100644 index 000000000..7992b79fd --- /dev/null +++ b/src/lerobot/robots/bi_so100_follower/bi_so100_follower.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import time +from functools import cached_property +from typing import Any + +from lerobot.cameras.utils import make_cameras_from_configs +from lerobot.robots.so100_follower import SO100Follower +from lerobot.robots.so100_follower.config_so100_follower import SO100FollowerConfig + +from ..robot import Robot +from .config_bi_so100_follower import BiSO100FollowerConfig + +logger = logging.getLogger(__name__) + + +class BiSO100Follower(Robot): + """ + [Bimanual SO-100 Follower Arms](https://github.com/TheRobotStudio/SO-ARM100) designed by TheRobotStudio + This bimanual robot can also be easily adapted to use SO-101 follower arms, just replace the SO100Follower class with SO101Follower and SO100FollowerConfig with SO101FollowerConfig. + """ + + config_class = BiSO100FollowerConfig + name = "bi_so100_follower" + + def __init__(self, config: BiSO100FollowerConfig): + super().__init__(config) + self.config = config + + left_arm_config = SO100FollowerConfig( + id=f"{config.id}_left" if config.id else None, + calibration_dir=config.calibration_dir, + port=config.left_arm_port, + disable_torque_on_disconnect=config.left_arm_disable_torque_on_disconnect, + max_relative_target=config.left_arm_max_relative_target, + use_degrees=config.left_arm_use_degrees, + cameras={}, + ) + + right_arm_config = SO100FollowerConfig( + id=f"{config.id}_right" if config.id else None, + calibration_dir=config.calibration_dir, + port=config.right_arm_port, + disable_torque_on_disconnect=config.right_arm_disable_torque_on_disconnect, + max_relative_target=config.right_arm_max_relative_target, + use_degrees=config.right_arm_use_degrees, + cameras={}, + ) + + self.left_arm = SO100Follower(left_arm_config) + self.right_arm = SO100Follower(right_arm_config) + self.cameras = make_cameras_from_configs(config.cameras) + + @property + def _motors_ft(self) -> dict[str, type]: + return {f"left_{motor}.pos": float for motor in self.left_arm.bus.motors} | { + f"right_{motor}.pos": float for 
motor in self.right_arm.bus.motors + } + + @property + def _cameras_ft(self) -> dict[str, tuple]: + return { + cam: (self.config.cameras[cam].height, self.config.cameras[cam].width, 3) for cam in self.cameras + } + + @cached_property + def observation_features(self) -> dict[str, type | tuple]: + return {**self._motors_ft, **self._cameras_ft} + + @cached_property + def action_features(self) -> dict[str, type]: + return self._motors_ft + + @property + def is_connected(self) -> bool: + return ( + self.left_arm.bus.is_connected + and self.right_arm.bus.is_connected + and all(cam.is_connected for cam in self.cameras.values()) + ) + + def connect(self, calibrate: bool = True) -> None: + self.left_arm.connect(calibrate) + self.right_arm.connect(calibrate) + + for cam in self.cameras.values(): + cam.connect() + + @property + def is_calibrated(self) -> bool: + return self.left_arm.is_calibrated and self.right_arm.is_calibrated + + def calibrate(self) -> None: + self.left_arm.calibrate() + self.right_arm.calibrate() + + def configure(self) -> None: + self.left_arm.configure() + self.right_arm.configure() + + def setup_motors(self) -> None: + self.left_arm.setup_motors() + self.right_arm.setup_motors() + + def get_observation(self) -> dict[str, Any]: + obs_dict = {} + + # Add "left_" prefix + left_obs = self.left_arm.get_observation() + obs_dict.update({f"left_{key}": value for key, value in left_obs.items()}) + + # Add "right_" prefix + right_obs = self.right_arm.get_observation() + obs_dict.update({f"right_{key}": value for key, value in right_obs.items()}) + + for cam_key, cam in self.cameras.items(): + start = time.perf_counter() + obs_dict[cam_key] = cam.async_read() + dt_ms = (time.perf_counter() - start) * 1e3 + logger.debug(f"{self} read {cam_key}: {dt_ms:.1f}ms") + + return obs_dict + + def send_action(self, action: dict[str, Any]) -> dict[str, Any]: + # Remove "left_" prefix + left_action = { + key.removeprefix("left_"): value for key, value in action.items() if 
key.startswith("left_") + } + # Remove "right_" prefix + right_action = { + key.removeprefix("right_"): value for key, value in action.items() if key.startswith("right_") + } + + send_action_left = self.left_arm.send_action(left_action) + send_action_right = self.right_arm.send_action(right_action) + + # Add prefixes back + prefixed_send_action_left = {f"left_{key}": value for key, value in send_action_left.items()} + prefixed_send_action_right = {f"right_{key}": value for key, value in send_action_right.items()} + + return {**prefixed_send_action_left, **prefixed_send_action_right} + + def disconnect(self): + self.left_arm.disconnect() + self.right_arm.disconnect() + + for cam in self.cameras.values(): + cam.disconnect() diff --git a/src/lerobot/robots/bi_so100_follower/config_bi_so100_follower.py b/src/lerobot/robots/bi_so100_follower/config_bi_so100_follower.py new file mode 100644 index 000000000..00643b85f --- /dev/null +++ b/src/lerobot/robots/bi_so100_follower/config_bi_so100_follower.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataclasses import dataclass, field + +from lerobot.cameras import CameraConfig + +from ..config import RobotConfig + + +@RobotConfig.register_subclass("bi_so100_follower") +@dataclass +class BiSO100FollowerConfig(RobotConfig): + left_arm_port: str + right_arm_port: str + + # Optional + left_arm_disable_torque_on_disconnect: bool = True + left_arm_max_relative_target: int | None = None + left_arm_use_degrees: bool = False + right_arm_disable_torque_on_disconnect: bool = True + right_arm_max_relative_target: int | None = None + right_arm_use_degrees: bool = False + + # cameras (shared between both arms) + cameras: dict[str, CameraConfig] = field(default_factory=dict) diff --git a/src/lerobot/robots/so100_follower/config_so100_follower.py b/src/lerobot/robots/so100_follower/config_so100_follower.py index 7cd23d340..ea8b9f1c2 100644 --- a/src/lerobot/robots/so100_follower/config_so100_follower.py +++ b/src/lerobot/robots/so100_follower/config_so100_follower.py @@ -1,4 +1,6 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/src/lerobot/robots/utils.py b/src/lerobot/robots/utils.py index 911d40465..7486ee499 100644 --- a/src/lerobot/robots/utils.py +++ b/src/lerobot/robots/utils.py @@ -57,6 +57,10 @@ def make_robot_from_config(config: RobotConfig) -> Robot: from .hope_jr import HopeJrArm return HopeJrArm(config) + elif config.type == "bi_so100_follower": + from .bi_so100_follower import BiSO100Follower + + return BiSO100Follower(config) elif config.type == "mock_robot": from tests.mocks.mock_robot import MockRobot diff --git a/src/lerobot/teleoperate.py b/src/lerobot/teleoperate.py index 168f898c4..9836f1393 100644 --- a/src/lerobot/teleoperate.py +++ b/src/lerobot/teleoperate.py @@ -28,6 +28,27 @@ python -m lerobot.teleoperate \ --teleop.id=blue \ --display_data=true ``` + +Example teleoperation with bimanual so100: + +```shell +python -m lerobot.teleoperate \ + --robot.type=bi_so100_follower \ + --robot.left_arm_port=/dev/tty.usbmodem5A460851411 \ + --robot.right_arm_port=/dev/tty.usbmodem5A460812391 \ + --robot.id=bimanual_follower \ + --robot.cameras='{ + left: {"type": "opencv", "index_or_path": 0, "width": 1920, "height": 1080, "fps": 30}, + top: {"type": "opencv", "index_or_path": 1, "width": 1920, "height": 1080, "fps": 30}, + right: {"type": "opencv", "index_or_path": 2, "width": 1920, "height": 1080, "fps": 30} + }' \ + --teleop.type=bi_so100_leader \ + --teleop.left_arm_port=/dev/tty.usbmodem5A460828611 \ + --teleop.right_arm_port=/dev/tty.usbmodem5A460826981 \ + --teleop.id=bimanual_leader \ + --display_data=true +``` + """ import logging @@ -43,6 +64,7 @@ from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraCon from lerobot.robots import ( # noqa: F401 Robot, RobotConfig, + bi_so100_follower, hope_jr, koch_follower, make_robot_from_config, @@ -52,6 +74,7 @@ from lerobot.robots import ( # noqa: F401 from lerobot.teleoperators import ( # noqa: F401 Teleoperator, TeleoperatorConfig, + bi_so100_leader, gamepad, homunculus, koch_leader, diff 
--git a/src/lerobot/teleoperators/bi_so100_leader/__init__.py b/src/lerobot/teleoperators/bi_so100_leader/__init__.py new file mode 100644 index 000000000..34313a61e --- /dev/null +++ b/src/lerobot/teleoperators/bi_so100_leader/__init__.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .bi_so100_leader import BiSO100Leader +from .config_bi_so100_leader import BiSO100LeaderConfig diff --git a/src/lerobot/teleoperators/bi_so100_leader/bi_so100_leader.py b/src/lerobot/teleoperators/bi_so100_leader/bi_so100_leader.py new file mode 100644 index 000000000..769669655 --- /dev/null +++ b/src/lerobot/teleoperators/bi_so100_leader/bi_so100_leader.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +from functools import cached_property + +from lerobot.teleoperators.so100_leader.config_so100_leader import SO100LeaderConfig +from lerobot.teleoperators.so100_leader.so100_leader import SO100Leader + +from ..teleoperator import Teleoperator +from .config_bi_so100_leader import BiSO100LeaderConfig + +logger = logging.getLogger(__name__) + + +class BiSO100Leader(Teleoperator): + """ + [Bimanual SO-100 Leader Arms](https://github.com/TheRobotStudio/SO-ARM100) designed by TheRobotStudio + This bimanual leader arm can also be easily adapted to use SO-101 leader arms, just replace the SO100Leader class with SO101Leader and SO100LeaderConfig with SO101LeaderConfig. + """ + + config_class = BiSO100LeaderConfig + name = "bi_so100_leader" + + def __init__(self, config: BiSO100LeaderConfig): + super().__init__(config) + self.config = config + + left_arm_config = SO100LeaderConfig( + id=f"{config.id}_left" if config.id else None, + calibration_dir=config.calibration_dir, + port=config.left_arm_port, + ) + + right_arm_config = SO100LeaderConfig( + id=f"{config.id}_right" if config.id else None, + calibration_dir=config.calibration_dir, + port=config.right_arm_port, + ) + + self.left_arm = SO100Leader(left_arm_config) + self.right_arm = SO100Leader(right_arm_config) + + @cached_property + def action_features(self) -> dict[str, type]: + return {f"left_{motor}.pos": float for motor in self.left_arm.bus.motors} | { + f"right_{motor}.pos": float for motor in self.right_arm.bus.motors + } + + @cached_property + def feedback_features(self) -> dict[str, type]: + return {} + + @property + def is_connected(self) -> bool: + return self.left_arm.is_connected and self.right_arm.is_connected + + def connect(self, calibrate: bool = True) -> None: + self.left_arm.connect(calibrate) + self.right_arm.connect(calibrate) + + @property + def is_calibrated(self) -> bool: + return self.left_arm.is_calibrated and self.right_arm.is_calibrated + + def calibrate(self) -> None: + 
self.left_arm.calibrate() + self.right_arm.calibrate() + + def configure(self) -> None: + self.left_arm.configure() + self.right_arm.configure() + + def setup_motors(self) -> None: + self.left_arm.setup_motors() + self.right_arm.setup_motors() + + def get_action(self) -> dict[str, float]: + action_dict = {} + + # Add "left_" prefix + left_action = self.left_arm.get_action() + action_dict.update({f"left_{key}": value for key, value in left_action.items()}) + + # Add "right_" prefix + right_action = self.right_arm.get_action() + action_dict.update({f"right_{key}": value for key, value in right_action.items()}) + + return action_dict + + def send_feedback(self, feedback: dict[str, float]) -> None: + # Remove "left_" prefix + left_feedback = { + key.removeprefix("left_"): value for key, value in feedback.items() if key.startswith("left_") + } + # Remove "right_" prefix + right_feedback = { + key.removeprefix("right_"): value for key, value in feedback.items() if key.startswith("right_") + } + + if left_feedback: + self.left_arm.send_feedback(left_feedback) + if right_feedback: + self.right_arm.send_feedback(right_feedback) + + def disconnect(self) -> None: + self.left_arm.disconnect() + self.right_arm.disconnect() diff --git a/src/lerobot/teleoperators/bi_so100_leader/config_bi_so100_leader.py b/src/lerobot/teleoperators/bi_so100_leader/config_bi_so100_leader.py new file mode 100644 index 000000000..117e09913 --- /dev/null +++ b/src/lerobot/teleoperators/bi_so100_leader/config_bi_so100_leader.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +from ..config import TeleoperatorConfig + + +@TeleoperatorConfig.register_subclass("bi_so100_leader") +@dataclass +class BiSO100LeaderConfig(TeleoperatorConfig): + left_arm_port: str + right_arm_port: str diff --git a/src/lerobot/teleoperators/utils.py b/src/lerobot/teleoperators/utils.py index 8a667fd41..344a95d72 100644 --- a/src/lerobot/teleoperators/utils.py +++ b/src/lerobot/teleoperators/utils.py @@ -61,5 +61,9 @@ def make_teleoperator_from_config(config: TeleoperatorConfig) -> Teleoperator: from .homunculus import HomunculusArm return HomunculusArm(config) + elif config.type == "bi_so100_leader": + from .bi_so100_leader import BiSO100Leader + + return BiSO100Leader(config) else: raise ValueError(config.type) From 378e1f0338749bae1d2d1d50d01e944b8a2d9f55 Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Thu, 17 Jul 2025 14:30:20 +0200 Subject: [PATCH 16/20] Update pre-commit-config.yaml + pyproject.toml + ceil rerun & transformer dependencies version (#1520) * chore: update .gitignore * chore: update pre-commit * chore(deps): update pyproject * fix(ci): multiple fixes * chore: pre-commit apply * chore: address review comments * Update pyproject.toml Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> Signed-off-by: Steven Palma * chore(deps): add todo --------- Signed-off-by: Steven Palma Co-authored-by: Ben Zhang <5977478+ben-z@users.noreply.github.com> --- .github/PULL_REQUEST_TEMPLATE.md | 15 +- .gitignore | 282 +++++++++--------- .pre-commit-config.yaml | 53 +++- CODE_OF_CONDUCT.md | 21 +- 
CONTRIBUTING.md | 55 ++-- README.md | 51 +++- benchmarks/video/README.md | 45 ++- docs/README.md | 4 +- docs/source/async.mdx | 78 +++-- docs/source/backwardcomp.mdx | 29 +- docs/source/cameras.mdx | 59 +++- docs/source/hilserl.mdx | 115 +++++-- docs/source/hilserl_sim.mdx | 24 +- docs/source/il_robots.mdx | 56 +++- docs/source/il_sim.mdx | 38 ++- docs/source/index.mdx | 6 +- docs/source/installation.mdx | 27 +- docs/source/integrate_hardware.mdx | 44 ++- docs/source/notebooks.mdx | 4 +- docs/source/smolvla.mdx | 35 ++- examples/4_train_policy_with_script.md | 43 ++- pyproject.toml | 202 +++++++++---- src/lerobot/cameras/camera.py | 4 +- src/lerobot/cameras/opencv/camera_opencv.py | 4 +- .../cameras/realsense/camera_realsense.py | 4 +- .../realsense/configuration_realsense.py | 6 +- src/lerobot/configs/parser.py | 9 +- src/lerobot/configs/policies.py | 6 +- src/lerobot/configs/train.py | 4 +- src/lerobot/datasets/card_template.md | 3 +- src/lerobot/datasets/lerobot_dataset.py | 2 +- .../datasets/push_dataset_to_hub/utils.py | 3 +- src/lerobot/datasets/sampler.py | 4 +- src/lerobot/datasets/transforms.py | 9 +- src/lerobot/envs/configs.py | 36 +-- src/lerobot/find_cameras.py | 20 +- src/lerobot/motors/motors_bus.py | 2 +- src/lerobot/policies/act/modeling_act.py | 4 +- .../policies/diffusion/modeling_diffusion.py | 2 +- .../policies/pi0/paligemma_with_expert.py | 13 +- src/lerobot/policies/pretrained.py | 10 +- src/lerobot/policies/sac/modeling_sac.py | 3 +- .../policies/smolvla/smolvlm_with_expert.py | 13 +- src/lerobot/policies/tdmpc/modeling_tdmpc.py | 2 +- src/lerobot/policies/vqbet/modeling_vqbet.py | 4 +- src/lerobot/policies/vqbet/vqbet_utils.py | 6 +- src/lerobot/record.py | 3 +- src/lerobot/robots/hope_jr/hope_jr.mdx | 29 +- src/lerobot/robots/koch_follower/koch.mdx | 43 ++- src/lerobot/robots/lekiwi/lekiwi.mdx | 53 +++- src/lerobot/robots/lekiwi/lekiwi_client.py | 24 +- src/lerobot/robots/robot.py | 5 +- src/lerobot/robots/so100_follower/so100.mdx | 233 
++++++++++++--- src/lerobot/robots/so101_follower/so101.mdx | 103 +++++-- src/lerobot/robots/stretch3/README.md | 18 +- src/lerobot/robots/viperx/README.md | 28 +- src/lerobot/scripts/eval.py | 2 +- src/lerobot/scripts/rl/crop_dataset_roi.py | 5 +- src/lerobot/scripts/rl/gym_manipulator.py | 3 +- src/lerobot/scripts/rl/learner.py | 4 +- src/lerobot/scripts/server/configs.py | 2 +- src/lerobot/scripts/server/robot_client.py | 5 +- src/lerobot/scripts/visualize_dataset.py | 2 +- .../homunculus/homunculus_arm.py | 10 +- .../homunculus/homunculus_glove.py | 10 +- src/lerobot/teleoperators/teleoperator.py | 5 +- .../templates/lerobot_modelcard_template.md | 7 +- src/lerobot/utils/benchmark.py | 2 + src/lerobot/utils/buffer.py | 3 +- src/lerobot/utils/hub.py | 5 +- src/lerobot/utils/random_utils.py | 3 +- src/lerobot/utils/utils.py | 8 +- tests/configs/test_plugin_loading.py | 2 +- tests/mocks/mock_dynamixel.py | 2 +- tests/mocks/mock_feetech.py | 2 +- tests/motors/test_dynamixel.py | 2 +- tests/motors/test_feetech.py | 2 +- tests/utils/test_replay_buffer.py | 2 +- 78 files changed, 1450 insertions(+), 636 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index df2e2db29..22f1ee3d7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,33 +1,40 @@ ## What this does + Explain what this PR does. Feel free to tag your PR with the appropriate label(s). Examples: -| Title | Label | +| Title | Label | |----------------------|-----------------| -| Fixes #[issue] | (🐛 Bug) | -| Adds new dataset | (🗃️ Dataset) | -| Optimizes something | (⚡️ Performance) | +| Fixes #[issue] | (🐛 Bug) | +| Adds new dataset | (🗃️ Dataset) | +| Optimizes something | (⚡️ Performance) | ## How it was tested + Explain/show how you tested your changes. Examples: + - Added `test_something` in `tests/test_stuff.py`. - Added `new_feature` and checked that training converges with policy X on dataset/environment Y. 
- Optimized `some_function`, it now runs X times faster than previously. ## How to checkout & try? (for the reviewer) + Provide a simple way for the reviewer to try out your changes. Examples: + ```bash pytest -sx tests/test_stuff.py::test_something ``` + ```bash python -m lerobot.scripts.train --some.option=true ``` ## SECTION TO REMOVE BEFORE SUBMITTING YOUR PR + **Note**: Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. Try to avoid tagging more than 3 people. diff --git a/.gitignore b/.gitignore index 4ab886933..c4d1f769f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,164 +12,164 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Dev scripts -.dev - -# Logging -logs -tmp -wandb - -# Data -data -outputs - -# Apple -.DS_Store - -# VS Code -.vscode -.devcontainer - -# HPC -nautilus/*.yaml -*.key - -# Slurm -sbatch*.sh - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# uv/poetry lock files -poetry.lock -uv.lock - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -!tests/artifacts -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Ignore .cache -.cache/* - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments +### Environments & Dependencies ### .env .venv env/ venv/ env.bak/ venv.bak/ +.python-version +__pypackages__/ +node_modules/ -# Spyder project settings +# Lock files +poetry.lock +uv.lock +Pipfile.lock + +### Build & Distribution ### +build/ +dist/ +sdist/ +wheels/ +downloads/ +eggs/ +.eggs/ +parts/ +var/ +pip-wheel-metadata/ +share/python-wheels/ +develop-eggs/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +lib/ +lib64/ + +# PyInstaller +*.manifest +*.spec + +### Compiled & Cached Files ### +__pycache__/ +*.py[cod] +*$py.class +*.so +*.sage.py +.cache/ +.ruff_cache/ +.mypy_cache/ +.pyre/ +.pytype/ +cython_debug/ + +### Testing & Coverage ### +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.pytest_cache/ +.hypothesis/ +nosetests.xml +coverage.xml +*.cover +*.py,cover +!tests/artifacts + +### Logs & Temporary Files ### +logs/ +tmp/ +*.log +pip-log.txt +pip-delete-this-directory.txt +celerybeat-schedule +celerybeat.pid + +### IDE & Editor Config ### +# VS Code +.vscode/ +.devcontainer/ + +# JetBrains / PyCharm +.idea/ + +# Spyder .spyderproject .spyproject -# Rope project settings +# Rope .ropeproject -# mkdocs 
documentation +# Vim +*.swp + +# Other +*~ + +### OS Specific ### +# macOS +.DS_Store + +# Windows +Thumbs.db + +### Framework & Tool Specific ### + +.Python + +# Django +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask +instance/ +.webassets-cache + +# Scrapy +.scrapy + +# Jupyter +.ipynb_checkpoints/ +profile_default/ +ipython_config.py + +# Sphinx +docs/_build/ + +# MkDocs /site +# PyBuilder +.pybuilder/ +target/ + # mypy -.mypy_cache/ .dmypy.json dmypy.json -# Pyre type checker -.pyre/ +### HPC & Slurm ### +nautilus/*.yaml +*.key +sbatch*.sh -# pytype static type analyzer -.pytype/ +### Miscellaneous ### +# W&B +wandb/ -# Cython debug symbols -cython_debug/ +# Dev scripts +.dev/ + +# Data folders +data/ +outputs/ + +# Translations +*.mo +*.pot + +# Dev folders +.cache/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e25f33ee0..e509d6d88 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -exclude: "tests/artifacts/.*\\.safetensors$" default_language_version: python: python3.10 + +exclude: "tests/artifacts/.*\\.safetensors$" + repos: ##### Meta ##### - repo: meta @@ -22,12 +24,12 @@ repos: - id: check-useless-excludes - id: check-hooks-apply - - ##### Style / Misc. 
##### + ##### General Code Quality & Formatting ##### - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-added-large-files + args: ['--maxkb=1024'] - id: debug-statements - id: check-merge-conflict - id: check-case-conflict @@ -36,7 +38,14 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/adhtruong/mirrors-typos + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.13 + hooks: + - id: ruff-format + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + + - repo: https://github.com/crate-ci/typos rev: v1.34.0 hooks: - id: typos @@ -46,14 +55,16 @@ repos: rev: v3.20.0 hooks: - id: pyupgrade + args: [--py310-plus] - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.3 + ##### Markdown Quality ##### + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 hooks: - - id: ruff - args: [--fix] - - id: ruff-format - + - id: prettier + name: Format Markdown with Prettier + types_or: [markdown, mdx] + args: [--prose-wrap=preserve] ##### Security ##### - repo: https://github.com/gitleaks/gitleaks @@ -72,3 +83,25 @@ repos: - id: bandit args: ["-c", "pyproject.toml"] additional_dependencies: ["bandit[toml]"] + + # TODO(Steven): Uncomment when ready to use + ##### Static Analysis & Typing ##### + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v1.16.0 + # hooks: + # - id: mypy + # args: [--python-version=3.10] + + ##### Docstring Checks ##### + # - repo: https://github.com/akaihola/darglint2 + # rev: v1.8.2 + # hooks: + # - id: darglint2 + # args: ["--docstring-style", "google", "-v", "2"] + # exclude: ^tests/.*$ + + # - repo: https://github.com/econchick/interrogate + # rev: 1.7.0 + # hooks: + # - id: interrogate + # args: ["-vv", "--config=pyproject.toml"] diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 04a052753..c0fdac843 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,4 +1,3 @@ - # Contributor Covenant Code of 
Conduct ## Our Pledge @@ -18,23 +17,23 @@ diverse, inclusive, and healthy community. Examples of behavior that contributes to a positive environment for our community include: -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience -* Focusing on what is best not just for us as individuals, but for the overall +- Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or advances of +- The use of sexualized language or imagery, and sexual attention or advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email address, +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a +- Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a354e1346..369af602b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,10 +15,11 @@ Whichever way you choose to contribute, please be mindful to respect our ## You can contribute in so many 
ways! Some of the ways you can contribute to 🤗 LeRobot: -* Fixing outstanding issues with the existing code. -* Implementing new models, datasets or simulation environments. -* Contributing to the examples or to the documentation. -* Submitting issues related to bugs or desired new features. + +- Fixing outstanding issues with the existing code. +- Implementing new models, datasets or simulation environments. +- Contributing to the examples or to the documentation. +- Submitting issues related to bugs or desired new features. Following the guides below, feel free to open issues and PRs and to coordinate your efforts with the community on our [Discord Channel](https://discord.gg/VjFz58wn3R). For specific inquiries, reach out to [Remi Cadene](mailto:remi.cadene@huggingface.co). @@ -40,24 +41,26 @@ already reported** (use the search bar on Github under Issues). Did not find it? :( So we can act quickly on it, please follow these steps: -* Include your **OS type and version**, the versions of **Python** and **PyTorch**. -* A short, self-contained, code snippet that allows us to reproduce the bug in +- Include your **OS type and version**, the versions of **Python** and **PyTorch**. +- A short, self-contained, code snippet that allows us to reproduce the bug in less than 30s. -* The full traceback if an exception is raised. -* Attach any other additional information, like screenshots, you think may help. +- The full traceback if an exception is raised. +- Attach any other additional information, like screenshots, you think may help. ### Do you want a new feature? A good feature request addresses the following points: 1. Motivation first: -* Is it related to a problem/frustration with the library? If so, please explain + +- Is it related to a problem/frustration with the library? If so, please explain why. Providing a code snippet that demonstrates the problem is best. -* Is it related to something you would need for a project? 
We'd love to hear +- Is it related to something you would need for a project? We'd love to hear about it! -* Is it something you worked on and think could benefit the community? +- Is it something you worked on and think could benefit the community? Awesome! Tell us what problem it solved for you. -2. Write a *paragraph* describing the feature. + +2. Write a _paragraph_ describing the feature. 3. Provide a **code snippet** that demonstrates its future use. 4. In case this is related to a paper, please attach a link. 5. Attach any additional information (drawings, screenshots, etc.) you think may help. @@ -74,12 +77,15 @@ environments ([aloha](https://github.com/huggingface/gym-aloha), and follow the same api design. When implementing a new dataset loadable with LeRobotDataset follow these steps: + - Update `available_datasets_per_env` in `lerobot/__init__.py` When implementing a new environment (e.g. `gym_aloha`), follow these steps: + - Update `available_tasks_per_env` and `available_datasets_per_env` in `lerobot/__init__.py` When implementing a new policy class (e.g. `DiffusionPolicy`) follow these steps: + - Update `available_policies` and `available_policies_per_env`, in `lerobot/__init__.py` - Set the required `name` class attribute. - Update variables in `tests/test_available.py` by importing your new Policy class @@ -133,11 +139,13 @@ Follow these steps to start contributing: Follow the instructions to [install poetry](https://python-poetry.org/docs/#installation) (use a version >=2.1.0) or to [install uv](https://docs.astral.sh/uv/getting-started/installation/#installation-methods) if you don't have one of them already. 
Set up a development environment with conda or miniconda: + ```bash conda create -y -n lerobot-dev python=3.10 && conda activate lerobot-dev ``` If you're using `uv`, it can manage python versions so you can instead do: + ```bash uv venv --python 3.10 && source .venv/bin/activate ``` @@ -145,11 +153,13 @@ Follow these steps to start contributing: To develop on 🤗 LeRobot, you will at least need to install the `dev` and `test` extras dependencies along with the core library: using `poetry` + ```bash poetry sync --extras "dev test" ``` using `uv` + ```bash uv sync --extra dev --extra test ``` @@ -157,43 +167,48 @@ Follow these steps to start contributing: You can also install the project with all its dependencies (including environments): using `poetry` + ```bash poetry sync --all-extras ``` using `uv` + ```bash uv sync --all-extras ``` - > **Note:** If you don't install simulation environments with `--all-extras`, the tests that require them will be skipped when running the pytest suite locally. However, they *will* be tested in the CI. In general, we advise you to install everything and test locally before pushing. + > **Note:** If you don't install simulation environments with `--all-extras`, the tests that require them will be skipped when running the pytest suite locally. However, they _will_ be tested in the CI. In general, we advise you to install everything and test locally before pushing. Whichever command you chose to install the project (e.g. `poetry sync --all-extras`), you should run it again when pulling code with an updated version of `pyproject.toml` and `poetry.lock` in order to synchronize your virtual environment with the new dependencies. The equivalent of `pip install some-package`, would just be: using `poetry` + ```bash poetry add some-package ``` using `uv` + ```bash uv add some-package ``` When making changes to the poetry sections of the `pyproject.toml`, you should run the following command to lock dependencies. 
using `poetry` + ```bash poetry lock ``` using `uv` + ```bash uv lock ``` - 5. Develop the features on your branch. As you work on the features, you should make sure that the test suite @@ -211,11 +226,13 @@ Follow these steps to start contributing: automatically as Git commit hooks. Install `pre-commit` hooks: + ```bash pre-commit install ``` You can run these hooks whenever you need on staged files with: + ```bash pre-commit ``` @@ -229,6 +246,7 @@ Follow these steps to start contributing: ``` Note, if you already committed some changes that have a wrong formatting, you can use: + ```bash pre-commit run --all-files ``` @@ -249,16 +267,15 @@ Follow these steps to start contributing: git push -u origin a-descriptive-name-for-my-changes ``` -6. Once you are satisfied (**and the checklist below is happy too**), go to the +7. Once you are satisfied (**and the checklist below is happy too**), go to the webpage of your fork on GitHub. Click on 'Pull request' to send your changes to the project maintainers for review. -7. It's ok if maintainers ask you for changes. It happens to core contributors +8. It's ok if maintainers ask you for changes. It happens to core contributors too! So everyone can see the changes in the Pull request, work in your local branch and push the changes to your fork. They will automatically appear in the pull request. - ### Checklist 1. The title of your pull request should be a summary of its contribution; @@ -277,18 +294,21 @@ An extensive test suite is included to test the library behavior and several exa Install [git lfs](https://git-lfs.com/) to retrieve test artifacts (if you don't have it already). 
On Mac: + ```bash brew install git-lfs git lfs install ``` On Ubuntu: + ```bash sudo apt-get install git-lfs git lfs install ``` Pull artifacts if they're not in [tests/artifacts](tests/artifacts) + ```bash git lfs pull ``` @@ -300,6 +320,5 @@ repository, here's how to run tests with `pytest` for the library: python -m pytest -sv ./tests ``` - You can specify a smaller set of tests in order to test only the feature you're working on. diff --git a/README.md b/README.md index ff7a92384..1d7cbcad4 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,6 @@ /> -

Meet the updated SO100, the SO-101 – Just €114 per arm!

Train it in minutes with a few simple moves on your laptop.

Then sit back and watch your creation act autonomously! 🤯

@@ -120,52 +119,61 @@ - Thanks to Antonio Loquercio and Ashish Kumar for their early support. - Thanks to [Seungjae (Jay) Lee](https://sjlee.cc/), [Mahi Shafiullah](https://mahis.life/) and colleagues for open sourcing [VQ-BeT](https://sjlee.cc/vq-bet/) policy and helping us adapt the codebase to our repository. The policy is adapted from [VQ-BeT repo](https://github.com/jayLEE0301/vq_bet_official). - ## Installation Download our source code: + ```bash git clone https://github.com/huggingface/lerobot.git cd lerobot ``` Create a virtual environment with Python 3.10 and activate it, e.g. with [`miniconda`](https://docs.anaconda.com/free/miniconda/index.html): + ```bash conda create -y -n lerobot python=3.10 conda activate lerobot ``` When using `miniconda`, install `ffmpeg` in your environment: + ```bash conda install ffmpeg -c conda-forge ``` > **NOTE:** This usually installs `ffmpeg 7.X` for your platform compiled with the `libsvtav1` encoder. If `libsvtav1` is not supported (check supported encoders with `ffmpeg -encoders`), you can: -> - _[On any platform]_ Explicitly install `ffmpeg 7.X` using: -> ```bash -> conda install ffmpeg=7.1.1 -c conda-forge -> ``` -> - _[On Linux only]_ Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`. +> +> - _[On any platform]_ Explicitly install `ffmpeg 7.X` using: +> +> ```bash +> conda install ffmpeg=7.1.1 -c conda-forge +> ``` +> +> - _[On Linux only]_ Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`. 
Install 🤗 LeRobot: + ```bash pip install -e . ``` > **NOTE:** If you encounter build errors, you may need to install additional dependencies (`cmake`, `build-essential`, and `ffmpeg libs`). On Linux, run: -`sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg) +> `sudo apt-get install cmake build-essential python3-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev`. For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg) For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras: + - [aloha](https://github.com/huggingface/gym-aloha) - [xarm](https://github.com/huggingface/gym-xarm) - [pusht](https://github.com/huggingface/gym-pusht) For instance, to install 🤗 LeRobot with aloha and pusht, use: + ```bash pip install -e ".[aloha, pusht]" ``` To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with + ```bash wandb login ``` @@ -177,6 +185,7 @@ wandb login Check out [example 1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically downloads data from the Hugging Face hub. 
You can also locally visualize episodes from a dataset on the hub by executing our script from the command line: + ```bash python -m lerobot.scripts.visualize_dataset \ --repo-id lerobot/pusht \ @@ -184,6 +193,7 @@ python -m lerobot.scripts.visualize_dataset \ ``` or from a dataset in a local folder with the `root` option and the `--local-files-only` (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`) + ```bash python -m lerobot.scripts.visualize_dataset \ --repo-id lerobot/pusht \ @@ -192,19 +202,17 @@ python -m lerobot.scripts.visualize_dataset \ --episode-index 0 ``` - It will open `rerun.io` and display the camera streams, robot states and actions, like this: https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144 - Our script can also visualize datasets stored on a distant server. See `python -m lerobot.scripts.visualize_dataset --help` for more instructions. ### The `LeRobotDataset` format A dataset in `LeRobotDataset` format is very simple to use. It can be loaded from a repository on the Hugging Face hub or a local folder simply with e.g. `dataset = LeRobotDataset("lerobot/aloha_static_coffee")` and can be indexed into like any Hugging Face and PyTorch dataset. For instance `dataset[0]` will retrieve a single temporal frame from the dataset containing observation(s) and an action as PyTorch tensors ready to be fed to a model. 
-A specificity of `LeRobotDataset` is that, rather than retrieving a single frame by its index, we can retrieve several frames based on their temporal relationship with the indexed frame, by setting `delta_timestamps` to a list of relative times with respect to the indexed frame. For example, with `delta_timestamps = {"observation.image": [-1, -0.5, -0.2, 0]}` one can retrieve, for a given index, 4 frames: 3 "previous" frames 1 second, 0.5 seconds, and 0.2 seconds before the indexed frame, and the indexed frame itself (corresponding to the 0 entry). See example [1_load_lerobot_dataset.py](examples/1_load_lerobot_dataset.py) for more details on `delta_timestamps`. +A specificity of `LeRobotDataset` is that, rather than retrieving a single frame by its index, we can retrieve several frames based on their temporal relationship with the indexed frame, by setting `delta_timestamps` to a list of relative times with respect to the indexed frame. For example, with `delta_timestamps = {"observation.image": [-1, -0.5, -0.2, 0]}` one can retrieve, for a given index, 4 frames: 3 "previous" frames 1 second, 0.5 seconds, and 0.2 seconds before the indexed frame, and the indexed frame itself (corresponding to the 0 entry). See example [1_load_lerobot_dataset.py](examples/1_load_lerobot_dataset.py) for more details on `delta_timestamps`. Under the hood, the `LeRobotDataset` format makes use of several ways to serialize data which can be useful to understand if you plan to work more closely with this format. We tried to make a flexible yet simple dataset format that would cover most type of features and specificities present in reinforcement learning and robotics, in simulation and in real-world, with a focus on cameras and robot states but easily extended to other types of sensory inputs as long as they can be represented by a tensor. 
@@ -239,6 +247,7 @@ dataset attributes: ``` A `LeRobotDataset` is serialised using several widespread file formats for each of its parts, namely: + - hf_dataset stored using Hugging Face datasets library serialization to parquet - videos are stored in mp4 format to save space - metadata are stored in plain json/jsonl files @@ -250,6 +259,7 @@ Dataset can be uploaded/downloaded from the HuggingFace hub seamlessly. To work Check out [example 2](./examples/2_evaluate_pretrained_policy.py) that illustrates how to download a pretrained policy from Hugging Face hub, and run an evaluation on its corresponding environment. We also provide a more capable script to parallelize the evaluation over multiple environments during the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht): + ```bash python -m lerobot.scripts.eval \ --policy.path=lerobot/diffusion_pusht \ @@ -284,9 +294,11 @@ Note: For efficiency, during training every checkpoint is evaluated on a low num We provide some pretrained policies on our [hub page](https://huggingface.co/lerobot) that can achieve state-of-the-art performances. You can reproduce their training by loading the config from their run. Simply running: + ```bash python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht ``` + reproduces SOTA results for Diffusion Policy on the PushT task. 
## Contribute @@ -313,27 +325,29 @@ See `python lerobot/scripts/push_dataset_to_hub.py --help` for more instructions If your dataset format is not supported, implement your own in `lerobot/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/datasets/push_dataset_to_hub/xarm_pkl_format.py). --> - ### Add a pretrained policy Once you have trained a policy you may upload it to the Hugging Face hub using a hub id that looks like `${hf_user}/${repo_name}` (e.g. [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)). You first need to find the checkpoint folder located inside your experiment directory (e.g. `outputs/train/2024-05-05/20-21-12_aloha_act_default/checkpoints/002500`). Within that there is a `pretrained_model` directory which should contain: + - `config.json`: A serialized version of the policy configuration (following the policy's dataclass config). - `model.safetensors`: A set of `torch.nn.Module` parameters, saved in [Hugging Face Safetensors](https://huggingface.co/docs/safetensors/index) format. - `train_config.json`: A consolidated configuration containing all parameters used for training. The policy configuration should match `config.json` exactly. This is useful for anyone who wants to evaluate your policy or for reproducibility. 
To upload these to the hub, run the following: + ```bash huggingface-cli upload ${hf_user}/${repo_name} path/to/pretrained_model ``` See [eval.py](https://github.com/huggingface/lerobot/blob/main/lerobot/scripts/eval.py) for an example of how other people may use your policy. - ### Improve your code with profiling An example of a code snippet to profile the evaluation of a policy: + + ```python from torch.profiler import profile, record_function, ProfilerActivity @@ -354,10 +368,12 @@ with profile( prof.step() # insert code to profile, potentially whole body of eval_policy function ``` + ## Citation If you want, you can cite this work with: + ```bibtex @misc{cadene2024lerobot, author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascale, Caroline and Choghari, Jade and Moss, Jess and Wolf, Thomas}, @@ -368,7 +384,9 @@ If you want, you can cite this work with: ``` Additionally, if you are using any of the particular policy architecture, pretrained models, or datasets, it is recommended to cite the original authors of the work as they appear below: + - [SmolVLA](https://arxiv.org/abs/2506.01844) + ```bibtex @article{shukor2025smolvla, title={SmolVLA: A Vision-Language-Action Model for Affordable and Efficient Robotics}, @@ -379,6 +397,7 @@ Additionally, if you are using any of the particular policy architecture, pretra ``` - [Diffusion Policy](https://diffusion-policy.cs.columbia.edu) + ```bibtex @article{chi2024diffusionpolicy, author = {Cheng Chi and Zhenjia Xu and Siyuan Feng and Eric Cousineau and Yilun Du and Benjamin Burchfiel and Russ Tedrake and Shuran Song}, @@ -387,7 +406,9 @@ Additionally, if you are using any of the particular policy architecture, pretra year = {2024}, } ``` + - [ACT or ALOHA](https://tonyzhaozh.github.io/aloha) + ```bibtex 
@article{zhao2023learning, title={Learning fine-grained bimanual manipulation with low-cost hardware}, @@ -409,6 +430,7 @@ Additionally, if you are using any of the particular policy architecture, pretra ``` - [VQ-BeT](https://sjlee.cc/vq-bet/) + ```bibtex @article{lee2024behavior, title={Behavior generation with latent actions}, @@ -418,8 +440,8 @@ Additionally, if you are using any of the particular policy architecture, pretra } ``` - - [HIL-SERL](https://hil-serl.github.io/) + ```bibtex @Article{luo2024hilserl, title={Precise and Dexterous Robotic Manipulation via Human-in-the-Loop Reinforcement Learning}, @@ -430,6 +452,7 @@ archivePrefix={arXiv}, primaryClass={cs.RO} } ``` + ## Star History [![Star History Chart](https://api.star-history.com/svg?repos=huggingface/lerobot&type=Timeline)](https://star-history.com/#huggingface/lerobot&Timeline) diff --git a/benchmarks/video/README.md b/benchmarks/video/README.md index daa3e1f48..490a4b495 100644 --- a/benchmarks/video/README.md +++ b/benchmarks/video/README.md @@ -1,28 +1,32 @@ # Video benchmark - ## Questions + What is the optimal trade-off between: + - maximizing loading time with random access, - minimizing memory space on disk, - maximizing success rate of policies, - compatibility across devices/platforms for decoding videos (e.g. video players, web browsers). How to encode videos? + - Which video codec (`-vcodec`) to use? h264, h265, AV1? - What pixel format to use (`-pix_fmt`)? `yuv444p` or `yuv420p`? - How much compression (`-crf`)? No compression with `0`, intermediate compression with `25` or extreme with `50+`? - Which frequency to chose for key frames (`-g`)? A key frame every `10` frames? How to decode videos? + - Which `decoder`? `torchvision`, `torchaudio`, `ffmpegio`, `decord`, or `nvc`? - What scenarios to use for the requesting timestamps during benchmark? 
(`timestamps_mode`) - ## Variables + **Image content & size** We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an apartment, or in a factory, or outdoor, or with lots of moving objects in the scene, etc. Similarly, loading times might not vary linearly with the image size (resolution). For these reasons, we run this benchmark on four representative datasets: + - `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera. - `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera. - `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera. @@ -34,8 +38,9 @@ Note: The datasets used for this benchmark need to be image datasets, not video We might revisit this benchmark and find better settings if we train our policies with various data augmentations to make them more robust (e.g. robust to color changes, compression, etc.). ### Encoding parameters + | parameter | values | -|-------------|--------------------------------------------------------------| +| ----------- | ------------------------------------------------------------ | | **vcodec** | `libx264`, `libx265`, `libsvtav1` | | **pix_fmt** | `yuv444p`, `yuv420p` | | **g** | `1`, `2`, `3`, `4`, `5`, `6`, `10`, `15`, `20`, `40`, `None` | @@ -44,19 +49,23 @@ We might revisit this benchmark and find better settings if we train our policie Note that `crf` value might be interpreted differently by various video codecs. In other words, the same value used with one codec doesn't necessarily translate into the same compression level with another codec. In fact, the default value (`None`) isn't the same amongst the different video codecs. Importantly, it is also the case for many other ffmpeg arguments like `g` which specifies the frequency of the key frames. 
For a comprehensive list and documentation of these parameters, see the ffmpeg documentation depending on the video codec used: + - h264: https://trac.ffmpeg.org/wiki/Encode/H.264 - h265: https://trac.ffmpeg.org/wiki/Encode/H.265 - AV1: https://trac.ffmpeg.org/wiki/Encode/AV1 ### Decoding parameters + **Decoder** We tested two video decoding backends from torchvision: + - `pyav` - `video_reader` (requires to build torchvision from source) **Requested timestamps** Given the way video decoding works, once a keyframe has been loaded, the decoding of subsequent frames is fast. This of course is affected by the `-g` parameter during encoding, which specifies the frequency of the keyframes. Given our typical use cases in robotics policies which might request a few timestamps in different random places, we want to replicate these use cases with the following scenarios: + - `1_frame`: 1 frame, - `2_frames`: 2 consecutive frames (e.g. `[t, t + 1 / fps]`), - `6_frames`: 6 consecutive frames (e.g. `[t + i / fps for i in range(6)]`) @@ -64,12 +73,13 @@ This of course is affected by the `-g` parameter during encoding, which specifie Note that this differs significantly from a typical use case like watching a movie, in which every frame is loaded sequentially from the beginning to the end and it's acceptable to have big values for `-g`. Additionally, because some policies might request single timestamps that are a few frames apart, we also have the following scenario: + - `2_frames_4_space`: 2 frames with 4 consecutive frames of spacing in between (e.g `[t, t + 5 / fps]`), However, due to how video decoding is implemented with `pyav`, we don't have access to an accurate seek so in practice this scenario is essentially the same as `6_frames` since all 6 frames between `t` and `t + 5 / fps` will be decoded. 
- ## Metrics + **Data compression ratio (lower is better)** `video_images_size_ratio` is the ratio of the memory space on disk taken by the encoded video over the memory space taken by the original images. For instance, `video_images_size_ratio=25%` means that the video takes 4 times less memory space on disk compared to the original images. @@ -87,18 +97,18 @@ However, due to how video decoding is implemented with `pyav`, we don't have acc One aspect that can't be measured here with those metrics is the compatibility of the encoding across platforms, in particular on web browser, for visualization purposes. h264, h265 and AV1 are all commonly used codecs and should not pose an issue. However, the chroma subsampling (`pix_fmt`) format might affect compatibility: + - `yuv420p` is more widely supported across various platforms, including web browsers. - `yuv444p` offers higher color fidelity but might not be supported as broadly. - - ## How the benchmark works + The benchmark evaluates both encoding and decoding of video frames on the first episode of each dataset. **Encoding:** for each `vcodec` and `pix_fmt` pair, we use a default value for `g` and `crf` upon which we change a single value (either `g` or `crf`) to one of the specified values (we don't test every combination of those as this would be computationally too heavy). @@ -110,15 +120,18 @@ Intermediate results saved for each `vcodec` and `pix_fmt` combination in csv ta These are then all concatenated to a single table ready for analysis. ## Caveats + We tried to measure the most impactful parameters for both encoding and decoding. However, for computational reasons we can't test out every combination. Additional encoding parameters exist that are not included in this benchmark. In particular: + - `-preset` which allows for selecting encoding presets. This represents a collection of options that will provide a certain encoding speed to compression ratio. 
By leaving this parameter unspecified, it is considered to be `medium` for libx264 and libx265 and `8` for libsvtav1. - `-tune` which allows to optimize the encoding for certain aspects (e.g. film quality, fast decoding, etc.). See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters. Similarly on the decoding side, other decoders exist but are not implemented in our current benchmark. To name a few: + - `torchaudio` - `ffmpegio` - `decord` @@ -127,16 +140,17 @@ Similarly on the decoding side, other decoders exist but are not implemented in Note as well that since we are mostly interested in the performance at decoding time (also because encoding is done only once before uploading a dataset), we did not measure encoding times nor have any metrics regarding encoding. However, besides the necessity to build ffmpeg from source, encoding did not pose any issue and it didn't take a significant amount of time during this benchmark. - ## Install + Building ffmpeg from source is required to include libx265 and libaom/libsvtav1 (av1) video codecs ([compilation guide](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu)). **Note:** While you still need to build torchvision with a conda-installed `ffmpeg<4.3` to use the `video_reader` decoder (as described in [#220](https://github.com/huggingface/lerobot/pull/220)), you also need another version which is custom-built with all the video codecs for encoding. For the script to then use that version, you can prepend the command above with `PATH="$HOME/bin:$PATH"`, which is where ffmpeg should be built. - ## Adding a video decoder + Right now, we're only benchmarking the two video decoder available with torchvision: `pyav` and `video_reader`. 
You can easily add a new decoder to benchmark by adding it to this function in the script: + ```diff def decode_video_frames( video_path: str, @@ -156,9 +170,10 @@ def decode_video_frames( raise NotImplementedError(backend) ``` - ## Example + For a quick run, you can try these parameters: + ```bash python benchmark/video/run_video_benchmark.py \ --output-dir outputs/video_benchmark \ @@ -176,11 +191,12 @@ python benchmark/video/run_video_benchmark.py \ --save-frames 0 ``` - ## Results ### Reproduce + We ran the benchmark with the following parameters: + ```bash # h264 and h265 encodings python benchmark/video/run_video_benchmark.py \ @@ -221,9 +237,10 @@ python benchmark/video/run_video_benchmark.py \ The full results are available [here](https://docs.google.com/spreadsheets/d/1OYJB43Qu8fC26k_OyoMFgGBBKfQRCi4BIuYitQnq3sw/edit?usp=sharing) - ### Parameters selected for LeRobotDataset + Considering these results, we chose what we think is the best set of encoding parameter: + - vcodec: `libsvtav1` - pix-fmt: `yuv420p` - g: `2` @@ -236,7 +253,7 @@ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_read These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav` | video_images_size_ratio | vcodec | pix_fmt | | | | -|------------------------------------|------------|---------|-----------|-----------|-----------| +| ---------------------------------- | ---------- | ------- | --------- | --------- | --------- | | | libx264 | | libx265 | | libsvtav1 | | repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | | lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% | @@ -245,7 +262,7 @@ These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_ | aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% | | video_images_load_time_ratio | vcodec | pix_fmt | | | | -|------------------------------------|---------|---------|----------|---------|-----------| +| 
---------------------------------- | ------- | ------- | -------- | ------- | --------- | | | libx264 | | libx265 | | libsvtav1 | | repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | | lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 | @@ -254,7 +271,7 @@ These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_ | aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** | | | | vcodec | pix_fmt | | | | -|------------------------------------|----------|----------|--------------|----------|-----------|--------------| +| ---------------------------------- | -------- | -------- | ------------ | -------- | --------- | ------------ | | | | libx264 | | libx265 | | libsvtav1 | | repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | | lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 | diff --git a/docs/README.md b/docs/README.md index 275fee46b..967de7b84 100644 --- a/docs/README.md +++ b/docs/README.md @@ -26,6 +26,7 @@ pip install -e ".[docs]" You will also need `nodejs`. Please refer to their [installation page](https://nodejs.org/en/download) --- + **NOTE** You only need to generate the documentation to inspect it locally (if you're planning changes and want to @@ -63,6 +64,7 @@ doc-builder preview lerobot docs/source/ The docs will be viewable at [http://localhost:3000](http://localhost:3000). You can also preview the docs once you have opened a PR. You will see a bot add a comment to a link where the documentation with your changes lives. --- + **NOTE** The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again). 
@@ -89,6 +91,7 @@ Sections that were moved: [ Section A ] ``` + and of course, if you moved it to another file, then: ``` @@ -119,7 +122,6 @@ and objects like True, None or any strings should usually be put in `code`. Multi-line code blocks can be useful for displaying examples. They are done between two lines of three backticks as usual in Markdown: - ```` ``` # first line of code diff --git a/docs/source/async.mdx b/docs/source/async.mdx index 6ff05a88a..397c513cf 100644 --- a/docs/source/async.mdx +++ b/docs/source/async.mdx @@ -5,17 +5,18 @@ In this tutorial, we'll show how to use asynchronous inference (_async inference **Try async inference with all the policies** supported by LeRobot! **What you'll learn:** + 1. Why asynchronous inference matters and how it compares to, more traditional, sequential inference. 2. How to spin-up a `PolicyServer` and connect a `RobotClient` from the same machine, and even over the network. 3. How to tune key parameters (`actions_per_chunk`, `chunk_size_threshold`) for your robot and policy. If you get stuck, hop into our [Discord community](https://discord.gg/s3KuuzsPFb)! - -In a nutshell: with *async inference*, your robot keeps acting while the policy server is already busy computing the next chunk of actions---eliminating "wait-for-inference" lags and unlocking smoother, more reactive behaviours. +In a nutshell: with _async inference_, your robot keeps acting while the policy server is already busy computing the next chunk of actions---eliminating "wait-for-inference" lags and unlocking smoother, more reactive behaviours. This is fundamentally different from synchronous inference (sync), where the robot stays idle while the policy computes the next chunk of actions. --- + ## Getting started with async inference You can read more information on asynchronous inference in our [blogpost](https://huggingface.co/blog/async-robot-inference). 
This guide is designed to help you quickly set up and run asynchronous inference in your environment. @@ -53,40 +54,53 @@ python src/lerobot/scripts/server/robot_client.py \ --aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions --debug_visualize_queue_size=True # CLIENT: whether to visualize the queue size at runtime ``` + In summary, you need to specify instructions for: + - `SERVER`: the address and port of the policy server - `ROBOT`: the type of robot to connect to, the port to connect to, and the local `id` of the robot - `POLICY`: the type of policy to run, and the model name/path on server to the checkpoint to run. You also need to specify which device should the sever be using, and how many actions to output at once (capped at the policy max actions value). - `CLIENT`: the threshold for the chunk size before sending a new observation to the server, and the function to aggregate actions on overlapping portions. Optionally, you can also visualize the queue size at runtime, to help you tune the `CLIENT` parameters. Importantly, + - `actions_per_chunk` and `chunk_size_threshold` are key parameters to tune for your setup. - `aggregate_fn_name` is the function to aggregate actions on overlapping portions. You can either add a new one to a registry of functions, or add your own in `robot_client.py` (see [here](NOTE:addlinktoLOC)) - `debug_visualize_queue_size` is a useful tool to tune the `CLIENT` parameters. -Done! You should see your robot moving around by now 😉 ---- +## Done! You should see your robot moving around by now 😉 ## Async vs. synchronous inference -Synchronous inference relies on interleaving action chunk prediction and action execution. This inherently results in *idle frames*, frames where the robot awaits idle the policy's output: a new action chunk. +Synchronous inference relies on interleaving action chunk prediction and action execution. 
This inherently results in _idle frames_, frames where the robot idles while awaiting the policy's output: a new action chunk. In turn, inference is plagued by evident real-time lags, where the robot simply stops acting due to the lack of available actions. With robotics models increasing in size, this problem risks becoming only more severe.

- + +

+

+ Synchronous inference makes the robot idle while the policy is + computing the next chunk of actions.

-

Synchronous inference makes the robot idle while the policy is computing the next chunk of actions.

To overcome this, we design async inference, a paradigm where action planning and execution are decoupled, resulting in (1) higher adaptability and, most importantly, (2) no idle frames. -Crucially, with async inference, the next action chunk is computed *before* the current one is exhausted, resulting in no idleness. +Crucially, with async inference, the next action chunk is computed _before_ the current one is exhausted, resulting in no idleness. Higher adaptability is ensured by aggregating the different action chunks on overlapping portions, obtaining an up-to-date plan and a tighter control loop.

- + +

+

+ Asynchronous inference results in no idleness because the next chunk is + computed before the current chunk is exhausted.

-

Asynchronous inference results in no idleness because the next chunk is computed before the current chunk is exhausted.

- --- @@ -105,6 +119,8 @@ python -m lerobot.scripts.server.policy_server \ ``` + + ```python from lerobot.scripts.server.configs import PolicyServerConfig from lerobot.scripts.server.policy_server import serve @@ -115,6 +131,8 @@ config = PolicyServerConfig( ) serve(config) ``` + + @@ -147,6 +165,8 @@ python src/lerobot/scripts/server/robot_client.py \ ``` + + ```python import threading from lerobot.robots.so100_follower import SO100FollowerConfig @@ -201,6 +221,8 @@ if client.start(): # (Optionally) plot the action queue size visualize_action_queue_size(client.action_queue_size) ``` + + @@ -216,20 +238,30 @@ The following two parameters are key in every setup: - actions_per_chunk + + actions_per_chunk + 50 - How many actions the policy outputs at once. Typical values: 10-50. + + How many actions the policy outputs at once. Typical values: 10-50. + - chunk_size_threshold + + chunk_size_threshold + 0.7 - When the queue is ≤ 50% full, the client sends a fresh observation. Value in [0, 1]. + + When the queue is ≤ 50% full, the client sends a fresh observation. + Value in [0, 1]. + -Different values of `actions_per_chunk` and `chunk_size_threshold` do result in different behaviours. + Different values of `actions_per_chunk` and `chunk_size_threshold` do result + in different behaviours. On the one hand, increasing the value of `actions_per_chunk` will result in reducing the likelihood of ending up with no actions to execute, as more actions will be available when the new chunk is computed. @@ -249,10 +281,18 @@ We found the default values of `actions_per_chunk` and `chunk_size_threshold` to - We found values around 0.5-0.6 to work well. If you want to tweak this, spin up a `RobotClient` setting the `--debug-visualize-queue-size` to `True`. This will plot the action queue size evolution at runtime, and you can use it to find the value of `chunk_size_threshold` that works best for your setup.

- + +

+

+ + The action queue size is plotted at runtime when the + `--debug-visualize-queue-size` flag is passed, for various levels of + `chunk_size_threshold` (`g` in the SmolVLA paper). +

-

The action queue size is plotted at runtime when the `--debug-visualize-queue-size` flag is passed, for various levels of `chunk_size_threshold` (`g` in the SmolVLA paper).

- --- diff --git a/docs/source/backwardcomp.mdx b/docs/source/backwardcomp.mdx index 555239170..0e1d01636 100644 --- a/docs/source/backwardcomp.mdx +++ b/docs/source/backwardcomp.mdx @@ -6,21 +6,22 @@ PR [#777](https://github.com/huggingface/lerobot/pull/777) improves the LeRobot ### What changed? -| | Before PR #777 | After PR #777 | -| --------------------------------- | ------------------------------------------------- | --------------------------------------------------------------------------- | -| **Joint range** | Degrees `-180...180°` | **Normalised range** Joints: `–100...100` Gripper: `0...100` | -| **Zero position (SO100 / SO101)** | Arm fully extended horizontally | **In middle of the range for each joint** | -| **Boundary handling** | Software safeguards to detect ±180 ° wrap-arounds | No wrap-around logic needed due to mid-range zero | +| | Before PR #777 | After PR #777 | +| --------------------------------- | ------------------------------------------------- | ------------------------------------------------------------ | +| **Joint range** | Degrees `-180...180°` | **Normalised range** Joints: `–100...100` Gripper: `0...100` | +| **Zero position (SO100 / SO101)** | Arm fully extended horizontally | **In middle of the range for each joint** | +| **Boundary handling** | Software safeguards to detect ±180 ° wrap-arounds | No wrap-around logic needed due to mid-range zero | --- ### Impact on existing datasets -* Recorded trajectories created **before** PR #777 will replay incorrectly if loaded directly: - * Joint angles are offset and incorrectly normalized. -* Any models directly finetuned or trained on the old data will need their inputs and outputs converted. +- Recorded trajectories created **before** PR #777 will replay incorrectly if loaded directly: + - Joint angles are offset and incorrectly normalized. +- Any models directly finetuned or trained on the old data will need their inputs and outputs converted. 
### Using datasets made with the previous calibration system + We provide a migration example script for replaying an episode recorded with the previous calibration here: `examples/backward_compatibility/replay.py`. Below we take you through the modifications that are done in the example script to make the previous calibration datasets work. @@ -33,20 +34,31 @@ Below we take you through the modifications that are done in the example script Let's break this down. New codebase uses `.pos` suffix for the position observations and we have removed `main_` prefix: + + ```python key = f"{name.removeprefix('main_')}.pos" ``` + For `"shoulder_lift"` (id = 2), the 0 position is changed by -90 degrees and the direction is reversed compared to old calibration/code. + + ```python action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90) ``` + + For `"elbow_flex"` (id = 3), the 0 position is changed by -90 degrees compared to old calibration/code. + + ```python action["elbow_flex.pos"] -= 90 ``` + To use degrees normalization we then set the `--robot.use_degrees` option to `true`. + ```diff python examples/backward_compatibility/replay.py \ --robot.type=so101_follower \ @@ -63,6 +75,7 @@ Policies output actions in the same format as the datasets (`torch.Tensors`). Th To find these transformations, we recommend to first try and and replay an episode of the dataset your policy was trained on using the section above. Then, add these same transformations on your inference script (shown here in the `record.py` script): + ```diff action_values = predict_action( observation_frame, diff --git a/docs/source/cameras.mdx b/docs/source/cameras.mdx index 313d5a7cd..604863d74 100644 --- a/docs/source/cameras.mdx +++ b/docs/source/cameras.mdx @@ -7,11 +7,13 @@ LeRobot offers multiple options for video capture, including phone cameras, buil To instantiate a camera, you need a camera identifier. 
This identifier might change if you reboot your computer or re-plug your camera, a behavior mostly dependant on your operating system. To find the camera indices of the cameras plugged into your system, run the following script: + ```bash python -m lerobot.find_cameras opencv # or realsense for Intel Realsense cameras ``` The output will look something like this if you have two cameras connected: + ``` --- Detected Cameras --- Camera #0: @@ -31,7 +33,6 @@ Camera #0: > [!WARNING] > When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable. - ## Use Cameras Below are two examples, demonstrating how to work with the API. @@ -39,10 +40,10 @@ Below are two examples, demonstrating how to work with the API. - **Asynchronous frame capture** using an OpenCV-based camera - **Color and depth capture** using an Intel RealSense camera - + ```python from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.cameras.opencv.camera_opencv import OpenCVCamera @@ -70,10 +71,12 @@ try: finally: camera.disconnect() ``` + + ```python from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig from lerobot.cameras.realsense.camera_realsense import RealSenseCamera @@ -103,15 +106,18 @@ try: finally: camera.disconnect() ``` + + - ## Use your phone + To use your iPhone as a camera on macOS, enable the Continuity Camera feature: + - Ensure your Mac is running macOS 13 or later, and your iPhone is on iOS 16 or later. - Sign in both devices with the same Apple ID. - Connect your devices with a USB cable or turn on Wi-Fi and Bluetooth for a wireless connection. 
@@ -125,40 +131,67 @@ Your iPhone should be detected automatically when running the camera setup scrip If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera -1. *Install `v4l2loopback-dkms` and `v4l-utils`*. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using: +1. _Install `v4l2loopback-dkms` and `v4l-utils`_. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using: + + ```python sudo apt install v4l2loopback-dkms v4l-utils ``` -2. *Install [DroidCam](https://droidcam.app) on your phone*. This app is available for both iOS and Android. -3. *Install [OBS Studio](https://obsproject.com)*. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org): + + +2. _Install [DroidCam](https://droidcam.app) on your phone_. This app is available for both iOS and Android. +3. _Install [OBS Studio](https://obsproject.com)_. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org): + + ```python flatpak install flathub com.obsproject.Studio ``` -4. *Install the DroidCam OBS plugin*. This plugin integrates DroidCam with OBS Studio. Install it with: + + +4. _Install the DroidCam OBS plugin_. This plugin integrates DroidCam with OBS Studio. Install it with: + + ```python flatpak install flathub com.obsproject.Studio.Plugin.DroidCam ``` -5. *Start OBS Studio*. Launch with: + + +5. _Start OBS Studio_. Launch with: + + ```python flatpak run com.obsproject.Studio ``` -6. *Add your phone as a source*. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`. -7. *Adjust resolution settings*. In OBS Studio, go to `File > Settings > Video`. 
Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in. -8. *Start virtual camera*. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide). -9. *Verify the virtual camera setup*. Use `v4l2-ctl` to list the devices: + + +6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`. +7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in. +8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide). +9. _Verify the virtual camera setup_. Use `v4l2-ctl` to list the devices: + + ```python v4l2-ctl --list-devices ``` + + You should see an entry like: + ``` VirtualCam (platform:v4l2loopback-000): /dev/video1 ``` -10. *Check the camera resolution*. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`. + +10. _Check the camera resolution_. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`. + + ```python v4l2-ctl -d /dev/video1 --get-fmt-video ``` + + You should see an entry like: + ``` >>> Format Video Capture: >>> Width/Height : 640/480 diff --git a/docs/source/hilserl.mdx b/docs/source/hilserl.mdx index b3ab40c89..c647a58d5 100644 --- a/docs/source/hilserl.mdx +++ b/docs/source/hilserl.mdx @@ -4,18 +4,22 @@ In this tutorial you will go through the full Human-in-the-Loop Sample-Efficient HIL-SERL is a sample-efficient reinforcement learning algorithm that combines human demonstrations with online learning and human interventions. 
The approach starts from a small set of human demonstrations, uses them to train a reward classifier, and then employs an actor-learner architecture where humans can intervene during policy execution to guide exploration and correct unsafe behaviors. In this tutorial, you'll use a gamepad to provide interventions and control the robot during the learning process. -It combines three key ingredients: - 1. **Offline demonstrations & reward classifier:** a handful of human-teleop episodes plus a vision-based success detector give the policy a shaped starting point. - 2. **On-robot actor / learner loop with human interventions:** a distributed Soft Actor Critic (SAC) learner updates the policy while an actor explores on the physical robot; the human can jump in at any time to correct dangerous or unproductive behaviour. - 3. **Safety & efficiency tools:** joint/end-effector (EE) bounds, crop region of interest (ROI) preprocessing and WandB monitoring keep the data useful and the hardware safe. +It combines three key ingredients: 1. **Offline demonstrations & reward classifier:** a handful of human-teleop episodes plus a vision-based success detector give the policy a shaped starting point. 2. **On-robot actor / learner loop with human interventions:** a distributed Soft Actor Critic (SAC) learner updates the policy while an actor explores on the physical robot; the human can jump in at any time to correct dangerous or unproductive behaviour. 3. **Safety & efficiency tools:** joint/end-effector (EE) bounds, crop region of interest (ROI) preprocessing and WandB monitoring keep the data useful and the hardware safe. Together these elements let HIL-SERL reach near-perfect task success and faster cycle times than imitation-only baselines.

- HIL-SERL workflow + HIL-SERL workflow

-

HIL-SERL workflow, Luo et al. 2024

+

+ HIL-SERL workflow, Luo et al. 2024 +

This guide provides step-by-step instructions for training a robot policy using LeRobot's HilSerl implementation to train on a real robot. @@ -29,6 +33,7 @@ This guide provides step-by-step instructions for training a robot policy using ## What kind of tasks can I train? One can use HIL-SERL to train on a variety of manipulation tasks. Some recommendations: + - Start with a simple task to understand how the system works. - Push cube to a goal region - Pick and lift cube with the gripper @@ -53,6 +58,7 @@ pip install -e ".[hilserl]" The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. Which is defined as: + ```python class HILSerlRobotEnvConfig(EnvConfig): robot: RobotConfig | None = None # Main robot agent (defined in `lerobot/robots`) @@ -72,7 +78,7 @@ class HILSerlRobotEnvConfig(EnvConfig): reward_classifier_pretrained_path: str | None = None # For reward model number_of_steps_after_success: int = 0 # For reward classifier, collect more positive examples after a success to train a classifier ``` - + ### Finding Robot Workspace Bounds @@ -131,6 +137,7 @@ Create a configuration file for recording demonstrations (or edit an existing on 5. Configure `robot`, `cameras`, and other hardware settings Example configuration section: + ```json "mode": "record", "repo_id": "username/pick_lift_cube", @@ -150,6 +157,7 @@ HIL-Serl learns actions in the end-effector space of the robot. Therefore, the t For that we need to define a version of the robot that takes actions in the end-effector space. Check the robot class `SO100FollowerEndEffector` and its configuration `SO100FollowerEndEffectorConfig` for the default parameters related to the end-effector space. 
+ ```python class SO100FollowerEndEffectorConfig(SO100FollowerConfig): """Configuration for the SO100FollowerEndEffector robot.""" @@ -172,6 +180,7 @@ class SO100FollowerEndEffectorConfig(SO100FollowerConfig): } ) ``` + The `Teleoperator` defines the teleoperation device. You can check the list of available teleoperators in `lerobot/teleoperators`. @@ -189,9 +198,16 @@ To setup the gamepad, you need to set the `control_mode` to `"gamepad"` and defi ```

- Figure shows the control mappings on a Logitech gamepad. + Figure shows the control mappings on a Logitech gamepad. +

+

+ Gamepad button mapping for robot control and episode management

-

Gamepad button mapping for robot control and episode management

**Setting up the SO101 leader** @@ -215,7 +231,10 @@ During the online training, press `space` to take over the policy and `space` ag
@@ -231,6 +250,7 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/e ``` During recording: + 1. The robot will reset to the initial position defined in the configuration file `fixed_reset_joint_positions` 2. Complete the task successfully 3. The episode ends with a reward of 1 when you press the "success" button @@ -239,13 +259,13 @@ During recording: 6. The process automatically continues to the next episode 7. After recording all episodes, the dataset is pushed to the Hugging Face Hub (optional) and saved locally - ### Processing the Dataset After collecting demonstrations, process them to determine optimal camera crops. Reinforcement learning is sensitive to background distractions, so it is important to crop the images to the relevant workspace area. Visual RL algorithms learn directly from pixel inputs, making them vulnerable to irrelevant visual information. Background elements like changing lighting, shadows, people moving, or objects outside the workspace can confuse the learning process. Good ROI selection should: + - Include only the essential workspace where the task happens - Capture the robot's end-effector and all objects involved in the task - Exclude unnecessary background elements and distractions @@ -267,6 +287,7 @@ python -m lerobot.scripts.rl.crop_dataset_roi --repo-id username/pick_lift_cube 5. The script outputs cropping parameters and creates a new cropped dataset Example output: + ``` Selected Rectangular Regions of Interest (top, left, height, width): observation.images.side: [180, 207, 180, 200] @@ -274,11 +295,15 @@ observation.images.front: [180, 250, 120, 150] ```

- +

-

Interactive cropping tool for selecting regions of interest

- +

+ Interactive cropping tool for selecting regions of interest +

**Updating Configuration** @@ -294,8 +319,7 @@ Add these crop parameters to your training configuration: **Recommended image resolution** -Most vision-based policies have been validated on square inputs of either **128×128** (default) or **64×64** pixels. We therefore advise setting the resize_size parameter to [128, 128] – or [64, 64] if you need to save GPU memory and bandwidth. Other resolutions are possible but have not been extensively tested. - +Most vision-based policies have been validated on square inputs of either **128×128** (default) or **64×64** pixels. We therefore advise setting the resize_size parameter to [128, 128] – or [64, 64] if you need to save GPU memory and bandwidth. Other resolutions are possible but have not been extensively tested. ### Training a Reward Classifier @@ -332,13 +356,13 @@ Example configuration section for data collection: ```json { - "mode": "record", - "repo_id": "hf_username/dataset_name", - "dataset_root": "data/your_dataset", - "num_episodes": 20, - "push_to_hub": true, - "fps": 10, - "number_of_steps_after_success": 15 + "mode": "record", + "repo_id": "hf_username/dataset_name", + "dataset_root": "data/your_dataset", + "num_episodes": 20, + "push_to_hub": true, + "fps": 10, + "number_of_steps_after_success": 15 } ``` @@ -395,21 +419,25 @@ python -m lerobot.scripts.train --config_path path/to/reward_classifier_train_co To use your trained reward classifier, configure the `HILSerlRobotEnvConfig` to use your model: + ```python env_config = HILSerlRobotEnvConfig( reward_classifier_pretrained_path="path_to_your_pretrained_trained_model", # Other environment parameters ) ``` + + or set the argument in the json config file. ```json { - "reward_classifier_pretrained_path": "path_to_your_pretrained_model" + "reward_classifier_pretrained_path": "path_to_your_pretrained_model" } ``` Run `gym_manipulator.py` to test the model. 
+ ```bash python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config.json ``` @@ -422,11 +450,13 @@ The reward classifier will automatically provide rewards based on the visual inp Create the necessary json configuration files for the reward classifier and the environment. Check the examples [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/tree/main). 2. **Collect a dataset**: + ```bash python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json ``` 3. **Train the classifier**: + ```bash python -m lerobot.scripts.train --config_path src/lerobot/configs/reward_classifier_train_config.json ``` @@ -459,6 +489,7 @@ python -m lerobot.scripts.rl.learner --config_path src/lerobot/configs/train_con ``` The learner: + - Initializes the policy network - Prepares replay buffers - Opens a `gRPC` server to communicate with actors @@ -473,6 +504,7 @@ python -m lerobot.scripts.rl.actor --config_path src/lerobot/configs/train_confi ``` The actor: + - Connects to the learner via `gRPC` - Initializes the environment - Execute rollouts of the policy to collect experience @@ -496,10 +528,19 @@ The training proceeds automatically: - A successful experiment is one where the human has to intervene at the start but then reduces the amount of interventions as the policy improves. You can monitor the intervention rate in the `wandb` dashboard.

- Figure shows the control mappings on a Logitech gamepad. + Figure shows the control mappings on a Logitech gamepad.

-

Example showing how human interventions help guide policy learning over time

+

+ + Example showing how human interventions help guide policy learning over time + +

- The figure shows the plot of the episodic reward over interaction step. The figure shows the effect of human interventions on the policy learning. - The orange curve is an experiment without any human interventions. While the pink and blue curves are experiments with human interventions. @@ -510,7 +551,9 @@ The training proceeds automatically: If you have `wandb.enable` set to `true` in your configuration, you can monitor training progress in real-time through the [Weights & Biases](https://wandb.ai/site/) dashboard. ### Guide to Human Interventions + The learning process is very sensitive to the intervention strategy. It will takes a few runs to understand how to intervene effectively. Some tips and hints: + - Allow the policy to explore for a few episodes at the start of training. - Avoid intervening for long periods of time. Try to intervene in situation to correct the robot's behaviour when it goes off track. - Once the policy starts achieving the task, even if its not perfect, you can limit your interventions to simple quick actions like a simple grasping commands. @@ -518,26 +561,36 @@ The learning process is very sensitive to the intervention strategy. It will tak The ideal behaviour is that your intervention rate should drop gradually during training as shown in the figure below.

- Intervention rate + Intervention rate

-

Plot of the intervention rate during a training run on a pick and lift cube task

+

+ + Plot of the intervention rate during a training run on a pick and lift cube + task + +

### Key hyperparameters to tune Some configuration values have a disproportionate impact on training stability and speed: - **`temperature_init`** (`policy.temperature_init`) – initial entropy temperature in SAC. Higher values encourage more exploration; lower values make the policy more deterministic early on. A good starting point is `1e-2`. We observed that setting it too high can make human interventions ineffective and slow down learning. -- **`policy_parameters_push_frequency`** (`policy.actor_learner_config.policy_parameters_push_frequency`) – interval in *seconds* between two weight pushes from the learner to the actor. The default is `4 s`. Decrease to **1-2 s** to provide fresher weights (at the cost of more network traffic); increase only if your connection is slow, as this will reduce sample efficiency. +- **`policy_parameters_push_frequency`** (`policy.actor_learner_config.policy_parameters_push_frequency`) – interval in _seconds_ between two weight pushes from the learner to the actor. The default is `4 s`. Decrease to **1-2 s** to provide fresher weights (at the cost of more network traffic); increase only if your connection is slow, as this will reduce sample efficiency. - **`storage_device`** (`policy.storage_device`) – device on which the learner keeps the policy parameters. If you have spare GPU memory, set this to `"cuda"` (instead of the default `"cpu"`). Keeping the weights on-GPU removes CPU→GPU transfer overhead and can significantly increase the number of learner updates per second. - Congrats 🎉, you have finished this tutorial! > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). 
Paper citation: + ``` @article{luo2024precise, title={Precise and Dexterous Robotic Manipulation via Human-in-the-Loop Reinforcement Learning}, diff --git a/docs/source/hilserl_sim.mdx b/docs/source/hilserl_sim.mdx index ad7a9584a..c739be835 100644 --- a/docs/source/hilserl_sim.mdx +++ b/docs/source/hilserl_sim.mdx @@ -11,7 +11,6 @@ This guide explains how to use the `gym_hil` simulation environments as an alter Currently, the main environment is a Franka Panda robot simulation based on MuJoCo, with tasks like picking up a cube. - ## Installation First, install the `gym_hil` package within the LeRobot environment: @@ -25,8 +24,6 @@ pip install -e ".[hilserl]" - A gamepad or keyboard to control the robot - A Nvidia GPU - - ## Configuration To use `gym_hil` with LeRobot, you need to create a configuration file. An example is provided [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/gym_hil_env.json). Key configuration sections include: @@ -35,14 +32,15 @@ To use `gym_hil` with LeRobot, you need to create a configuration file. 
An examp ```json { - "type": "hil", - "name": "franka_sim", - "task": "PandaPickCubeGamepad-v0", - "device": "cuda" + "type": "hil", + "name": "franka_sim", + "task": "PandaPickCubeGamepad-v0", + "device": "cuda" } ``` Available tasks: + - `PandaPickCubeBase-v0`: Basic environment - `PandaPickCubeGamepad-v0`: With gamepad control - `PandaPickCubeKeyboard-v0`: With keyboard control @@ -65,6 +63,7 @@ Available tasks: ``` Important parameters: + - `gripper_penalty`: Penalty for excessive gripper movement - `use_gripper`: Whether to enable gripper control - `end_effector_step_sizes`: Size of the steps in the x,y,z axes of the end-effector @@ -76,40 +75,49 @@ Important parameters: To run the environment, set mode to null: + ```python python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json ``` + ### Recording a Dataset To collect a dataset, set the mode to `record` whilst defining the repo_id and number of episodes to record: + ```python python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json ``` + ### Training a Policy To train a policy, checkout the configuration example available [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/train_gym_hil_env.json) and run the actor and learner servers: + ```python python -m lerobot.scripts.rl.actor --config_path path/to/train_gym_hil_env.json ``` + In a different terminal, run the learner server: + ```python python -m lerobot.scripts.rl.learner --config_path path/to/train_gym_hil_env.json ``` + The simulation environment provides a safe and repeatable way to develop and test your Human-In-the-Loop reinforcement learning components before deploying to real robots. Congrats 🎉, you have finished this tutorial! > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). 
Paper citation: + ``` @article{luo2024precise, title={Precise and Dexterous Robotic Manipulation via Human-in-the-Loop Reinforcement Learning}, diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index 2e8ac3619..b18adb8f4 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -3,6 +3,7 @@ This tutorial will explain how to train a neural network to control a real robot autonomously. **You'll learn:** + 1. How to record and visualize your dataset. 2. How to train a policy using your data and prepare it for evaluation. 3. How to evaluate your policy and visualize the results. @@ -14,7 +15,10 @@ By following these steps, you'll be able to replicate tasks, such as picking up
@@ -51,6 +55,8 @@ python -m lerobot.teleoperate \ ```
+ + ```python from lerobot.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader from lerobot.robots.so101_follower import SO101FollowerConfig, SO101Follower @@ -74,10 +80,13 @@ while True: action = teleop_device.get_action() robot.send_action(action) ``` + +
The teleoperate command will automatically: + 1. Identify any missing calibrations and initiate the calibration procedure. 2. Connect the robot and teleop device and start teleoperation. @@ -104,6 +113,8 @@ python -m lerobot.teleoperate \ ``` + + ```python from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.teleoperators.koch_leader import KochLeaderConfig, KochLeader @@ -134,6 +145,8 @@ while True: action = teleop_device.get_action() robot.send_action(action) ``` + + @@ -144,11 +157,13 @@ Once you're familiar with teleoperation, you can record your first dataset. We use the Hugging Face hub features for uploading your dataset. If you haven't previously used the Hub, make sure you can login via the cli using a write-access token, this token can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens). Add your token to the CLI by running this command: + ```bash huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential ``` Then store your Hugging Face repository name in a variable: + ```bash HF_USER=$(huggingface-cli whoami | head -n 1) echo $HF_USER @@ -174,6 +189,8 @@ python -m lerobot.record \ ``` + + ```python from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.datasets.lerobot_dataset import LeRobotDataset @@ -270,40 +287,49 @@ robot.disconnect() teleop.disconnect() dataset.push_to_hub() ``` + + #### Dataset upload + Locally, your dataset is stored in this folder: `~/.cache/huggingface/lerobot/{repo-id}`. At the end of data recording, your dataset will be uploaded on your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/so101_test) that you can obtain by running: + ```bash echo https://huggingface.co/datasets/${HF_USER}/so101_test ``` + Your dataset will be automatically tagged with `LeRobot` for the community to find it easily, and you can also add custom tags (in this case `tutorial` for example). 
You can look for other LeRobot datasets on the hub by searching for `LeRobot` [tags](https://huggingface.co/datasets?other=LeRobot). You can also push your local dataset to the Hub manually, running: + ```bash huggingface-cli upload ${HF_USER}/record-test ~/.cache/huggingface/lerobot/{repo-id} --repo-type dataset ``` - #### Record function The `record` function provides a suite of tools for capturing and managing data during robot operation: ##### 1. Data Storage + - Data is stored using the `LeRobotDataset` format and is stored on disk during recording. - By default, the dataset is pushed to your Hugging Face page after recording. - To disable uploading, use `--dataset.push_to_hub=False`. ##### 2. Checkpointing and Resuming + - Checkpoints are automatically created during recording. - If an issue occurs, you can resume by re-running the same command with `--resume=true`. - To start recording from scratch, **manually delete** the dataset directory. ##### 3. Recording Parameters + Set the flow of data recording using command-line arguments: + - `--dataset.episode_time_s=60` Duration of each data recording episode (default: **60 seconds**). - `--dataset.reset_time_s=60` @@ -312,7 +338,9 @@ Set the flow of data recording using command-line arguments: Total number of episodes to record (default: **50**). ##### 4. Keyboard Controls During Recording + Control the data recording flow using keyboard shortcuts: + - Press **Right Arrow (`→`)**: Early stop the current episode or reset time and move to the next. - Press **Left Arrow (`←`)**: Cancel the current episode and re-record it. - Press **Escape (`ESC`)**: Immediately stop the session, encode videos, and upload the dataset. @@ -327,13 +355,14 @@ Avoid adding too much variation too quickly, as it may hinder your results. If you want to dive deeper into this important topic, you can check out the [blog post](https://huggingface.co/blog/lerobot-datasets#what-makes-a-good-dataset) we wrote on what makes a good dataset. 
- #### Troubleshooting: + - On Linux, if the left and right arrow keys and escape key don't have any effect during data recording, make sure you've set the `$DISPLAY` environment variable. See [pynput limitations](https://pynput.readthedocs.io/en/latest/limitations.html#linux). ## Visualize a dataset If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by: + ```bash echo ${HF_USER}/so101_test ``` @@ -356,6 +385,8 @@ python -m lerobot.replay \ ``` + + ```python import time @@ -388,6 +419,8 @@ for idx in range(dataset.num_frames): robot.disconnect() ``` + + @@ -396,6 +429,7 @@ Your robot should replicate movements similar to those you recorded. For example ## Train a policy To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command: + ```bash python -m lerobot.scripts.train \ --dataset.repo_id=${HF_USER}/so101_test \ @@ -408,14 +442,16 @@ python -m lerobot.scripts.train \ ``` Let's explain the command: + 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/so101_test`. 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. -4. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon. -5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. +3. 
We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon. +4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. Training should take several hours. You will find checkpoints in `outputs/train/act_so101_test/checkpoints`. To resume training from a checkpoint, below is an example command to resume from `last` checkpoint of the `act_so101_test` policy: + ```bash python -m lerobot.scripts.train \ --config_path=outputs/train/act_so101_test/checkpoints/last/pretrained_model/train_config.json \ @@ -427,17 +463,20 @@ If you do not want to push your model to the hub after training use `--policy.pu Additionally you can provide extra `tags` or specify a `license` for your model or make the model repo `private` by adding this: `--policy.private=true --policy.tags=\[ppo,rl\] --policy.license=mit` #### Train using Collab + If your local computer doesn't have a powerful GPU you could utilize Google Collab to train your model by following the [ACT training notebook](./notebooks#training-act). #### Upload policy checkpoints Once training is done, upload the latest checkpoint with: + ```bash huggingface-cli upload ${HF_USER}/act_so101_test \ outputs/train/act_so101_test/checkpoints/last/pretrained_model ``` You can also upload intermediate checkpoints with: + ```bash CKPT=010000 huggingface-cli upload ${HF_USER}/act_so101_test${CKPT} \ @@ -467,6 +506,8 @@ python -m lerobot.record \ ``` + + ```python from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig from lerobot.datasets.lerobot_dataset import LeRobotDataset @@ -539,9 +580,12 @@ for episode_idx in range(NUM_EPISODES): robot.disconnect() dataset.push_to_hub() ``` + + As you can see, it's almost the same command as previously used to record your training dataset. 
Two things changed: -1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint with (e.g. `outputs/train/eval_act_so101_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so101_test`). + +1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/eval_act_so101_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so101_test`). 2. The name of dataset begins by `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_so101_test`). diff --git a/docs/source/il_sim.mdx b/docs/source/il_sim.mdx index 048d3147e..193b09b1b 100644 --- a/docs/source/il_sim.mdx +++ b/docs/source/il_sim.mdx @@ -3,6 +3,7 @@ This tutorial will explain how to train a neural network to control a robot in simulation with imitation learning. **You'll learn:** + 1. How to record a dataset in simulation with [gym-hil](https://github.com/huggingface/gym-hil) and visualize the dataset. 2. How to train a policy using your data. 3. How to evaluate your policy in simulation and visualize the results. @@ -55,13 +56,21 @@ Note that to teleoperate the robot you have to hold the "Human Take Over Pause P **Gamepad Controls**

- Figure shows the control mappings on a Logitech gamepad. + Figure shows the control mappings on a Logitech gamepad. +

+

+ Gamepad button mapping for robot control and episode management

-

Gamepad button mapping for robot control and episode management

**Keyboard controls** For keyboard controls use the `spacebar` to enable control and the following keys to move the robot: + ```bash Arrow keys: Move in X-Y plane Shift and Shift_R: Move in Z axis @@ -74,14 +83,21 @@ For keyboard controls use the `spacebar` to enable control and the following key If you uploaded your dataset to the hub you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id.

- Figure shows the dataset visualizer + Figure shows the dataset visualizer +

+

+ Dataset visualizer

-

Dataset visualizer

- ## Train a policy To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command: + ```bash python -m lerobot.scripts.train \ --dataset.repo_id=${HF_USER}/il_gym \ @@ -93,25 +109,29 @@ python -m lerobot.scripts.train \ ``` Let's explain the command: + 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/il_gym`. 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. -4. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon. -5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. +3. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon. +4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. Training should take several hours, 100k steps (which is the default) will take about 1h on Nvidia A100. You will find checkpoints in `outputs/train/il_sim_test/checkpoints`. #### Train using Collab + If your local computer doesn't have a powerful GPU you could utilize Google Collab to train your model by following the [ACT training notebook](./notebooks#training-act). 
#### Upload policy checkpoints Once training is done, upload the latest checkpoint with: + ```bash huggingface-cli upload ${HF_USER}/il_sim_test \ outputs/train/il_sim_test/checkpoints/last/pretrained_model ``` You can also upload intermediate checkpoints with: + ```bash CKPT=010000 huggingface-cli upload ${HF_USER}/il_sim_test${CKPT} \ @@ -144,9 +164,9 @@ mjpython -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym > [!WARNING] -> While the main workflow of training ACT in simulation is straightforward, there is significant room for exploring how to set up the task, define the initial state of the environment, and determine the type of data required during collection to learn the most effective policy. If your trained policy doesn't perform well, investigate the quality of the dataset it was trained on using our visualizers, as well as the action values and various hyperparameters related to ACT and the simulation. +> While the main workflow of training ACT in simulation is straightforward, there is significant room for exploring how to set up the task, define the initial state of the environment, and determine the type of data required during collection to learn the most effective policy. If your trained policy doesn't perform well, investigate the quality of the dataset it was trained on using our visualizers, as well as the action values and various hyperparameters related to ACT and the simulation. Congrats 🎉, you have finished this tutorial. If you want to continue with using LeRobot in simulation follow this [Tutorial on reinforcement learning in sim with HIL-SERL](https://huggingface.co/docs/lerobot/hilserl_sim) > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). 
diff --git a/docs/source/index.mdx b/docs/source/index.mdx index b8ff56ea7..a2f919e7d 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -1,6 +1,10 @@ diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 51474d8f7..13c3600b4 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -5,45 +5,56 @@ Currently only available from source. Download our source code: + ```bash git clone https://github.com/huggingface/lerobot.git cd lerobot ``` Create a virtual environment with Python 3.10, using [`Miniconda`](https://docs.anaconda.com/miniconda/install/#quick-command-line-install) + ```bash conda create -y -n lerobot python=3.10 ``` Then activate your conda environment, you have to do this each time you open a shell to use lerobot: + ```bash conda activate lerobot ``` When using `miniconda`, install `ffmpeg` in your environment: + ```bash conda install ffmpeg -c conda-forge ``` > [!TIP] > This usually installs `ffmpeg 7.X` for your platform compiled with the `libsvtav1` encoder. If `libsvtav1` is not supported (check supported encoders with `ffmpeg -encoders`), you can: -> - _[On any platform]_ Explicitly install `ffmpeg 7.X` using: -> ```bash -> conda install ffmpeg=7.1.1 -c conda-forge -> ``` -> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`. 
+> +> - _[On any platform]_ Explicitly install `ffmpeg 7.X` using: +> +> ```bash +> conda install ffmpeg=7.1.1 -c conda-forge +> ``` +> +> - _[On Linux only]_ If you want to bring your own ffmpeg: Install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1), and make sure you use the corresponding ffmpeg binary to your install with `which ffmpeg`. Install 🤗 LeRobot: + ```bash pip install -e . ``` ### Troubleshooting + If you encounter build errors, you may need to install additional dependencies: `cmake`, `build-essential`, and `ffmpeg libs`. To install these for linux run: + ```bash sudo apt-get install cmake build-essential python-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev pkg-config ``` + For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/installation.html#bring-your-own-ffmpeg) ## Optional dependencies @@ -51,20 +62,26 @@ For other systems, see: [Compiling PyAV](https://pyav.org/docs/develop/overview/ LeRobot provides optional extras for specific functionalities. Multiple extras can be combined (e.g., `.[aloha,feetech]`). For all available extras, refer to `pyproject.toml`. ### Simulations + Install environment packages: `aloha` ([gym-aloha](https://github.com/huggingface/gym-aloha)), `xarm` ([gym-xarm](https://github.com/huggingface/gym-xarm)), or `pusht` ([gym-pusht](https://github.com/huggingface/gym-pusht)) Example: + ```bash pip install -e ".[aloha]" # or "[pusht]" for example ``` ### Motor Control + For Koch v1.1 install the Dynamixel SDK, for SO100/SO101/Moss install the Feetech SDK. 
+ ```bash pip install -e ".[feetech]" # or "[dynamixel]" for example ``` ### Experiment Tracking + To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with + ```bash wandb login ``` diff --git a/docs/source/integrate_hardware.mdx b/docs/source/integrate_hardware.mdx index 18d73d3cd..089126fcb 100644 --- a/docs/source/integrate_hardware.mdx +++ b/docs/source/integrate_hardware.mdx @@ -21,16 +21,13 @@ Please refer to the [`MotorsBus`](https://github.com/huggingface/lerobot/blob/ma For a good example of how it can be used, you can have a look at our own [SO101 follower implementation](https://github.com/huggingface/lerobot/blob/main/lerobot/robots/so101_follower/so101_follower.py) Use these if compatible. Otherwise, you'll need to find or write a Python interface (not covered in this tutorial): + - Find an existing SDK in Python (or use bindings to C/C++) - Or implement a basic communication wrapper (e.g., via pyserial, socket, or CANopen) You're not alone—many community contributions use custom boards or firmware! -For Feetech and Dynamixel, we currently support these servos: - - Feetech: - - STS & SMS series (protocol 0): `sts3215`, `sts3250`, `sm8512bl` - - SCS series (protocol 1): `scs0009` - - Dynamixel (protocol 2.0 only): `xl330-m077`, `xl330-m288`, `xl430-w250`, `xm430-w350`, `xm540-w270`, `xc430-w150` +For Feetech and Dynamixel, we currently support these servos: - Feetech: - STS & SMS series (protocol 0): `sts3215`, `sts3250`, `sm8512bl` - SCS series (protocol 1): `scs0009` - Dynamixel (protocol 2.0 only): `xl330-m077`, `xl330-m288`, `xl430-w250`, `xm430-w350`, `xm540-w270`, `xc430-w150` If you are using Feetech or Dynamixel servos that are not in this list, you can add those in the [Feetech table](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/feetech/tables.py) or [Dynamixel table](https://github.com/huggingface/lerobot/blob/main/lerobot/motors/dynamixel/tables.py). 
Depending on the model, this will require you to add model-specific information. In most cases though, there shouldn't be a lot of additions to do. @@ -41,6 +38,8 @@ In the next sections, we'll use a `FeetechMotorsBus` as the motors interface for You’ll first need to specify the config class and a string identifier (`name`) for your robot. If your robot has special needs that you'd like to be able to change easily, it should go here (e.g. port/address, baudrate). Here, we'll add the port name and one camera by default for our robot: + + ```python from dataclasses import dataclass, field @@ -64,6 +63,7 @@ class MyCoolRobotConfig(RobotConfig): } ) ``` + Have a look at our [Cameras tutorial](./cameras) to understand how to detect and add your camera. @@ -71,6 +71,7 @@ Next, we'll create our actual robot class which inherits from `Robot`. This abst Here we'll create a simple 5-DoF robot with one camera. It could be a simple arm but notice that the `Robot` abstract class does not assume anything on your robot's form factor. You can let you imagination run wild when designing new robots! + ```python from lerobot.cameras import make_cameras_from_configs from lerobot.motors import Motor, MotorNormMode @@ -96,10 +97,11 @@ class MyCoolRobot(Robot): ) self.cameras = make_cameras_from_configs(config.cameras) ``` + ## Step 2: Define Observation and Action Features -These two properties define the *interface contract* between your robot and tools that consume it (such as data collection or learning pipelines). +These two properties define the _interface contract_ between your robot and tools that consume it (such as data collection or learning pipelines). > [!WARNING] > Note that these properties must be callable even if the robot is not yet connected, so avoid relying on runtime hardware state to define them. 
@@ -109,6 +111,8 @@ These two properties define the *interface contract* between your robot and tool This property should return a dictionary describing the structure of sensor outputs from your robot. The keys match what `get_observation()` returns, and the values describe either the shape (for arrays/images) or the type (for simple values). Example for our 5-DoF arm with one camera: + + ```python @property def _motors_ft(self) -> dict[str, type]: @@ -130,6 +134,8 @@ def _cameras_ft(self) -> dict[str, tuple]: def observation_features(self) -> dict: return {**self._motors_ft, **self._cameras_ft} ``` + + In this case, observations consist of a simple dict storing each motor's position and a camera image. ### `action_features` @@ -137,10 +143,13 @@ In this case, observations consist of a simple dict storing each motor's positio This property describes the commands your robot expects via `send_action()`. Again, keys must match the expected input format, and values define the shape/type of each command. Here, we simply use the same joints proprioceptive features (`self._motors_ft`) as with `observation_features`: the action sent will simply the goal position for each motor. + + ```python def action_features(self) -> dict: return self._motors_ft ``` + ## Step 3: Handle Connection and Disconnection @@ -150,16 +159,19 @@ These methods should handle opening and closing communication with your hardware This property should simply reflect that communication with the robot's hardware is established. When this property is `True`, it should be possible to read and write to the hardware using `get_observation()` and `send_action()`. + ```python @property def is_connected(self) -> bool: return self.bus.is_connected and all(cam.is_connected for cam in self.cameras.values()) ``` + ### `connect()` This method should establish communication with the hardware. Moreover, if your robot needs calibration and is not calibrated, it should start a calibration procedure by default. 
If your robot needs some specific configuration, this should also be called here. + ```python def connect(self, calibrate: bool = True) -> None: self.bus.connect() @@ -171,25 +183,31 @@ def connect(self, calibrate: bool = True) -> None: self.configure() ``` + ### `disconnect()` This method should gracefully terminate communication with the hardware: free any related resources (threads or processes), close ports, etc. Here, we already handle this in our `MotorsBus` and `Camera` classes so we just need to call their own `disconnect()` methods: + + ```python def disconnect(self) -> None: self.bus.disconnect() for cam in self.cameras.values(): cam.disconnect() ``` + ## Step 4: Support Calibration and Configuration LeRobot supports saving and loading calibration data automatically. This is useful for joint offsets, zero positions, or sensor alignment. > Note that depending on your hardware, this may not apply. If that's the case, you can simply leave these methods as no-ops: -> ```python + + +```python > @property > def is_calibrated(self) -> bool: > return True @@ -202,7 +220,8 @@ LeRobot supports saving and loading calibration data automatically. This is usef This should reflect whether your robot has the required calibration loaded. -```python +``` +python @property def is_calibrated(self) -> bool: return self.bus.is_calibrated @@ -216,6 +235,8 @@ The goal of the calibration is twofold: It should implement the logic for calibration (if relevant) and update the `self.calibration` dictionary. If you are using Feetech or Dynamixel motors, our bus interfaces already include methods to help with this. + + ```python def calibrate(self) -> None: self.bus.disable_torque() @@ -245,11 +266,13 @@ def calibrate(self) -> None: self._save_calibration() print("Calibration saved to", self.calibration_fpath) ``` + ### `configure()` Use this to set up any configuration for your hardware (servos control modes, controller gains, etc.). 
This should usually be run at connection time and be idempotent. + ```python def configure(self) -> None: with self.bus.torque_disabled(): @@ -260,6 +283,7 @@ def configure(self) -> None: self.bus.write("I_Coefficient", motor, 0) self.bus.write("D_Coefficient", motor, 32) ``` + ## Step 5: Implement Sensors Reading and Action Sending @@ -269,6 +293,7 @@ These are the most important runtime functions: the core I/O loop. Returns a dictionary of sensor values from the robot. These typically include motor states, camera frames, various sensors, etc. In the LeRobot framework, these observations are what will be fed to a policy in order to predict the actions to take. The dictionary keys and structure must match `observation_features`. + ```python def get_observation(self) -> dict[str, Any]: if not self.is_connected: @@ -284,6 +309,7 @@ def get_observation(self) -> dict[str, Any]: return obs_dict ``` + ### `send_action()` @@ -291,6 +317,7 @@ Takes a dictionary that matches `action_features`, and sends it to your hardware For simplicity, we won't be adding any modification of the actions in our example here. + ```python def send_action(self, action: dict[str, Any]) -> dict[str, Any]: goal_pos = {key.removesuffix(".pos"): val for key, val in action.items()} @@ -300,6 +327,7 @@ def send_action(self, action: dict[str, Any]) -> dict[str, Any]: return action ``` + ## Adding a Teleoperator diff --git a/docs/source/notebooks.mdx b/docs/source/notebooks.mdx index 729b31a99..6a9c3b103 100644 --- a/docs/source/notebooks.mdx +++ b/docs/source/notebooks.mdx @@ -10,8 +10,8 @@ This repository contains example notebooks for using LeRobot. These notebooks de We provide a ready-to-run Google Colab notebook to help you train ACT policies using datasets from the Hugging Face Hub, with optional logging to Weights & Biases. 
-| Notebook | Colab | -|:---------|:------| +| Notebook | Colab | +| :------------------------------------------------------------------------------------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | [Train ACT with LeRobot](https://github.com/huggingface/notebooks/blob/main/lerobot/training-act.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/lerobot/training-act.ipynb) | Expected training time for 100k steps: ~1.5 hours on an NVIDIA A100 GPU with batch size of `64`. diff --git a/docs/source/smolvla.mdx b/docs/source/smolvla.mdx index 17a2bdf18..880beaa1a 100644 --- a/docs/source/smolvla.mdx +++ b/docs/source/smolvla.mdx @@ -3,9 +3,18 @@ SmolVLA is Hugging Face’s lightweight foundation model for robotics. Designed for easy fine-tuning on LeRobot datasets, it helps accelerate your development!

- SmolVLA architecture. -
- Figure 1. SmolVLA takes as input (i) multiple cameras views, (ii) the robot’s current sensorimotor state, and (iii) a natural language instruction, encoded into contextual features used to condition the action expert when generating an action chunk. + SmolVLA architecture. +
+ + Figure 1. SmolVLA takes as input (i) multiple camera views, (ii) the + robot’s current sensorimotor state, and (iii) a natural language + instruction, encoded into contextual features used to condition the action + expert when generating an action chunk. +

## Set Up Your Environment @@ -32,6 +41,7 @@ We recommend checking out the dataset linked below for reference that was used i In this dataset, we recorded 50 episodes across 5 distinct cube positions. For each position, we collected 10 episodes of pick-and-place interactions. This structure, repeating each variation several times, helped the model generalize better. We tried similar dataset with 25 episodes, and it was not enough leading to a bad performance. So, the data quality and quantity is definitely a key. After you have your dataset available on the Hub, you are good to go to use our finetuning script to adapt SmolVLA to your application. + ## Finetune SmolVLA on your data @@ -56,7 +66,8 @@ cd lerobot && python -m lerobot.scripts.train \ ``` -You can start with a small batch size and increase it incrementally, if the GPU allows it, as long as loading times remain short. + You can start with a small batch size and increase it incrementally, if the + GPU allows it, as long as loading times remain short. Fine-tuning is an art. For a complete overview of the options for finetuning, run @@ -66,12 +77,20 @@ python -m lerobot.scripts.train --help ```

- Comparison of SmolVLA across task variations. -
- Figure 2: Comparison of SmolVLA across task variations. From left to right: (1) pick-place cube counting, (2) pick-place cube counting, (3) pick-place cube counting under perturbations, and (4) generalization on pick-and-place of the lego block with real-world SO101. + Comparison of SmolVLA across task variations. +
+ + Figure 2: Comparison of SmolVLA across task variations. From left to right: + (1) pick-place cube counting, (2) pick-place cube counting, (3) pick-place + cube counting under perturbations, and (4) generalization on pick-and-place + of the lego block with real-world SO101. +

- ## Evaluate the finetuned model and run it in real-time Similarly for when recording an episode, it is recommended that you are logged in to the HuggingFace Hub. You can follow the corresponding steps: [Record a dataset](./getting_started_real_world_robot#record-a-dataset). diff --git a/examples/4_train_policy_with_script.md b/examples/4_train_policy_with_script.md index f17411b75..d6cd6cc23 100644 --- a/examples/4_train_policy_with_script.md +++ b/examples/4_train_policy_with_script.md @@ -1,6 +1,6 @@ This tutorial will explain the training script, how to use it, and particularly how to configure everything needed for the training run. -> **Note:** The following assumes you're running these commands on a machine equipped with a cuda GPU. If you don't have one (or if you're using a Mac), you can add `--policy.device=cpu` (`--policy.device=mps` respectively). However, be advised that the code executes much slower on cpu. +> **Note:** The following assumes you're running these commands on a machine equipped with a cuda GPU. If you don't have one (or if you're using a Mac), you can add `--policy.device=cpu` (`--policy.device=mps` respectively). However, be advised that the code executes much slower on cpu. ## The training script @@ -15,17 +15,22 @@ LeRobot offers a training script at [`lerobot/scripts/train.py`](../src/lerobot/ ## Overview of the configuration system In the training script, the main function `train` expects a `TrainPipelineConfig` object: + + ```python # train.py @parser.wrap() def train(cfg: TrainPipelineConfig): ``` + You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../src/lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option) When running the script, inputs for the command line are parsed thanks to the `@parser.wrap()` decorator and an instance of this class is automatically generated. 
Under the hood, this is done with [Draccus](https://github.com/dlwh/draccus) which is a tool dedicated to this purpose. If you're familiar with Hydra, Draccus can similarly load configurations from config files (.json, .yaml) and also override their values through command line inputs. Unlike Hydra, these configurations are pre-defined in the code through dataclasses rather than being defined entirely in config files. This allows for more rigorous serialization/deserialization, typing, and to manipulate configuration as objects directly in the code and not as dictionaries or namespaces (which enables nice features in an IDE such as autocomplete, jump-to-def, etc.) Let's have a look at a simplified example. Amongst other attributes, the training config has the following attributes: + + ```python @dataclass class TrainPipelineConfig: @@ -33,7 +38,11 @@ class TrainPipelineConfig: env: envs.EnvConfig | None = None policy: PreTrainedConfig | None = None ``` + + in which `DatasetConfig` for example is defined as such: + + ```python @dataclass class DatasetConfig: @@ -41,16 +50,17 @@ class DatasetConfig: episodes: list[int] | None = None video_backend: str = "pyav" ``` + This creates a hierarchical relationship where, for example assuming we have a `cfg` instance of `TrainPipelineConfig`, we can access the `repo_id` value with `cfg.dataset.repo_id`. From the command line, we can specify this value by using a very similar syntax `--dataset.repo_id=repo/id`. By default, every field takes its default value specified in the dataclass. If a field doesn't have a default value, it needs to be specified either from the command line or from a config file – which path is also given in the command line (more in this below). In the example above, the `dataset` field doesn't have a default value which means it must be specified. 
- ## Specifying values from the CLI Let's say that we want to train [Diffusion Policy](../src/lerobot/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this: + ```bash python -m lerobot.scripts.train \ --dataset.repo_id=lerobot/pusht \ @@ -59,11 +69,13 @@ python -m lerobot.scripts.train \ ``` Let's break this down: + - To specify the dataset, we just need to specify its `repo_id` on the hub which is the only required argument in the `DatasetConfig`. The rest of the fields have default values and in this case we are fine with those so we can just add the option `--dataset.repo_id=lerobot/pusht`. - To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/policies](../src/lerobot/policies) - Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/envs/configs.py`](../src/lerobot/envs/configs.py) Let's see another example. 
Let's say you've been training [ACT](../src/lerobot/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with: + ```bash python -m lerobot.scripts.train \ --policy.type=act \ @@ -71,10 +83,12 @@ python -m lerobot.scripts.train \ --env.type=aloha \ --output_dir=outputs/train/act_aloha_insertion ``` + > Notice we added `--output_dir` to explicitly tell where to write outputs from this run (checkpoints, training state, configs etc.). This is not mandatory and if you don't specify it, a default directory will be created from the current date and time, env.type and policy.type. This will typically look like `outputs/train/2025-01-24/16-10-05_aloha_act`. We now want to train a different policy for aloha on another task. We'll change the dataset and use [lerobot/aloha_sim_transfer_cube_human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human) instead. Of course, we also need to change the task of the environment as well to match this other task. Looking at the [`AlohaEnv`](../src/lerobot/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using: + ```bash python -m lerobot.scripts.train \ --policy.type=act \ @@ -87,6 +101,7 @@ python -m lerobot.scripts.train \ ## Loading from a config file Now, let's assume that we want to reproduce the run just above. 
That run has produced a `train_config.json` file in its checkpoints, which serializes the `TrainPipelineConfig` instance it used: + ```json { "dataset": { @@ -110,34 +125,40 @@ Now, let's assume that we want to reproduce the run just above. That run has pro ``` We can then simply load the config values from this file using: + ```bash python -m lerobot.scripts.train \ --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \ --output_dir=outputs/train/act_aloha_transfer_2 ``` + `--config_path` is also a special argument which allows to initialize the config from a local config file. It can point to a directory that contains `train_config.json` or to the config file itself directly. Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.: + ```bash python -m lerobot.scripts.train \ --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \ --output_dir=outputs/train/act_aloha_transfer_2 --policy.n_action_steps=80 ``` + > Note: While `--output_dir` is not required in general, in this case we need to specify it since it will otherwise take the value from the `train_config.json` (which is `outputs/train/act_aloha_transfer`). In order to prevent accidental deletion of previous run checkpoints, we raise an error if you're trying to write in an existing directory. This is not the case when resuming a run, which is what you'll learn next. `--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. 
running: + ```bash python -m lerobot.scripts.train --config_path=lerobot/diffusion_pusht ``` -will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht) +will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht) ## Resume training Being able to resume a training run is important in case it crashed or aborted for any reason. We'll demonstrate how to do that here. Let's reuse the command from the previous run and add a few more options: + ```bash python -m lerobot.scripts.train \ --policy.type=act \ @@ -150,19 +171,24 @@ python -m lerobot.scripts.train \ ``` Here we've taken care to set up the log frequency and checkpointing frequency to low numbers so we can showcase resumption. You should be able to see some logging and have a first checkpoint within 1 minute (depending on hardware). Wait for the first checkpoint to happen, you should see a line that looks like this in your terminal: + ``` INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100 ``` + Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with: + ```bash python -m lerobot.scripts.train \ --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \ --resume=true ``` + You should see from the logging that your training picks up from where it left off. Another reason for which you might want to resume a run is simply to extend training and add more training steps. The number of training steps is set by the option `--steps`, which is 100 000 by default. 
You could double the number of steps of the previous run with: + ```bash python -m lerobot.scripts.train \ --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \ @@ -171,7 +197,9 @@ python -m lerobot.scripts.train \ ``` ## Outputs of a run + In the output directory, there will be a folder called `checkpoints` with the following structure: + ```bash outputs/train/run_resumption/checkpoints ├── 000100 # checkpoint_dir for training step 100 @@ -194,6 +222,7 @@ outputs/train/run_resumption/checkpoints In addition to the features currently in Draccus, we've added a special `.path` argument for the policy, which allows to load a policy as you would with `PreTrainedPolicy.from_pretrained()`. In that case, `path` can be a local directory that contains a checkpoint or a repo_id pointing to a pretrained policy on the hub. For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with: + ```bash python -m lerobot.scripts.train \ --policy.path=lerobot/act_aloha_sim_transfer_cube_human \ @@ -209,15 +238,19 @@ When doing so, keep in mind that the features of the fine-tuning dataset would h When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you configured your run correctly. The final configuration will also be saved with the checkpoint. After that, you will see training log like this one: + ``` INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774 ``` + or evaluation log: + ``` INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266 ``` These logs will also be saved in wandb if `wandb.enable` is set to `true`. 
Here are the meaning of some abbreviations: + - `smpl`: number of samples seen during training. - `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task. - `epch`: number of time all unique samples are seen (epoch). @@ -235,6 +268,7 @@ Some metrics are useful for initial performance profiling. For example, if you f We'll summarize here the main use cases to remember from this tutorial. #### Train a policy from scratch – CLI + ```bash python -m lerobot.scripts.train \ --policy.type=act \ # <- select 'act' policy @@ -243,6 +277,7 @@ python -m lerobot.scripts.train \ ``` #### Train a policy from scratch - config file + CLI + ```bash python -m lerobot.scripts.train \ --config_path=path/to/pretrained_model \ # <- can also be a repo_id @@ -250,6 +285,7 @@ python -m lerobot.scripts.train \ ``` #### Resume/continue a training run + ```bash python -m lerobot.scripts.train \ --config_path=checkpoint/pretrained_model/ \ @@ -258,6 +294,7 @@ python -m lerobot.scripts.train \ ``` #### Fine-tuning + ```bash python -m lerobot.scripts.train \ --policy.path=lerobot/act_aloha_sim_transfer_cube_human \ # <- can also be a local path to a checkpoint diff --git a/pyproject.toml b/pyproject.toml index 878a36dbe..e9539037b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + [project.urls] -homepage = "https://github.com/huggingface/lerobot" +homepage = "https://huggingface.co/lerobot" +documentation = "https://huggingface.co/docs/lerobot/index" +source = "https://github.com/huggingface/lerobot" issues = "https://github.com/huggingface/lerobot/issues" discord = "https://discord.gg/s3KuuzsPFb" @@ -21,109 +27,165 @@ discord = "https://discord.gg/s3KuuzsPFb" name = "lerobot" version = "0.1.0" description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch" +readme = "README.md" +license = { text = "Apache-2.0" } +requires-python = ">=3.10" authors = [ { name = "Rémi Cadène", email = "re.cadene@gmail.com" }, { name = "Simon Alibert", email = "alibert.sim@gmail.com" }, { name = "Alexander Soare", email = "alexander.soare159@gmail.com" }, { name = "Quentin Gallouédec", email = "quentin.gallouedec@ec-lyon.fr" }, - { name = "Adil Zouitine", email = "adilzouitinegm@gmail.com" }, - { name = "Thomas Wolf", email = "thomaswolfcontact@gmail.com" }, { name = "Steven Palma", email = "imstevenpmwork@ieee.org" }, + { name = "Pepijn Kooijmans", email = "pepijnkooijmans@outlook.com"}, + { name = "Michel Aractingi", email = "michel.aractingi@gmail.com"}, + { name = "Adil Zouitine", email = "adilzouitinegm@gmail.com" }, + { name = "Dana Aubakirova", email = "danaaubakirova17@gmail.com"}, + { name = "Caroline Pascal", email = "caroline8.pascal@gmail.com"}, + { name = "Martino Russi", email = "nopyeps@gmail.com"}, + { name = "Thomas Wolf", email = "thomaswolfcontact@gmail.com" }, ] -readme = "README.md" -license = { text = "Apache-2.0" } -requires-python = ">=3.10" -keywords = ["robotics", "deep learning", "pytorch"] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", - "Topic :: Software Development :: Build Tools", - "Topic :: 
Scientific/Engineering :: Artificial Intelligence", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.10", + "Topic :: Software Development :: Build Tools", + "Topic :: Scientific/Engineering :: Artificial Intelligence", ] +keywords = ["lerobot", "huggingface", "robotics", "machine learning", "artificial intelligence"] + dependencies = [ - "cmake>=3.29.0.1", - "datasets>=2.19.0,<=3.6.0", - "deepdiff>=7.0.1", + + # Hugging Face dependencies + "datasets>=2.19.0,<=3.6.0", # TODO: Bumb dependency "diffusers>=0.27.2", - "draccus==0.10.0", + "huggingface-hub[hf-transfer,cli]>=0.27.1", + + # Core dependencies + "cmake>=3.29.0.1", "einops>=0.8.0", - "flask>=3.0.3", - "gdown>=5.1.0", - "gymnasium==0.29.1", # TODO(rcadene, aliberts): Make gym 1.0.0 work - "h5py>=3.10.0", - "huggingface-hub[hf-transfer,cli]>=0.27.1 ; python_version < '4.0'", - "imageio[ffmpeg]>=2.34.0", - "jsonlines>=4.0.0", - "numba>=0.59.0", - "omegaconf>=2.3.0", "opencv-python-headless>=4.9.0", - "packaging>=24.2", "av>=14.2.0", - "pymunk>=6.6.0,<7.0.0", - "pynput>=1.7.7", - "pyserial>=3.5", - "pyzmq>=26.2.1", - "rerun-sdk>=0.21.0", - "termcolor>=2.4.0", "torch>=2.2.1", "torchcodec>=0.2.1; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torchvision>=0.21.0", + "jsonlines>=4.0.0", + "packaging>=24.2", + "pynput>=1.7.7", + "pyserial>=3.5", "wandb>=0.16.3", - "zarr>=2.17.0", + + "draccus==0.10.0", # TODO: Remove == + "gymnasium>=0.29.1,<1.0.0", # TODO: Bumb dependency + "rerun-sdk>=0.21.0,<0.23.0", # TODO: Bumb dependency + + # Support dependencies + "deepdiff>=7.0.1,<9.0.0", + "flask>=3.0.3,<4.0.0", + "imageio[ffmpeg]>=2.34.0,<3.0.0", + "termcolor>=2.4.0,<4.0.0", ] +# Optional dependencies [project.optional-dependencies] -aloha = ["gym-aloha>=0.1.1 ; python_version < '4.0'"] -docs = ["hf-doc-builder 
@ git+https://github.com/huggingface/doc-builder.git@main", "watchdog >= 6.0.0"] -dev = ["pre-commit>=3.7.0", "debugpy>=1.8.1", "grpcio-tools==1.71.0"] -dora = [ - "gym-dora @ git+https://github.com/dora-rs/dora-lerobot.git#subdirectory=gym_dora ; python_version < '4.0'", -] -dynamixel = ["dynamixel-sdk>=3.7.31"] + +# Common +pygame-dep = ["pygame>=2.5.1"] +placo-dep = ["placo>=0.9.6"] +transformers-dep = ["transformers>=4.50.3,<4.52.0"] # TODO: Bump dependency +grpcio-dep = ["grpcio==1.71.0"] + +# Motors feetech = ["feetech-servo-sdk>=1.0.0"] -gamepad = ["pygame>=2.5.1", "hidapi>=0.14.0"] -hopejr = ["feetech-servo-sdk>=1.0.0", "pygame>=2.5.1"] -kinematics = ["placo>=0.9.6"] +dynamixel = ["dynamixel-sdk>=3.7.31"] + +# Robots +gamepad = ["lerobot[pygame-dep]", "hidapi>=0.14.0"] +hopejr = ["lerobot[feetech]", "lerobot[pygame-dep]"] +lekiwi = ["lerobot[feetech]", "pyzmq>=26.2.1"] +kinematics = ["lerobot[placo-dep]"] intelrealsense = [ "pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'", "pyrealsense2-macosx>=2.54 ; sys_platform == 'darwin'", ] -pi0 = ["transformers>=4.50.3"] -smolvla = ["transformers>=4.50.3", "num2words>=0.5.14", "accelerate>=1.7.0", "safetensors>=0.4.3"] -pusht = ["gym-pusht>=0.1.5 ; python_version < '4.0'"] stretch = [ - "hello-robot-stretch-body>=0.7.27 ; python_version < '4.0' and sys_platform == 'linux'", + "hello-robot-stretch-body>=0.7.27 ; sys_platform == 'linux'", "pyrender @ git+https://github.com/mmatl/pyrender.git ; sys_platform == 'linux'", "pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'" -] -test = ["pytest>=8.1.0", "pytest-timeout>=2.4.0", "pytest-cov>=5.0.0", "pyserial>=3.5", "mock-serial>=0.0.1 ; sys_platform != 'win32'"] -hilserl = ["transformers>=4.50.3", "gym-hil>=0.1.9", "protobuf>=5.29.3", "grpcio==1.71.0", "placo>=0.9.6"] -umi = ["imagecodecs>=2024.1.1"] -video_benchmark = ["scikit-image>=0.23.2", "pandas>=2.2.2"] -xarm = ["gym-xarm>=0.1.1 ; python_version < '4.0'"] -async = ["grpcio==1.71.0", "matplotlib>=3.10.3"] +] 
# TODO: Currently not supported -[tool.poetry] -requires-poetry = ">=2.1" -packages = [ - { include = "lerobot", from = "src" } -] +# Policies +pi0 = ["lerobot[transformers-dep]"] +smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14", "accelerate>=1.7.0", "safetensors>=0.4.3"] +hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.9", "protobuf>=5.29.3", "lerobot[grpcio-dep]", "lerobot[placo-dep]"] + +# Features +async = ["lerobot[grpcio-dep]", "matplotlib>=3.10.3"] + +# Development +docs = ["hf-doc-builder @ git+https://github.com/huggingface/doc-builder.git@main", "watchdog >= 6.0.0"] +dev = ["pre-commit>=3.7.0", "debugpy>=1.8.1", "grpcio-tools==1.71.0"] +test = ["pytest>=8.1.0", "pytest-timeout>=2.4.0", "pytest-cov>=5.0.0", "mock-serial>=0.0.1 ; sys_platform != 'win32'"] +video_benchmark = ["scikit-image>=0.23.2", "pandas>=2.2.2"] + +# Simulation +aloha = ["gym-aloha>=0.1.1"] +pusht = ["gym-pusht>=0.1.5", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead +xarm = ["gym-xarm>=0.1.1"] + +# ---------------- Tool Configurations ---------------- +[tool.setuptools.packages.find] +where = ["src"] [tool.ruff] -line-length = 110 target-version = "py310" +line-length = 110 exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"] [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "I", "N", "B", "C4", "SIM"] +# E, W: pycodestyle errors and warnings +# F: PyFlakes +# I: isort +# UP: pyupgrade +# B: flake8-bugbear (good practices, potential bugs) +# C4: flake8-comprehensions (more concise comprehensions) +# A: flake8-builtins (shadowing builtins) +# SIM: flake8-simplify +# RUF: Ruff-specific rules +# D: pydocstyle (for docstring style/formatting) +# S: flake8-bandit (some security checks, complements Bandit) +# T20: flake8-print (discourage print statements in production code) +# N: pep8-naming +# TODO: Uncomment rules when ready to use +select = [ + "E", "W", "F", "I", "B", "C4", "T20", "N" # "SIM", "A", "S", "D", "RUF", "UP" +] 
+ignore = [ + "E501", # Line too long + "T201", # Print statement found + "T203", # Pprint statement found + "B008", # Perform function call in argument defaults +] [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401", "F403"] +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["lerobot"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = true + [tool.bandit] exclude_dirs = [ "tests", @@ -148,6 +210,24 @@ default.extend-ignore-identifiers-re = [ "ein", ] -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +# TODO: Uncomment when ready to use +# [tool.interrogate] +# ignore-init-module = true +# ignore-init-method = true +# ignore-nested-functions = false +# ignore-magic = false +# ignore-semiprivate = false +# ignore-private = false +# ignore-property-decorators = false +# ignore-module = false +# ignore-setters = false +# fail-under = 80 +# output-format = "term-missing" +# color = true +# paths = ["src/lerobot"] + +# [tool.mypy] +# python_version = "3.10" +# warn_return_any = true +# warn_unused_configs = true +# ignore_missing_imports = false diff --git a/src/lerobot/cameras/camera.py b/src/lerobot/cameras/camera.py index 1937205b1..e435c7309 100644 --- a/src/lerobot/cameras/camera.py +++ b/src/lerobot/cameras/camera.py @@ -15,7 +15,7 @@ # limitations under the License. import abc -from typing import Any, Dict, List +from typing import Any import numpy as np @@ -69,7 +69,7 @@ class Camera(abc.ABC): @staticmethod @abc.abstractmethod - def find_cameras() -> List[Dict[str, Any]]: + def find_cameras() -> list[dict[str, Any]]: """Detects available cameras connected to the system. 
Returns: List[Dict[str, Any]]: A list of dictionaries, diff --git a/src/lerobot/cameras/opencv/camera_opencv.py b/src/lerobot/cameras/opencv/camera_opencv.py index 1d7a1645d..7ad9988cc 100644 --- a/src/lerobot/cameras/opencv/camera_opencv.py +++ b/src/lerobot/cameras/opencv/camera_opencv.py @@ -23,7 +23,7 @@ import platform import time from pathlib import Path from threading import Event, Lock, Thread -from typing import Any, Dict, List +from typing import Any # Fix MSMF hardware transform compatibility for Windows before importing cv2 if platform.system() == "Windows" and "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS" not in os.environ: @@ -245,7 +245,7 @@ class OpenCVCamera(Camera): ) @staticmethod - def find_cameras() -> List[Dict[str, Any]]: + def find_cameras() -> list[dict[str, Any]]: """ Detects available OpenCV cameras connected to the system. diff --git a/src/lerobot/cameras/realsense/camera_realsense.py b/src/lerobot/cameras/realsense/camera_realsense.py index 96531b694..74b055fa4 100644 --- a/src/lerobot/cameras/realsense/camera_realsense.py +++ b/src/lerobot/cameras/realsense/camera_realsense.py @@ -19,7 +19,7 @@ Provides the RealSenseCamera class for capturing frames from Intel RealSense cam import logging import time from threading import Event, Lock, Thread -from typing import Any, Dict, List +from typing import Any import cv2 import numpy as np @@ -194,7 +194,7 @@ class RealSenseCamera(Camera): logger.info(f"{self} connected.") @staticmethod - def find_cameras() -> List[Dict[str, Any]]: + def find_cameras() -> list[dict[str, Any]]: """ Detects available Intel RealSense cameras connected to the system. 
diff --git a/src/lerobot/cameras/realsense/configuration_realsense.py b/src/lerobot/cameras/realsense/configuration_realsense.py index 82e7c0d36..36a86876d 100644 --- a/src/lerobot/cameras/realsense/configuration_realsense.py +++ b/src/lerobot/cameras/realsense/configuration_realsense.py @@ -28,12 +28,12 @@ class RealSenseCameraConfig(CameraConfig): Example configurations for Intel RealSense D405: ```python # Basic configurations - RealSenseCameraConfig("0123456789", 30, 1280, 720) # 1280x720 @ 30FPS - RealSenseCameraConfig("0123456789", 60, 640, 480) # 640x480 @ 60FPS + RealSenseCameraConfig("0123456789", 30, 1280, 720) # 1280x720 @ 30FPS + RealSenseCameraConfig("0123456789", 60, 640, 480) # 640x480 @ 60FPS # Advanced configurations RealSenseCameraConfig("0123456789", 30, 640, 480, use_depth=True) # With depth sensing - RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation + RealSenseCameraConfig("0123456789", 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation ``` Attributes: diff --git a/src/lerobot/configs/parser.py b/src/lerobot/configs/parser.py index 1da7ad83f..2296eaa20 100644 --- a/src/lerobot/configs/parser.py +++ b/src/lerobot/configs/parser.py @@ -16,9 +16,9 @@ import inspect import pkgutil import sys from argparse import ArgumentError +from collections.abc import Sequence from functools import wraps from pathlib import Path -from typing import Sequence import draccus @@ -76,9 +76,8 @@ def parse_plugin_args(plugin_arg_suffix: str, args: Sequence[str]) -> dict: - Values are the corresponding argument values Example: - >>> args = ['--env.discover_packages_path=my_package', - ... 
'--other_arg=value'] - >>> parse_plugin_args('discover_packages_path', args) + >>> args = ["--env.discover_packages_path=my_package", "--other_arg=value"] + >>> parse_plugin_args("discover_packages_path", args) {'env.discover_packages_path': 'my_package'} """ plugin_args = {} @@ -111,7 +110,7 @@ def load_plugin(plugin_path: str) -> None: PluginLoadError: If the plugin cannot be loaded due to import errors or if the package path is invalid. Examples: - >>> load_plugin("external_plugin.core") # Loads plugin from external package + >>> load_plugin("external_plugin.core") # Loads plugin from external package Notes: - The plugin package should handle its own registration during import diff --git a/src/lerobot/configs/policies.py b/src/lerobot/configs/policies.py index 05f3296b8..c5b2fa09e 100644 --- a/src/lerobot/configs/policies.py +++ b/src/lerobot/configs/policies.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import abc +import builtins import json import logging import os import tempfile from dataclasses import dataclass, field from pathlib import Path -from typing import Type, TypeVar +from typing import TypeVar import draccus from huggingface_hub import hf_hub_download @@ -31,7 +32,6 @@ from lerobot.optim.schedulers import LRSchedulerConfig from lerobot.utils.hub import HubMixin from lerobot.utils.utils import auto_select_torch_device, is_amp_available, is_torch_device_available -# Generic variable that is either PreTrainedConfig or a subclass thereof T = TypeVar("T", bound="PreTrainedConfig") @@ -148,7 +148,7 @@ class PreTrainedConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC): @classmethod def from_pretrained( - cls: Type[T], + cls: builtins.type[T], pretrained_name_or_path: str | Path, *, force_download: bool = False, diff --git a/src/lerobot/configs/train.py b/src/lerobot/configs/train.py index c088a5fa1..60a4d81d5 100644 --- a/src/lerobot/configs/train.py +++ 
b/src/lerobot/configs/train.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import builtins import datetime as dt import os from dataclasses import dataclass, field from pathlib import Path -from typing import Type import draccus from huggingface_hub import hf_hub_download @@ -135,7 +135,7 @@ class TrainPipelineConfig(HubMixin): @classmethod def from_pretrained( - cls: Type["TrainPipelineConfig"], + cls: builtins.type["TrainPipelineConfig"], pretrained_name_or_path: str | Path, *, force_download: bool = False, diff --git a/src/lerobot/datasets/card_template.md b/src/lerobot/datasets/card_template.md index 7ee27df95..ee26a78f5 100644 --- a/src/lerobot/datasets/card_template.md +++ b/src/lerobot/datasets/card_template.md @@ -1,7 +1,8 @@ --- # For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1 # Doc / guide: https://huggingface.co/docs/hub/datasets-cards -{{ card_data }} +# prettier-ignore +{{card_data}} --- This dataset was created using [LeRobot](https://github.com/huggingface/lerobot). 
diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 1a3dd1e1b..46feed2bf 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -16,8 +16,8 @@ import contextlib import logging import shutil +from collections.abc import Callable from pathlib import Path -from typing import Callable import datasets import numpy as np diff --git a/src/lerobot/datasets/push_dataset_to_hub/utils.py b/src/lerobot/datasets/push_dataset_to_hub/utils.py index 6aca7b03b..5f6363a77 100644 --- a/src/lerobot/datasets/push_dataset_to_hub/utils.py +++ b/src/lerobot/datasets/push_dataset_to_hub/utils.py @@ -16,7 +16,6 @@ import inspect from concurrent.futures import ThreadPoolExecutor from pathlib import Path -from typing import Dict import datasets import numpy @@ -77,7 +76,7 @@ def check_repo_id(repo_id: str) -> None: # TODO(aliberts): remove -def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> Dict[str, torch.Tensor]: +def calculate_episode_data_index(hf_dataset: datasets.Dataset) -> dict[str, torch.Tensor]: """ Calculate episode data index for the provided HuggingFace Dataset. Relies on episode_index column of hf_dataset. diff --git a/src/lerobot/datasets/sampler.py b/src/lerobot/datasets/sampler.py index 2f6c15c15..79ac7a4b2 100644 --- a/src/lerobot/datasets/sampler.py +++ b/src/lerobot/datasets/sampler.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Iterator, Union +from collections.abc import Iterator import torch @@ -22,7 +22,7 @@ class EpisodeAwareSampler: def __init__( self, episode_data_index: dict, - episode_indices_to_use: Union[list, None] = None, + episode_indices_to_use: list | None = None, drop_n_first_frames: int = 0, drop_n_last_frames: int = 0, shuffle: bool = False, diff --git a/src/lerobot/datasets/transforms.py b/src/lerobot/datasets/transforms.py index 3ac1d5771..f992275b7 100644 --- a/src/lerobot/datasets/transforms.py +++ b/src/lerobot/datasets/transforms.py @@ -14,13 +14,16 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections +from collections.abc import Callable, Sequence from dataclasses import dataclass, field -from typing import Any, Callable, Sequence +from typing import Any import torch from torchvision.transforms import v2 -from torchvision.transforms.v2 import Transform -from torchvision.transforms.v2 import functional as F # noqa: N812 +from torchvision.transforms.v2 import ( + Transform, + functional as F, # noqa: N812 +) class RandomSubsetApply(Transform): diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index de969d618..ef381e9e7 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -14,7 +14,7 @@ import abc from dataclasses import dataclass, field -from typing import Any, Optional +from typing import Any import draccus @@ -179,10 +179,10 @@ class EnvTransformConfig: add_joint_velocity_to_observation: bool = False add_current_to_observation: bool = False add_ee_pose_to_observation: bool = False - crop_params_dict: Optional[dict[str, tuple[int, int, int, int]]] = None - resize_size: Optional[tuple[int, int]] = None + crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None + resize_size: tuple[int, int] | None = None control_time_s: float = 20.0 - fixed_reset_joint_positions: Optional[Any] = None + fixed_reset_joint_positions: Any 
| None = None reset_time_s: float = 5.0 use_gripper: bool = True gripper_quantization_threshold: float | None = 0.8 @@ -195,21 +195,21 @@ class EnvTransformConfig: class HILSerlRobotEnvConfig(EnvConfig): """Configuration for the HILSerlRobotEnv environment.""" - robot: Optional[RobotConfig] = None - teleop: Optional[TeleoperatorConfig] = None - wrapper: Optional[EnvTransformConfig] = None + robot: RobotConfig | None = None + teleop: TeleoperatorConfig | None = None + wrapper: EnvTransformConfig | None = None fps: int = 10 name: str = "real_robot" mode: str = None # Either "record", "replay", None - repo_id: Optional[str] = None - dataset_root: Optional[str] = None + repo_id: str | None = None + dataset_root: str | None = None task: str = "" num_episodes: int = 10 # only for record mode episode: int = 0 device: str = "cuda" push_to_hub: bool = True - pretrained_policy_name_or_path: Optional[str] = None - reward_classifier_pretrained_path: Optional[str] = None + pretrained_policy_name_or_path: str | None = None + reward_classifier_pretrained_path: str | None = None # For the reward classifier, to record more positive examples after a success number_of_steps_after_success: int = 0 @@ -248,18 +248,18 @@ class HILEnvConfig(EnvConfig): } ) ################# args from hilserlrobotenv - reward_classifier_pretrained_path: Optional[str] = None - robot_config: Optional[RobotConfig] = None - teleop_config: Optional[TeleoperatorConfig] = None - wrapper: Optional[EnvTransformConfig] = None + reward_classifier_pretrained_path: str | None = None + robot_config: RobotConfig | None = None + teleop_config: TeleoperatorConfig | None = None + wrapper: EnvTransformConfig | None = None mode: str = None # Either "record", "replay", None - repo_id: Optional[str] = None - dataset_root: Optional[str] = None + repo_id: str | None = None + dataset_root: str | None = None num_episodes: int = 10 # only for record mode episode: int = 0 device: str = "cuda" push_to_hub: bool = True - 
pretrained_policy_name_or_path: Optional[str] = None + pretrained_policy_name_or_path: str | None = None # For the reward classifier, to record more positive examples after a success number_of_steps_after_success: int = 0 ############################ diff --git a/src/lerobot/find_cameras.py b/src/lerobot/find_cameras.py index aff2f8c19..be8f272ee 100644 --- a/src/lerobot/find_cameras.py +++ b/src/lerobot/find_cameras.py @@ -32,7 +32,7 @@ import concurrent.futures import logging import time from pathlib import Path -from typing import Any, Dict, List +from typing import Any import numpy as np from PIL import Image @@ -46,14 +46,14 @@ from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraCon logger = logging.getLogger(__name__) -def find_all_opencv_cameras() -> List[Dict[str, Any]]: +def find_all_opencv_cameras() -> list[dict[str, Any]]: """ Finds all available OpenCV cameras plugged into the system. Returns: A list of all available OpenCV cameras with their metadata. """ - all_opencv_cameras_info: List[Dict[str, Any]] = [] + all_opencv_cameras_info: list[dict[str, Any]] = [] logger.info("Searching for OpenCV cameras...") try: opencv_cameras = OpenCVCamera.find_cameras() @@ -66,14 +66,14 @@ def find_all_opencv_cameras() -> List[Dict[str, Any]]: return all_opencv_cameras_info -def find_all_realsense_cameras() -> List[Dict[str, Any]]: +def find_all_realsense_cameras() -> list[dict[str, Any]]: """ Finds all available RealSense cameras plugged into the system. Returns: A list of all available RealSense cameras with their metadata. 
""" - all_realsense_cameras_info: List[Dict[str, Any]] = [] + all_realsense_cameras_info: list[dict[str, Any]] = [] logger.info("Searching for RealSense cameras...") try: realsense_cameras = RealSenseCamera.find_cameras() @@ -88,7 +88,7 @@ def find_all_realsense_cameras() -> List[Dict[str, Any]]: return all_realsense_cameras_info -def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[str, Any]]: +def find_and_print_cameras(camera_type_filter: str | None = None) -> list[dict[str, Any]]: """ Finds available cameras based on an optional filter and prints their information. @@ -99,7 +99,7 @@ def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[s Returns: A list of all available cameras matching the filter, with their metadata. """ - all_cameras_info: List[Dict[str, Any]] = [] + all_cameras_info: list[dict[str, Any]] = [] if camera_type_filter: camera_type_filter = camera_type_filter.lower() @@ -153,7 +153,7 @@ def save_image( logger.error(f"Failed to save image for camera {camera_identifier} (type {camera_type}): {e}") -def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None: +def create_camera_instance(cam_meta: dict[str, Any]) -> dict[str, Any] | None: """Create and connect to a camera instance based on metadata.""" cam_type = cam_meta.get("type") cam_id = cam_meta.get("id") @@ -190,7 +190,7 @@ def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None: def process_camera_image( - cam_dict: Dict[str, Any], output_dir: Path, current_time: float + cam_dict: dict[str, Any], output_dir: Path, current_time: float ) -> concurrent.futures.Future | None: """Capture and process an image from a single camera.""" cam = cam_dict["instance"] @@ -216,7 +216,7 @@ def process_camera_image( return None -def cleanup_cameras(cameras_to_use: List[Dict[str, Any]]): +def cleanup_cameras(cameras_to_use: list[dict[str, Any]]): """Disconnect all cameras.""" logger.info(f"Disconnecting 
{len(cameras_to_use)} cameras...") for cam_dict in cameras_to_use: diff --git a/src/lerobot/motors/motors_bus.py b/src/lerobot/motors/motors_bus.py index 26522c7c9..597bcd3c4 100644 --- a/src/lerobot/motors/motors_bus.py +++ b/src/lerobot/motors/motors_bus.py @@ -224,7 +224,7 @@ class MotorsBus(abc.ABC): ```bash python -m lerobot.find_port.py >>> Finding all available ports for the MotorsBus. - >>> ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751'] + >>> ["/dev/tty.usbmodem575E0032081", "/dev/tty.usbmodem575E0031751"] >>> Remove the usb cable from your MotorsBus and press Enter when done. >>> The port of this MotorsBus is /dev/tty.usbmodem575E0031751. >>> Reconnect the usb cable. diff --git a/src/lerobot/policies/act/modeling_act.py b/src/lerobot/policies/act/modeling_act.py index aa81d3cd2..4a048e63d 100644 --- a/src/lerobot/policies/act/modeling_act.py +++ b/src/lerobot/policies/act/modeling_act.py @@ -21,8 +21,8 @@ The majority of changes here involve removing unused code, unifying naming, and import math from collections import deque +from collections.abc import Callable from itertools import chain -from typing import Callable import einops import numpy as np @@ -216,7 +216,7 @@ class ACTTemporalEnsembler: continue avg *= exp_weights[:i].sum() avg += item * exp_weights[i] - avg /= exp_weights[:i+1].sum() + avg /= exp_weights[: i + 1].sum() print("online", avg) ``` """ diff --git a/src/lerobot/policies/diffusion/modeling_diffusion.py b/src/lerobot/policies/diffusion/modeling_diffusion.py index 6dad8fb89..24b273967 100644 --- a/src/lerobot/policies/diffusion/modeling_diffusion.py +++ b/src/lerobot/policies/diffusion/modeling_diffusion.py @@ -22,7 +22,7 @@ TODO(alexander-soare): import math from collections import deque -from typing import Callable +from collections.abc import Callable import einops import numpy as np diff --git a/src/lerobot/policies/pi0/paligemma_with_expert.py b/src/lerobot/policies/pi0/paligemma_with_expert.py index 
f0f5713e5..edc34b7c5 100644 --- a/src/lerobot/policies/pi0/paligemma_with_expert.py +++ b/src/lerobot/policies/pi0/paligemma_with_expert.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Optional, Union import torch import torch.version @@ -228,12 +227,12 @@ class PaliGemmaWithExpertModel(PreTrainedModel): # TODO: break down this huge forward into modules or functions def forward( self, - attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, - inputs_embeds: List[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - fill_kv_cache: Optional[bool] = None, + attention_mask: torch.Tensor | None = None, + position_ids: torch.LongTensor | None = None, + past_key_values: list[torch.FloatTensor] | Cache | None = None, + inputs_embeds: list[torch.FloatTensor] = None, + use_cache: bool | None = None, + fill_kv_cache: bool | None = None, ): models = [self.paligemma.language_model, self.gemma_expert.model] diff --git a/src/lerobot/policies/pretrained.py b/src/lerobot/policies/pretrained.py index d18b798a8..d745c901c 100644 --- a/src/lerobot/policies/pretrained.py +++ b/src/lerobot/policies/pretrained.py @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import abc +import builtins import logging import os from importlib.resources import files from pathlib import Path from tempfile import TemporaryDirectory -from typing import List, Type, TypeVar +from typing import TypeVar import packaging import safetensors from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE from huggingface_hub.errors import HfHubHTTPError -from safetensors.torch import load_model as load_model_as_safetensor -from safetensors.torch import save_model as save_model_as_safetensor +from safetensors.torch import load_model as load_model_as_safetensor, save_model as save_model_as_safetensor from torch import Tensor, nn from lerobot.configs.policies import PreTrainedConfig @@ -67,7 +67,7 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): @classmethod def from_pretrained( - cls: Type[T], + cls: builtins.type[T], pretrained_name_or_path: str | Path, *, config: PreTrainedConfig | None = None, @@ -223,7 +223,7 @@ class PreTrainedPolicy(nn.Module, HubMixin, abc.ABC): logging.info(f"Model pushed to {commit_info.repo_url.url}") def generate_model_card( - self, dataset_repo_id: str, model_type: str, license: str | None, tags: List[str] | None + self, dataset_repo_id: str, model_type: str, license: str | None, tags: list[str] | None ) -> ModelCard: base_model = "lerobot/smolvla_base" if model_type == "smolvla" else None # Set a base model diff --git a/src/lerobot/policies/sac/modeling_sac.py b/src/lerobot/policies/sac/modeling_sac.py index 93cfe6c93..878f3cdd8 100644 --- a/src/lerobot/policies/sac/modeling_sac.py +++ b/src/lerobot/policies/sac/modeling_sac.py @@ -16,8 +16,9 @@ # limitations under the License. 
import math +from collections.abc import Callable from dataclasses import asdict -from typing import Callable, Literal +from typing import Literal import einops import numpy as np diff --git a/src/lerobot/policies/smolvla/smolvlm_with_expert.py b/src/lerobot/policies/smolvla/smolvlm_with_expert.py index 07eae8089..f3d1a693a 100644 --- a/src/lerobot/policies/smolvla/smolvlm_with_expert.py +++ b/src/lerobot/policies/smolvla/smolvlm_with_expert.py @@ -13,7 +13,6 @@ # limitations under the License. import copy -from typing import List, Optional import torch from torch import nn @@ -403,12 +402,12 @@ class SmolVLMWithExpertModel(nn.Module): def forward( self, - attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: List[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - fill_kv_cache: Optional[bool] = None, + attention_mask: torch.Tensor | None = None, + position_ids: torch.LongTensor | None = None, + past_key_values: list[torch.FloatTensor] | None = None, + inputs_embeds: list[torch.FloatTensor] = None, + use_cache: bool | None = None, + fill_kv_cache: bool | None = None, ): models = [self.get_vlm_model().text_model, self.lm_expert] model_layers = self.get_model_layers(models) diff --git a/src/lerobot/policies/tdmpc/modeling_tdmpc.py b/src/lerobot/policies/tdmpc/modeling_tdmpc.py index c27689387..664fe863d 100644 --- a/src/lerobot/policies/tdmpc/modeling_tdmpc.py +++ b/src/lerobot/policies/tdmpc/modeling_tdmpc.py @@ -24,9 +24,9 @@ The comments in this code may sometimes refer to these references: # ruff: noqa: N806 from collections import deque +from collections.abc import Callable from copy import deepcopy from functools import partial -from typing import Callable import einops import numpy as np diff --git a/src/lerobot/policies/vqbet/modeling_vqbet.py b/src/lerobot/policies/vqbet/modeling_vqbet.py index 59c820a96..b271298a3 100644 
--- a/src/lerobot/policies/vqbet/modeling_vqbet.py +++ b/src/lerobot/policies/vqbet/modeling_vqbet.py @@ -18,7 +18,7 @@ import warnings from collections import deque -from typing import Callable, List +from collections.abc import Callable import einops import numpy as np @@ -901,7 +901,7 @@ class MLP(torch.nn.Sequential): def __init__( self, in_channels: int, - hidden_channels: List[int], + hidden_channels: list[int], ): layers = [] in_dim = in_channels diff --git a/src/lerobot/policies/vqbet/vqbet_utils.py b/src/lerobot/policies/vqbet/vqbet_utils.py index 03b02a280..e0afe5585 100644 --- a/src/lerobot/policies/vqbet/vqbet_utils.py +++ b/src/lerobot/policies/vqbet/vqbet_utils.py @@ -17,10 +17,10 @@ # limitations under the License. import math +from collections.abc import Callable from functools import partial from math import ceil from random import randrange -from typing import Callable import torch import torch.distributed as distributed @@ -198,7 +198,7 @@ class GPT(nn.Module): # report number of parameters n_params = sum(p.numel() for p in self.parameters()) - print("number of parameters: {:.2f}M".format(n_params / 1e6)) + print(f"number of parameters: {n_params / 1e6:.2f}M") def forward(self, input, targets=None): device = input.device @@ -255,7 +255,7 @@ class GPT(nn.Module): blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding) for mn, m in self.named_modules(): for pn, _p in m.named_parameters(): - fpn = "{}.{}".format(mn, pn) if mn else pn # full param name + fpn = f"{mn}.{pn}" if mn else pn # full param name if pn.endswith("bias"): # all biases will not be decayed no_decay.add(fpn) diff --git a/src/lerobot/record.py b/src/lerobot/record.py index c8184d40b..0b1af192e 100644 --- a/src/lerobot/record.py +++ b/src/lerobot/record.py @@ -62,7 +62,6 @@ import time from dataclasses import asdict, dataclass from pathlib import Path from pprint import pformat -from typing import List from lerobot.cameras import ( # noqa: F401 CameraConfig, # noqa: F401 
@@ -190,7 +189,7 @@ def record_loop( events: dict, fps: int, dataset: LeRobotDataset | None = None, - teleop: Teleoperator | List[Teleoperator] | None = None, + teleop: Teleoperator | list[Teleoperator] | None = None, policy: PreTrainedPolicy | None = None, control_time_s: int | None = None, single_task: str | None = None, diff --git a/src/lerobot/robots/hope_jr/hope_jr.mdx b/src/lerobot/robots/hope_jr/hope_jr.mdx index 2f9ec9d89..72aa8f923 100644 --- a/src/lerobot/robots/hope_jr/hope_jr.mdx +++ b/src/lerobot/robots/hope_jr/hope_jr.mdx @@ -9,6 +9,7 @@ Follow the [installation instructions](https://github.com/huggingface/lerobot#installation) to install LeRobot. Install LeRobot with HopeJR dependencies: + ```bash pip install -e ".[hopejr]" ``` @@ -40,35 +41,39 @@ python -m lerobot.calibrate \ When running the calibration script, a calibration GUI will pop up. Finger joints are named as follows: **Thumb**: + - **CMC**: base joint connecting thumb to hand - **MCP**: knuckle joint - **PIP**: first finger joint - **DIP** : fingertip joint **Index, Middle, Ring, and Pinky fingers**: + - **Radial flexor**: Moves base of finger towards the thumb - **Ulnar flexor**: Moves base of finger towards the pinky - **PIP/DIP**: Flexes the distal and proximal phalanx of the finger Each one of these will need to be calibrated individually via the GUI. - Note that ulnar and radial flexors should have ranges of the same size (but with different offsets) in order to get symmetric movement. +Note that ulnar and radial flexors should have ranges of the same size (but with different offsets) in order to get symmetric movement.

- Setting boundaries in the hand calibration GUI - + width="100%" + >

Use the calibration interface to set the range boundaries for each joint as shown above.

- Saving calibration values - + width="100%" + >

Once you have set the appropriate boundaries for all joints, click "Save" to save the calibration values to the motors. @@ -122,16 +127,18 @@ python -m lerobot.calibrate \ ``` This will open a calibration GUI where you can set the range limits for each motor. The arm motions are organized as follows: + - **Shoulder**: pitch, yaw, and roll - **Elbow**: flex - **Wrist**: pitch, yaw, and roll

- Setting boundaries in the arm calibration GUI - + width="100%" + >

Use the calibration interface to set the range boundaries for each joint. Move each joint through its full range of motion and adjust the minimum and maximum values accordingly. Once you have set the appropriate boundaries for all joints, save the calibration. @@ -169,6 +176,7 @@ Calibration saved to /Users/your_username/.cache/huggingface/lerobot/calibration Due to global variable conflicts in the Feetech middleware, teleoperation for arm and hand must run in separate shell sessions: ### Hand + ```bash python -m lerobot.teleoperate \ --robot.type=hope_jr_hand \ @@ -184,6 +192,7 @@ python -m lerobot.teleoperate \ ``` ### Arm + ```bash python -m lerobot.teleoperate \ --robot.type=hope_jr_arm \ diff --git a/src/lerobot/robots/koch_follower/koch.mdx b/src/lerobot/robots/koch_follower/koch.mdx index f70a1802c..d0b991e74 100644 --- a/src/lerobot/robots/koch_follower/koch.mdx +++ b/src/lerobot/robots/koch_follower/koch.mdx @@ -10,15 +10,16 @@ For a visual walkthrough of the assembly process, you can refer to [this video t > [!WARNING] > Since the production of this video, we simplified the configuration phase. Because of this, two things differ from the instructions in that video: +> > - Don't plug in all the motor cables right away and wait to be instructed to do so in [Configure the motors](#configure-the-motors). > - Don't screw in the controller board (PCB) to the base right away and wait for being instructed to do so in [Configure the motors](#configure-the-motors). - ## Install LeRobot 🤗 To install LeRobot follow, our [Installation Guide](./installation) In addition to these instructions, you need to install the Dynamixel SDK: + ```bash pip install -e ".[dynamixel]" ``` @@ -28,6 +29,7 @@ pip install -e ".[dynamixel]" ### 1. 
Find the USB ports associated with each arm To find the port for each bus servo adapter, run this script: + ```bash python -m lerobot.find_port ``` @@ -54,6 +56,7 @@ Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your le On Linux, you might need to give access to the USB ports by running: + ```bash sudo chmod 666 /dev/ttyACM0 sudo chmod 666 /dev/ttyACM1 @@ -99,9 +102,11 @@ python -m lerobot.setup_motors \ --robot.type=koch_follower \ --robot.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step ``` + + ```python from lerobot.robots.koch_follower import KochFollower, KochFollowerConfig @@ -112,10 +117,13 @@ config = KochFollowerConfig( follower = KochFollower(config) follower.setup_motors() ``` + + You should see the following instruction. + ``` Connect the controller board to the 'gripper' motor only and press enter. ``` @@ -125,22 +133,26 @@ As instructed, plug the gripper's motor. Make sure it's the only motor connected
Troubleshooting - If you get an error at that point, check your cables and make sure they are plugged in properly: -
    -
  • Power supply
  • -
  • USB cable between your computer and the controller board
  • -
  • The 3-pin cable from the controller board to the motor
  • -
+If you get an error at that point, check your cables and make sure they are plugged in properly: + +
    +
  • Power supply
  • +
  • USB cable between your computer and the controller board
  • +
  • The 3-pin cable from the controller board to the motor
  • +
+ +If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB). - If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB).
You should then see the following message: + ``` 'gripper' motor id set to 6 ``` Followed by the next instruction: + ``` Connect the controller board to the 'wrist_roll' motor only and press enter. ``` @@ -155,6 +167,7 @@ Repeat the operation for each motor as instructed. When you are done, the script will simply finish, at which point the motors are ready to be used. You can now plug the 3-pin cable from each motor to the next one, and the cable from the first motor (the 'shoulder pan' with id=1) to the controller board, which can now be attached to the base of the arm. #### Leader + Do the same steps for the leader arm but modify the command or script accordingly. @@ -165,9 +178,11 @@ python -m lerobot.setup_motors \ --teleop.type=koch_leader \ --teleop.port=/dev/tty.usbmodem575E0031751 \ # <- paste here the port found at previous step ``` + + ```python from lerobot.teleoperators.koch_leader import KochLeader, KochLeaderConfig @@ -178,6 +193,8 @@ config = KochLeaderConfig( leader = KochLeader(config) leader.setup_motors() ``` + + @@ -199,9 +216,11 @@ python -m lerobot.calibrate \ --robot.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --robot.id=my_awesome_follower_arm # <- Give the robot a unique name ``` + + ```python from lerobot.robots.koch_follower import KochFollowerConfig, KochFollower @@ -215,6 +234,8 @@ follower.connect(calibrate=False) follower.calibrate() follower.disconnect() ``` + + @@ -233,9 +254,11 @@ python -m lerobot.calibrate \ --teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --teleop.id=my_awesome_leader_arm # <- Give the robot a unique name ``` + + ```python from lerobot.teleoperators.koch_leader import KochLeaderConfig, KochLeader @@ -249,10 +272,12 @@ leader.connect(calibrate=False) leader.calibrate() leader.disconnect() ``` + + Congrats 🎉, your robot is all set to learn a task on its own. 
Start training it by following this tutorial: [Getting started with real-world robots](./getting_started_real_world_robot) > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). diff --git a/src/lerobot/robots/lekiwi/lekiwi.mdx b/src/lerobot/robots/lekiwi/lekiwi.mdx index 61b1c05c1..bb70fd26b 100644 --- a/src/lerobot/robots/lekiwi/lekiwi.mdx +++ b/src/lerobot/robots/lekiwi/lekiwi.mdx @@ -8,31 +8,43 @@ Follow this [README](https://github.com/SIGRobotics-UIUC/LeKiwi). It contains th And advise if it's your first time printing or if you don't own a 3D printer. ### Wired version + If you have the **wired** LeKiwi version, you can skip the installation of the Raspberry Pi and setting up SSH. You can also run all commands directly on your PC for both the LeKiwi scripts and the leader arm scripts for teleoperating. ## Install software on Pi + Now we have to set up the remote PC that will run on the LeKiwi Robot. This is normally a Raspberry Pi, but can be any PC that can run on 5V and has enough usb ports (2 or more) for the cameras and motor control board. ### Install OS + For setting up the Raspberry Pi and its SD-card see: [Setup PI](https://www.raspberrypi.com/documentation/computers/getting-started.html). Here is explained how to download the [Imager](https://www.raspberrypi.com/software/) to install Raspberry Pi OS or Ubuntu. ### Setup SSH + After setting up your Pi, you should enable and set up [SSH](https://www.raspberrypi.com/news/coding-on-raspberry-pi-remotely-with-visual-studio-code/) (Secure Shell Protocol) so you can log in to the Pi from your laptop without requiring a screen, keyboard, and mouse on the Pi. A great tutorial on how to do this can be found [here](https://www.raspberrypi.com/documentation/computers/remote-access.html#ssh). 
Logging into your Pi can be done in your Command Prompt (cmd) or, if you use VSCode you can use [this](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-ssh) extension. ### Install LeRobot on Pi 🤗 On your Raspberry Pi install LeRobot using our [Installation Guide](./installation) -In addition to these instructions, you need to install the Feetech sdk on your Pi: +In addition to these instructions, you need to install the Feetech SDK & ZeroMQ on your Pi: + ```bash -pip install -e ".[feetech]" +pip install -e ".[lekiwi]" ``` ## Install LeRobot locally + If you already have installed LeRobot on your laptop/pc you can skip this step; otherwise, please follow along as we do the same steps we did on the Pi. Follow our [Installation Guide](./installation) +In addition to these instructions, you need to install the Feetech SDK & ZeroMQ on your laptop/pc: + +```bash +pip install -e ".[lekiwi]" +``` + Great :hugs:! You are now done installing LeRobot, and we can begin assembling the SO100/SO101 arms and the mobile base :robot:. Every time you now want to use LeRobot, you can go to the `~/lerobot` folder where we installed LeRobot and run one of the commands. @@ -46,6 +58,7 @@ First, we will assemble the two SO100/SO101 arms. One to attach to the mobile ba ### Find the USB ports associated with motor board To find the port for each bus servo adapter, run this script: + ```bash python -m lerobot.find_port ``` @@ -72,6 +85,7 @@ Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your bo On Linux, you might need to give access to the USB ports by running: + ```bash sudo chmod 666 /dev/ttyACM0 sudo chmod 666 /dev/ttyACM1 @@ -96,6 +110,7 @@ Where the found port is: `/dev/ttyACM0` corresponding to your board. ### Configure motors + The instructions for configuring the motors can be found in the SO101 [docs](./so101#configure-the-motors). Besides the ids for the arm motors, we also need to set the motor ids for the mobile base. 
These need to be in a specific order to work. Below an image of the motor ids and motor mounting positions for the mobile base. Note that we only use one Motor Control board on LeKiwi. This means the motor ids for the wheels are 7, 8 and 9. You can run this command to setup motors for LeKiwi. It will first setup the motors for arm (id 6..1) and then setup motors for wheels (9,8,7) @@ -113,27 +128,36 @@ python -m lerobot.setup_motors \ If you are having trouble connecting to the Mobile SO100, follow these steps to diagnose and resolve the issue. #### 1. Verify IP Address Configuration + Make sure that the correct IP for the Pi is used in the commands or in your code. To check the Raspberry Pi's IP address, run (on the Pi command line): + ```bash hostname -I ``` #### 2. Check if Pi is reachable from laptop/pc + Try pinging the Raspberry Pi from your laptop: + ```bach ping ``` If the ping fails: + - Ensure the Pi is powered on and connected to the same network. - Check if SSH is enabled on the Pi. #### 3. Try SSH connection + If you can't SSH into the Pi, it might not be properly connected. Use: + ```bash ssh @ ``` + If you get a connection error: + - Ensure SSH is enabled on the Pi by running: ```bash sudo raspi-config @@ -158,10 +182,13 @@ python -m lerobot.calibrate \ We unified the calibration method for most robots, thus, the calibration steps for this SO100 arm are the same as the steps for the Koch and SO101. First, we have to move the robot to the position where each joint is in the middle of its range, then we press `Enter`. Secondly, we move all joints through their full range of motion. A video of this same process for the SO101 as reference can be found [here](https://huggingface.co/docs/lerobot/en/so101#calibration-video). ### Wired version + If you have the **wired** LeKiwi version, please run all commands on your laptop. ### Calibrate leader arm + Then, to calibrate the leader arm (which is attached to the laptop/pc). 
Run the following command of API example on your laptop: + @@ -171,9 +198,11 @@ python -m lerobot.calibrate \ --teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --teleop.id=my_awesome_leader_arm # <- Give the robot a unique name ``` + + ```python from lerobot.teleoperators.so100_leader import SO100LeaderConfig, SO100Leader @@ -187,6 +216,8 @@ leader.connect(calibrate=False) leader.calibrate() leader.disconnect() ``` + + @@ -196,6 +227,7 @@ leader.disconnect() > If you're using a Mac, you might need to give Terminal permission to access your keyboard for teleoperation. Go to System Preferences > Security & Privacy > Input Monitoring and check the box for Terminal. To teleoperate, SSH into your Raspberry Pi, and run `conda activate lerobot` and this command: + ```bash python -m lerobot.robots.lekiwi.lekiwi_host --robot.id=my_awesome_kiwi ``` @@ -206,7 +238,7 @@ Then on your laptop, also run `conda activate lerobot` and run the API example, python examples/lekiwi/teleoperate.py ``` -You should see on your laptop something like this: ```[INFO] Connected to remote robot at tcp://172.17.133.91:5555 and video stream at tcp://172.17.133.91:5556.``` Now you can move the leader arm and use the keyboard (w,a,s,d) to drive forward, left, backwards, right. And use (z,x) to turn left or turn right. You can use (r,f) to increase and decrease the speed of the mobile robot. There are three speed modes, see the table below: +You should see on your laptop something like this: `[INFO] Connected to remote robot at tcp://172.17.133.91:5555 and video stream at tcp://172.17.133.91:5556.` Now you can move the leader arm and use the keyboard (w,a,s,d) to drive forward, left, backwards, right. And use (z,x) to turn left or turn right. You can use (r,f) to increase and decrease the speed of the mobile robot. 
There are three speed modes, see the table below: | Speed Mode | Linear Speed (m/s) | Rotation Speed (deg/s) | | ---------- | ------------------ | ---------------------- | @@ -214,7 +246,6 @@ You should see on your laptop something like this: ```[INFO] Connected to remote | Medium | 0.25 | 60 | | Slow | 0.1 | 30 | - | Key | Action | | --- | -------------- | | W | Move forward | @@ -227,9 +258,10 @@ You should see on your laptop something like this: ```[INFO] Connected to remote | F | Decrease speed | > [!TIP] -> If you use a different keyboard, you can change the keys for each command in the [`LeKiwiConfig`](../src/lerobot/robot_devices/robots/configs.py). +> If you use a different keyboard, you can change the keys for each command in the [`LeKiwiConfig`](../src/lerobot/robot_devices/robots/configs.py). ### Wired version + If you have the **wired** LeKiwi version, please run all commands on your laptop. ## Record a dataset @@ -239,26 +271,32 @@ Once you're familiar with teleoperation, you can record your first dataset. We use the Hugging Face hub features for uploading your dataset. If you haven't previously used the Hub, make sure you can login via the cli using a write-access token, this token can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens). Add your token to the CLI by running this command: + ```bash huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential ``` Then store your Hugging Face repository name in a variable: + ```bash HF_USER=$(huggingface-cli whoami | head -n 1) echo $HF_USER ``` Now you can record a dataset. To record episodes and upload your dataset to the hub, execute this API example tailored for LeKiwi. Make sure to first adapt the `remote_ip`, `repo_id`, `port` and `task` in the script. If you would like to run the script for longer you can increase `NB_CYCLES_CLIENT_CONNECTION`. 
+ ```bash python examples/lekiwi/record.py ``` #### Dataset upload + Locally, your dataset is stored in this folder: `~/.cache/huggingface/lerobot/{repo-id}`. At the end of data recording, your dataset will be uploaded on your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/so101_test) that you can obtain by running: + ```bash echo https://huggingface.co/datasets/${HF_USER}/so101_test ``` + Your dataset will be automatically tagged with `LeRobot` for the community to find it easily, and you can also add custom tags (in this case `tutorial` for example). You can look for other LeRobot datasets on the hub by searching for `LeRobot` [tags](https://huggingface.co/datasets?other=LeRobot). @@ -274,14 +312,13 @@ Avoid adding too much variation too quickly, as it may hinder your results. If you want to dive deeper into this important topic, you can check out the [blog post](https://huggingface.co/blog/lerobot-datasets#what-makes-a-good-dataset) we wrote on what makes a good dataset. #### Troubleshooting: -- On Linux, if the left and right arrow keys and escape key don't have any effect during data recording, make sure you've set the `$DISPLAY` environment variable. See [pynput limitations](https://pynput.readthedocs.io/en/latest/limitations.html#linux). +- On Linux, if the left and right arrow keys and escape key don't have any effect during data recording, make sure you've set the `$DISPLAY` environment variable. See [pynput limitations](https://pynput.readthedocs.io/en/latest/limitations.html#linux). ## Replay an episode To replay an episode run the API example below, make sure to change `remote_ip`, `port`, LeRobotDatasetId and episode index. - ```bash python examples/lekiwi/replay.py ``` @@ -297,4 +334,4 @@ python examples/lekiwi/evaluate.py ``` > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). 
+> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). diff --git a/src/lerobot/robots/lekiwi/lekiwi_client.py b/src/lerobot/robots/lekiwi/lekiwi_client.py index 0ce259bb6..9a8001401 100644 --- a/src/lerobot/robots/lekiwi/lekiwi_client.py +++ b/src/lerobot/robots/lekiwi/lekiwi_client.py @@ -18,11 +18,10 @@ import base64 import json import logging from functools import cached_property -from typing import Any, Dict, Optional, Tuple +from typing import Any import cv2 import numpy as np -import zmq from lerobot.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError @@ -35,6 +34,9 @@ class LeKiwiClient(Robot): name = "lekiwi_client" def __init__(self, config: LeKiwiClientConfig): + import zmq + + self._zmq = zmq super().__init__(config) self.config = config self.id = config.id @@ -117,6 +119,7 @@ class LeKiwiClient(Robot): "LeKiwi Daemon is already connected. Do not run `robot.connect()` twice." ) + zmq = self._zmq self.zmq_context = zmq.Context() self.zmq_cmd_socket = self.zmq_context.socket(zmq.PUSH) zmq_cmd_locator = f"tcp://{self.remote_ip}:{self.port_zmq_cmd}" @@ -139,8 +142,9 @@ class LeKiwiClient(Robot): def calibrate(self) -> None: pass - def _poll_and_get_latest_message(self) -> Optional[str]: + def _poll_and_get_latest_message(self) -> str | None: """Polls the ZMQ socket for a limited time and returns the latest message string.""" + zmq = self._zmq poller = zmq.Poller() poller.register(self.zmq_observation_socket, zmq.POLLIN) @@ -167,7 +171,7 @@ class LeKiwiClient(Robot): return last_msg - def _parse_observation_json(self, obs_string: str) -> Optional[Dict[str, Any]]: + def _parse_observation_json(self, obs_string: str) -> dict[str, Any] | None: """Parses the JSON observation string.""" try: return json.loads(obs_string) @@ -175,7 +179,7 @@ class LeKiwiClient(Robot): logging.error(f"Error decoding JSON observation: {e}") return None - def _decode_image_from_b64(self, image_b64: str) -> 
Optional[np.ndarray]: + def _decode_image_from_b64(self, image_b64: str) -> np.ndarray | None: """Decodes a base64 encoded image string to an OpenCV image.""" if not image_b64: return None @@ -191,18 +195,18 @@ class LeKiwiClient(Robot): return None def _remote_state_from_obs( - self, observation: Dict[str, Any] - ) -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]: + self, observation: dict[str, Any] + ) -> tuple[dict[str, np.ndarray], dict[str, Any]]: """Extracts frames, and state from the parsed observation.""" flat_state = {key: observation.get(key, 0.0) for key in self._state_order} state_vec = np.array([flat_state[key] for key in self._state_order], dtype=np.float32) - obs_dict: Dict[str, Any] = {**flat_state, "observation.state": state_vec} + obs_dict: dict[str, Any] = {**flat_state, "observation.state": state_vec} # Decode images - current_frames: Dict[str, np.ndarray] = {} + current_frames: dict[str, np.ndarray] = {} for cam_name, image_b64 in observation.items(): if cam_name not in self._cameras_ft: continue @@ -212,7 +216,7 @@ class LeKiwiClient(Robot): return current_frames, obs_dict - def _get_data(self) -> Tuple[Dict[str, np.ndarray], Dict[str, Any], Dict[str, Any]]: + def _get_data(self) -> tuple[dict[str, np.ndarray], dict[str, Any], dict[str, Any]]: """ Polls the video socket for the latest observation data. diff --git a/src/lerobot/robots/robot.py b/src/lerobot/robots/robot.py index 6820645cc..2a9004380 100644 --- a/src/lerobot/robots/robot.py +++ b/src/lerobot/robots/robot.py @@ -13,8 +13,9 @@ # limitations under the License. 
import abc +import builtins from pathlib import Path -from typing import Any, Type +from typing import Any import draccus @@ -39,7 +40,7 @@ class Robot(abc.ABC): """ # Set these in ALL subclasses - config_class: Type[RobotConfig] + config_class: builtins.type[RobotConfig] name: str def __init__(self, config: RobotConfig): diff --git a/src/lerobot/robots/so100_follower/so100.mdx b/src/lerobot/robots/so100_follower/so100.mdx index f5eea6aef..d9ff922c5 100644 --- a/src/lerobot/robots/so100_follower/so100.mdx +++ b/src/lerobot/robots/so100_follower/so100.mdx @@ -11,6 +11,7 @@ Follow this [README](https://github.com/TheRobotStudio/SO-ARM100/blob/main/SO100 To install LeRobot, follow our [Installation Guide](./installation) In addition to these instructions, you need to install the Feetech SDK: + ```bash pip install -e ".[feetech]" ``` @@ -23,6 +24,7 @@ Unlike the SO-101, the motor connectors are not easily accessible once the arm i ### 1. Find the USB ports associated with each arm To find the port for each bus servo adapter, run this script: + ```bash python -m lerobot.find_port ``` @@ -49,6 +51,7 @@ Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your le On Linux, you might need to give access to the USB ports by running: + ```bash sudo chmod 666 /dev/ttyACM0 sudo chmod 666 /dev/ttyACM1 @@ -94,9 +97,11 @@ python -m lerobot.setup_motors \ --robot.type=so100_follower \ --robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found at previous step ``` + + ```python from lerobot.robots.so100_follower import SO100Follower, SO100FollowerConfig @@ -107,10 +112,13 @@ config = SO100FollowerConfig( follower = SO100Follower(config) follower.setup_motors() ``` + + You should see the following instruction + ``` Connect the controller board to the 'gripper' motor only and press enter. ``` @@ -120,22 +128,26 @@ As instructed, plug the gripper's motor. Make sure it's the only motor connected
Troubleshooting - If you get an error at that point, check your cables and make sure they are plugged in properly: -
    -
  • Power supply
  • -
  • USB cable between your computer and the controller board
  • -
  • The 3-pin cable from the controller board to the motor
  • -
+If you get an error at that point, check your cables and make sure they are plugged in properly: + +
    +
  • Power supply
  • +
  • USB cable between your computer and the controller board
  • +
  • The 3-pin cable from the controller board to the motor
  • +
If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB). +
You should then see the following message: + ``` 'gripper' motor id set to 6 ``` Followed by the next instruction: + ``` Connect the controller board to the 'wrist_roll' motor only and press enter. ``` @@ -150,6 +162,7 @@ Repeat the operation for each motor as instructed. When you are done, the script will simply finish, at which point the motors are ready to be used. You can now plug the 3-pin cable from each motor to the next one, and the cable from the first motor (the 'shoulder pan' with id=1) to the controller board, which can now be attached to the base of the arm. #### Leader + Do the same steps for the leader arm. @@ -162,6 +175,7 @@ python -m lerobot.setup_motors \
+ ```python from lerobot.teleoperators.so100_leader import SO100Leader, SO100LeaderConfig @@ -172,6 +186,8 @@ config = SO100LeaderConfig( leader = SO100Leader(config) leader.setup_motors() ``` + + @@ -184,7 +200,10 @@ leader.setup_motors()
@@ -193,6 +212,7 @@ leader.setup_motors() Follow the video for removing gears. You need to remove the gear for the motors of the leader arm. As a result, you will only use the position encoding of the motor and reduce friction to more easily operate the leader arm. ### Clean Parts + Remove all support material from the 3D-printed parts. The easiest way to do this is using a small screwdriver to get underneath the support material. ### Additional Guidance @@ -202,7 +222,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -216,75 +239,117 @@ This video provides visual guidance for assembling the arms, but it doesn't spec ### First Motor **Step 2: Insert Wires** + - Insert two wires into the first motor. - + **Step 3: Install in Base** + - Place the first motor into the base. - + **Step 4: Secure Motor** + - Fasten the motor with 4 screws. Two from the bottom and two from top. **Step 5: Attach Motor Holder** + - Slide over the first motor holder and fasten it using two screws (one on each side). - + **Step 6: Attach Motor Horns** + - Install both motor horns, securing the top horn with a screw. Try not to move the motor position when attaching the motor horn, especially for the leader arms, where we removed the gears. - +
- Video adding motor horn + + Video adding motor horn +
**Step 7: Attach Shoulder Part** + - Route one wire to the back of the robot and the other to the left or towards you (see photo). - Attach the shoulder part. - + **Step 8: Secure Shoulder** + - Tighten the shoulder part with 4 screws on top and 4 on the bottom -*(access bottom holes by turning the shoulder).* + _(access bottom holes by turning the shoulder)._ --- ### Second Motor Assembly **Step 9: Install Motor 2** + - Slide the second motor in from the top and link the wire from motor 1 to motor 2. - + **Step 10: Attach Shoulder Holder** + - Add the shoulder motor holder. - Ensure the wire from motor 1 to motor 2 goes behind the holder while the other wire is routed upward (see photo). - This part can be tight to assemble, you can use a workbench like the image or a similar setup to push the part around the motor.
- - - + + +
**Step 11: Secure Motor 2** + - Fasten the second motor with 4 screws. **Step 12: Attach Motor Horn** + - Attach both motor horns to motor 2, again use the horn screw. **Step 13: Attach Base** + - Install the base attachment using 2 screws. **Step 14: Attach Upper Arm** + - Attach the upper arm with 4 screws on each side. @@ -294,89 +359,144 @@ This video provides visual guidance for assembling the arms, but it doesn't spec ### Third Motor Assembly **Step 15: Install Motor 3** + - Route the motor cable from motor 2 through the cable holder to motor 3, then secure motor 3 with 4 screws. **Step 16: Attach Motor Horn** + - Attach both motor horns to motor 3 and secure one again with a horn screw. - + **Step 17: Attach Forearm** + - Connect the forearm to motor 3 using 4 screws on each side. - + --- ### Fourth Motor Assembly **Step 18: Install Motor 4** + - Slide in motor 4, attach the cable from motor 3, and secure the cable in its holder with a screw.
- - + +
**Step 19: Attach Motor Holder 4** + - Install the fourth motor holder (a tight fit). Ensure one wire is routed upward and the wire from motor 3 is routed downward (see photo). - + **Step 20: Secure Motor 4 & Attach Horn** + - Fasten motor 4 with 4 screws and attach its motor horns, use for one a horn screw. - + --- ### Wrist Assembly **Step 21: Install Motor 5** + - Insert motor 5 into the wrist holder and secure it with 2 front screws. - + **Step 22: Attach Wrist** + - Connect the wire from motor 4 to motor 5. And already insert the other wire for the gripper. - Secure the wrist to motor 4 using 4 screws on both sides. - + **Step 23: Attach Wrist Horn** + - Install only one motor horn on the wrist motor and secure it with a horn screw. - + --- ### Follower Configuration **Step 24: Attach Gripper** + - Attach the gripper to motor 5. - + **Step 25: Install Gripper Motor** + - Insert the gripper motor, connect the motor wire from motor 5 to motor 6, and secure it with 3 screws on each side. - + **Step 26: Attach Gripper Horn & Claw** + - Attach the motor horns and again use a horn screw. - Install the gripper claw and secure it with 4 screws on both sides. - + **Step 27: Mount Controller** + - Attach the motor controller to the back of the robot.
- - + +
-*Assembly complete – proceed to Leader arm assembly.* +_Assembly complete – proceed to Leader arm assembly._ --- @@ -385,31 +505,54 @@ This video provides visual guidance for assembling the arms, but it doesn't spec For the leader configuration, perform **Steps 1–23**. Make sure that you removed the motor gears from the motors. **Step 24: Attach Leader Holder** + - Mount the leader holder onto the wrist and secure it with a screw. - + **Step 25: Attach Handle** + - Attach the handle to motor 5 using 4 screws. - + **Step 26: Install Gripper Motor** + - Insert the gripper motor, secure it with 3 screws on each side, attach a motor horn using a horn screw, and connect the motor wire. - + **Step 27: Attach Trigger** + - Attach the follower trigger with 4 screws. - + **Step 28: Mount Controller** + - Attach the motor controller to the back of the robot.
- - + +
## Calibrate @@ -430,9 +573,11 @@ python -m lerobot.calibrate \ --robot.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --robot.id=my_awesome_follower_arm # <- Give the robot a unique name ``` + + ```python from lerobot.robots.so100_follower import SO100FollowerConfig, SO100Follower @@ -446,6 +591,8 @@ follower.connect(calibrate=False) follower.calibrate() follower.disconnect() ``` + + @@ -464,9 +611,11 @@ python -m lerobot.calibrate \ --teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --teleop.id=my_awesome_leader_arm # <- Give the robot a unique name ``` + + ```python from lerobot.teleoperators.so100_leader import SO100LeaderConfig, SO100Leader @@ -480,10 +629,12 @@ leader.connect(calibrate=False) leader.calibrate() leader.disconnect() ``` + + Congrats 🎉, your robot is all set to learn a task on its own. Start training it by following this tutorial: [Getting started with real-world robots](./getting_started_real_world_robot) > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). diff --git a/src/lerobot/robots/so101_follower/so101.mdx b/src/lerobot/robots/so101_follower/so101.mdx index c49807d93..e84336e17 100644 --- a/src/lerobot/robots/so101_follower/so101.mdx +++ b/src/lerobot/robots/so101_follower/so101.mdx @@ -12,6 +12,7 @@ And advise if it's your first time printing or if you don't own a 3D printer. To install LeRobot, follow our [Installation Guide](./installation) In addition to these instructions, you need to install the Feetech SDK: + ```bash pip install -e ".[feetech]" ``` @@ -20,16 +21,17 @@ pip install -e ".[feetech]" The follower arm uses 6x STS3215 motors with 1/345 gearing. The leader, however, uses three differently geared motors to make sure it can both sustain its own weight and it can be moved without requiring much force. 
Which motor is needed for which joint is shown in the table below. -| Leader-Arm Axis | Motor | Gear Ratio | -|-----------------|:-------:|:----------:| -| Base / Shoulder Pan | 1 | 1 / 191 | -| Shoulder Lift | 2 | 1 / 345 | -| Elbow Flex | 3 | 1 / 191 | -| Wrist Flex | 4 | 1 / 147 | -| Wrist Roll | 5 | 1 / 147 | -| Gripper | 6 | 1 / 147 | +| Leader-Arm Axis | Motor | Gear Ratio | +| ------------------- | :---: | :--------: | +| Base / Shoulder Pan | 1 | 1 / 191 | +| Shoulder Lift | 2 | 1 / 345 | +| Elbow Flex | 3 | 1 / 191 | +| Wrist Flex | 4 | 1 / 147 | +| Wrist Roll | 5 | 1 / 147 | +| Gripper | 6 | 1 / 147 | ### Clean Parts + Remove all support material from the 3D-printed parts. The easiest way to do this is using a small screwdriver to get underneath the support material. ### Joint 1 @@ -44,7 +46,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -57,7 +62,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -69,7 +77,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -81,7 +92,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -93,7 +107,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -109,7 +126,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -123,7 +143,10 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi
@@ -135,6 +158,7 @@ Remove all support material from the 3D-printed parts. The easiest way to do thi ### 1. Find the USB ports associated with each arm To find the port for each bus servo adapter, run this script: + ```bash python -m lerobot.find_port ``` @@ -161,6 +185,7 @@ Where the found port is: `/dev/tty.usbmodem575E0032081` corresponding to your le On Linux, you might need to give access to the USB ports by running: + ```bash sudo chmod 666 /dev/ttyACM0 sudo chmod 666 /dev/ttyACM1 @@ -198,7 +223,10 @@ The video below shows the sequence of steps for setting the motor ids.
@@ -214,9 +242,11 @@ python -m lerobot.setup_motors \ --robot.type=so101_follower \ --robot.port=/dev/tty.usbmodem585A0076841 # <- paste here the port found at previous step ``` +
+ ```python from lerobot.robots.so101_follower import SO101Follower, SO101FollowerConfig @@ -227,10 +257,13 @@ config = SO101FollowerConfig( follower = SO101Follower(config) follower.setup_motors() ``` + + You should see the following instruction + ```bash Connect the controller board to the 'gripper' motor only and press enter. ``` @@ -240,22 +273,26 @@ As instructed, plug the gripper's motor. Make sure it's the only motor connected
Troubleshooting - If you get an error at that point, check your cables and make sure they are plugged in properly: -
    -
  • Power supply
  • -
  • USB cable between your computer and the controller board
  • -
  • The 3-pin cable from the controller board to the motor
  • -
+If you get an error at that point, check your cables and make sure they are plugged in properly: + +
    +
  • Power supply
  • +
  • USB cable between your computer and the controller board
  • +
  • The 3-pin cable from the controller board to the motor
  • +
+ +If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB). - If you are using a Waveshare controller board, make sure that the two jumpers are set on the `B` channel (USB).
You should then see the following message: + ```bash 'gripper' motor id set to 6 ``` Followed by the next instruction: + ```bash Connect the controller board to the 'wrist_roll' motor only and press enter. ``` @@ -270,6 +307,7 @@ Repeat the operation for each motor as instructed. When you are done, the script will simply finish, at which point the motors are ready to be used. You can now plug the 3-pin cable from each motor to the next one, and the cable from the first motor (the 'shoulder pan' with id=1) to the controller board, which can now be attached to the base of the arm. #### Leader + Do the same steps for the leader arm. @@ -280,9 +318,11 @@ python -m lerobot.setup_motors \ --teleop.type=so101_leader \ --teleop.port=/dev/tty.usbmodem575E0031751 # <- paste here the port found at previous step ``` + + ```python from lerobot.teleoperators.so101_leader import SO101Leader, SO101LeaderConfig @@ -293,6 +333,8 @@ config = SO101LeaderConfig( leader = SO101Leader(config) leader.setup_motors() ``` + + @@ -314,9 +356,11 @@ python -m lerobot.calibrate \ --robot.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --robot.id=my_awesome_follower_arm # <- Give the robot a unique name ``` + + ```python from lerobot.robots.so101_follower import SO101FollowerConfig, SO101Follower @@ -330,6 +374,8 @@ follower.connect(calibrate=False) follower.calibrate() follower.disconnect() ``` + + @@ -339,7 +385,10 @@ The video below shows how to perform the calibration. First you need to move the
@@ -356,9 +405,11 @@ python -m lerobot.calibrate \ --teleop.port=/dev/tty.usbmodem58760431551 \ # <- The port of your robot --teleop.id=my_awesome_leader_arm # <- Give the robot a unique name ``` + + ```python from lerobot.teleoperators.so101_leader import SO101LeaderConfig, SO101Leader @@ -372,10 +423,12 @@ leader.connect(calibrate=False) leader.calibrate() leader.disconnect() ``` + + Congrats 🎉, your robot is all set to learn a task on its own. Start training it by following this tutorial: [Getting started with real-world robots](./getting_started_real_world_robot) > [!TIP] -> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). +> If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb). diff --git a/src/lerobot/robots/stretch3/README.md b/src/lerobot/robots/stretch3/README.md index 982e72571..724732286 100644 --- a/src/lerobot/robots/stretch3/README.md +++ b/src/lerobot/robots/stretch3/README.md @@ -5,16 +5,17 @@ This tutorial explains how to use [Stretch 3](https://hello-robot.com/stretch-3- Familiarize yourself with Stretch by following its [tutorials](https://docs.hello-robot.com/0.3/getting_started/hello_robot/) (recommended). To use LeRobot on Stretch, 3 options are available: + - [tethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#tethered-setup) - [untethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#untethered-setup) - ssh directly into Stretch (you will first need to install and configure openssh-server on stretch using one of the two above setups) - ## Install LeRobot On Stretch's CLI, follow these steps: 1. 
[Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install): + ```bash mkdir -p ~/miniconda3 wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh @@ -24,6 +25,7 @@ rm ~/miniconda3/miniconda.sh ``` 2. Comment out these lines in `~/.profile` (this can mess up paths used by conda and ~/.local/bin should already be in your PATH) + ``` # set PATH so it includes user's private bin if it exists if [ -d "$HOME/.local/bin" ] ; then @@ -34,21 +36,25 @@ fi 3. Restart shell or `source ~/.bashrc` 4. Create and activate a fresh conda environment for lerobot + ```bash conda create -y -n lerobot python=3.10 && conda activate lerobot ``` 5. Clone LeRobot: + ```bash git clone https://github.com/huggingface/lerobot.git ~/lerobot ``` 6. When using `miniconda`, install `ffmpeg` in your environment: + ```bash conda install ffmpeg -c conda-forge ``` 7. Install LeRobot with stretch dependencies: + ```bash cd ~/lerobot && pip install -e ".[stretch]" ``` @@ -56,6 +62,7 @@ cd ~/lerobot && pip install -e ".[stretch]" > **Note:** If you get this message, you can ignore it: `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.` 8. Run a [system check](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#system-check) to make sure your robot is ready: + ```bash stretch_system_check.py ``` @@ -63,6 +70,7 @@ stretch_system_check.py > **Note:** You may need to free the "robot process" after booting Stretch by running `stretch_free_robot_process.py`. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#turning-off-gamepad-teleoperation). You should get something like this: + ```bash For use with S T R E T C H (R) from Hello Robot Inc.
--------------------------------------------------------------------- @@ -89,11 +97,13 @@ Serial Number = stretch-se3-3054 **Calibrate (Optional)** Before operating Stretch, you need to [home](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#homing) it first. Be mindful about giving Stretch some space as this procedure will move the robot's arm and gripper. Now run this command: + ```bash python lerobot/scripts/control_robot.py \ --robot.type=stretch \ --control.type=calibrate ``` + This is equivalent to running `stretch_robot_home.py` > **Note:** If you run any of the LeRobot scripts below and Stretch is not properly homed, it will automatically home/calibrate first. @@ -104,28 +114,33 @@ Before trying teleoperation, you need to activate the gamepad controller by pres Now try out teleoperation (see above documentation to learn about the gamepad controls): > **NOTE:** To visualize the data, enable `--control.display_data=true`. This streams the data using `rerun`. + ```bash python lerobot/scripts/control_robot.py \ --robot.type=stretch \ --control.type=teleoperate ``` + This is essentially the same as running `stretch_gamepad_teleop.py` **Record a dataset** Once you're familiar with the gamepad controls and after a bit of practice, you can try to record your first dataset with Stretch. 
If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens): + ```bash huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential ``` Store your Hugging Face repository name in a variable to run these commands: + ```bash HF_USER=$(huggingface-cli whoami | head -n 1) echo $HF_USER ``` Record one episode: + ```bash python lerobot/scripts/control_robot.py \ --robot.type=stretch \ @@ -145,6 +160,7 @@ python lerobot/scripts/control_robot.py \ **Replay an episode** Now try to replay this episode (make sure the robot's initial position is the same): + ```bash python lerobot/scripts/control_robot.py \ --robot.type=stretch \ diff --git a/src/lerobot/robots/viperx/README.md b/src/lerobot/robots/viperx/README.md index 445368e7a..4e90c99c7 100644 --- a/src/lerobot/robots/viperx/README.md +++ b/src/lerobot/robots/viperx/README.md @@ -4,12 +4,12 @@ This tutorial explains how to use [Aloha and Aloha 2 stationary](https://www.tro Follow the [documentation from Trossen Robotics](https://docs.trossenrobotics.com/aloha_docs/2.0/getting_started/stationary/hardware_setup.html) for setting up the hardware and plugging the 4 arms and 4 cameras to your computer. - ## Install LeRobot On your computer: 1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install): + ```bash mkdir -p ~/miniconda3 wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh @@ -21,29 +21,34 @@ rm ~/miniconda3/miniconda.sh 2. Restart shell or `source ~/.bashrc` 3. Create and activate a fresh conda environment for lerobot + ```bash conda create -y -n lerobot python=3.10 && conda activate lerobot ``` 4. Clone LeRobot: + ```bash git clone https://github.com/huggingface/lerobot.git ~/lerobot ``` 5. 
When using `miniconda`, install `ffmpeg` in your environment: + ```bash conda install ffmpeg -c conda-forge ``` 6. Install LeRobot with dependencies for the Aloha motors (dynamixel) and cameras (intelrealsense): + ```bash cd ~/lerobot && pip install -e ".[dynamixel, intelrealsense]" ``` ## Teleoperate -**/!\ FOR SAFETY, READ THIS /!\** +\*\*/!\ FOR SAFETY, READ THIS /!\*\* Teleoperation consists in manually operating the leader arms to move the follower arms. Importantly: + 1. Make sure your leader arms are in the same position as the follower arms, so that the follower arms don't move too fast to match the leader arms, 2. Our code assumes that your robot has been assembled following Trossen Robotics instructions. This allows us to skip calibration, as we use the pre-defined calibration files in `.cache/calibration/aloha_default`. If you replace a motor, make sure you follow the exact instructions from Trossen Robotics. @@ -59,6 +64,7 @@ python lerobot/scripts/control_robot.py \ ``` By adding `--robot.max_relative_target=5`, we override the default value for `max_relative_target` defined in [`AlohaRobotConfig`](lerobot/robot_devices/robots/configs.py). It is expected to be `5` to limit the magnitude of the movement for more safety, but the teleoperation won't be smooth. When you feel confident, you can disable this limit by adding `--robot.max_relative_target=null` to the command line: + ```bash python lerobot/scripts/control_robot.py \ --robot.type=aloha \ @@ -71,17 +77,20 @@ python lerobot/scripts/control_robot.py \ Once you're familiar with teleoperation, you can record your first dataset with Aloha. 
If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens): + ```bash huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential ``` Store your Hugging Face repository name in a variable to run these commands: + ```bash HF_USER=$(huggingface-cli whoami | head -n 1) echo $HF_USER ``` Record 2 episodes and upload your dataset to the hub: + ```bash python lerobot/scripts/control_robot.py \ --robot.type=aloha \ @@ -101,11 +110,13 @@ python lerobot/scripts/control_robot.py \ ## Visualize a dataset If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by: + ```bash echo ${HF_USER}/aloha_test ``` If you didn't upload with `--control.push_to_hub=false`, you can also visualize it locally with: + ```bash python -m lerobot.scripts.visualize_dataset_html \ --repo-id ${HF_USER}/aloha_test @@ -113,10 +124,11 @@ python -m lerobot.scripts.visualize_dataset_html \ ## Replay an episode -**/!\ FOR SAFETY, READ THIS /!\** +\*\*/!\ FOR SAFETY, READ THIS /!\*\* Replay consists in automatically replaying the sequence of actions (i.e. goal positions for your motors) recorded in a given dataset episode. Make sure the current initial position of your robot is similar to the one in your episode, so that your follower arms don't move too fast to go to the first goal positions. For safety, you might want to add `--robot.max_relative_target=5` to your command line as explained above. 
Now try to replay the first episode on your robot: + ```bash python lerobot/scripts/control_robot.py \ --robot.type=aloha \ @@ -130,6 +142,7 @@ python lerobot/scripts/control_robot.py \ ## Train a policy To train a policy to control your robot, use the [`python -m lerobot.scripts.train`](../src/lerobot/scripts/train.py) script. A few arguments are required. Here is an example command: + ```bash python -m lerobot.scripts.train \ --dataset.repo_id=${HF_USER}/aloha_test \ @@ -141,10 +154,11 @@ python -m lerobot.scripts.train \ ``` Let's explain it: + 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/aloha_test`. 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../src/lerobot/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset. -4. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon. -5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. +3. We provided `policy.device=cuda` since we are training on a Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon. +4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`. For more information on the `train` script see the previous tutorial: [`examples/4_train_policy_with_script.md`](../examples/4_train_policy_with_script.md) @@ -153,6 +167,7 @@ Training should take several hours. 
You will find checkpoints in `outputs/train/ ## Evaluate your policy You can use the `record` function from [`lerobot/scripts/control_robot.py`](../src/lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes: + ```bash python lerobot/scripts/control_robot.py \ --robot.type=aloha \ @@ -171,7 +186,8 @@ python lerobot/scripts/control_robot.py \ ``` As you can see, it's almost the same command as previously used to record your training dataset. Two things changed: -1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint with (e.g. `outputs/train/eval_act_aloha_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_aloha_test`). + +1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint with (e.g. `outputs/train/eval_act_aloha_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_aloha_test`). 2. The name of dataset begins by `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_aloha_test`). 3. We use `--control.num_image_writer_processes=1` instead of the default value (`0`). On our computer, using a dedicated process to write images from the 4 cameras on disk allows to reach constant 30 fps during inference. Feel free to explore different values for `--control.num_image_writer_processes`. 
diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py index d85ac27b3..7c5aec48a 100644 --- a/src/lerobot/scripts/eval.py +++ b/src/lerobot/scripts/eval.py @@ -50,12 +50,12 @@ import json import logging import threading import time +from collections.abc import Callable from contextlib import nullcontext from copy import deepcopy from dataclasses import asdict from pathlib import Path from pprint import pformat -from typing import Callable import einops import gymnasium as gym diff --git a/src/lerobot/scripts/rl/crop_dataset_roi.py b/src/lerobot/scripts/rl/crop_dataset_roi.py index 0b71b5363..69904b740 100644 --- a/src/lerobot/scripts/rl/crop_dataset_roi.py +++ b/src/lerobot/scripts/rl/crop_dataset_roi.py @@ -18,7 +18,6 @@ import argparse import json from copy import deepcopy from pathlib import Path -from typing import Dict, Tuple import cv2 import torch @@ -162,10 +161,10 @@ def get_image_from_lerobot_dataset(dataset: LeRobotDataset): def convert_lerobot_dataset_to_cropper_lerobot_dataset( original_dataset: LeRobotDataset, - crop_params_dict: Dict[str, Tuple[int, int, int, int]], + crop_params_dict: dict[str, tuple[int, int, int, int]], new_repo_id: str, new_dataset_root: str, - resize_size: Tuple[int, int] = (128, 128), + resize_size: tuple[int, int] = (128, 128), push_to_hub: bool = False, task: str = "", ) -> LeRobotDataset: diff --git a/src/lerobot/scripts/rl/gym_manipulator.py b/src/lerobot/scripts/rl/gym_manipulator.py index 673043b6e..c8be6b7dd 100644 --- a/src/lerobot/scripts/rl/gym_manipulator.py +++ b/src/lerobot/scripts/rl/gym_manipulator.py @@ -39,8 +39,9 @@ Example: import logging import time from collections import deque +from collections.abc import Sequence from threading import Lock -from typing import Annotated, Any, Sequence +from typing import Annotated, Any import gymnasium as gym import numpy as np diff --git a/src/lerobot/scripts/rl/learner.py b/src/lerobot/scripts/rl/learner.py index cb88895cf..f9f3901ce 100644 --- 
a/src/lerobot/scripts/rl/learner.py +++ b/src/lerobot/scripts/rl/learner.py @@ -87,12 +87,10 @@ from lerobot.utils.process import ProcessSignalHandler from lerobot.utils.random_utils import set_seed from lerobot.utils.train_utils import ( get_step_checkpoint_dir, + load_training_state as utils_load_training_state, save_checkpoint, update_last_checkpoint, ) -from lerobot.utils.train_utils import ( - load_training_state as utils_load_training_state, -) from lerobot.utils.transition import move_state_dict_to_device, move_transition_to_device from lerobot.utils.utils import ( format_big_number, diff --git a/src/lerobot/scripts/server/configs.py b/src/lerobot/scripts/server/configs.py index 7058842ae..5be46485e 100644 --- a/src/lerobot/scripts/server/configs.py +++ b/src/lerobot/scripts/server/configs.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections.abc import Callable from dataclasses import dataclass, field -from typing import Callable import torch diff --git a/src/lerobot/scripts/server/robot_client.py b/src/lerobot/scripts/server/robot_client.py index 44d9cdf77..68166de6f 100644 --- a/src/lerobot/scripts/server/robot_client.py +++ b/src/lerobot/scripts/server/robot_client.py @@ -36,10 +36,11 @@ import logging import pickle # nosec import threading import time +from collections.abc import Callable from dataclasses import asdict from pprint import pformat from queue import Queue -from typing import Any, Callable, Optional +from typing import Any import draccus import grpc @@ -231,7 +232,7 @@ class RobotClient: def _aggregate_action_queues( self, incoming_actions: list[TimedAction], - aggregate_fn: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + aggregate_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None, ): """Finds the same timestep actions in the queue and aggregates them using the aggregate_fn""" if aggregate_fn is None: diff 
--git a/src/lerobot/scripts/visualize_dataset.py b/src/lerobot/scripts/visualize_dataset.py index 37db66ddf..51ead0dd1 100644 --- a/src/lerobot/scripts/visualize_dataset.py +++ b/src/lerobot/scripts/visualize_dataset.py @@ -65,8 +65,8 @@ import argparse import gc import logging import time +from collections.abc import Iterator from pathlib import Path -from typing import Iterator import numpy as np import rerun as rr diff --git a/src/lerobot/teleoperators/homunculus/homunculus_arm.py b/src/lerobot/teleoperators/homunculus/homunculus_arm.py index dfce0c88e..6f5137af9 100644 --- a/src/lerobot/teleoperators/homunculus/homunculus_arm.py +++ b/src/lerobot/teleoperators/homunculus/homunculus_arm.py @@ -18,7 +18,7 @@ import logging import threading from collections import deque from pprint import pformat -from typing import Deque, Dict, Optional +from typing import Deque import serial @@ -60,7 +60,7 @@ class HomunculusArm(Teleoperator): self.n: int = n self.alpha: float = 2 / (n + 1) # one deque *per joint* so we can inspect raw history if needed - self._buffers: Dict[str, Deque[int]] = { + self._buffers: dict[str, Deque[int]] = { joint: deque(maxlen=n) for joint in ( "shoulder_pitch", @@ -73,7 +73,7 @@ class HomunculusArm(Teleoperator): ) } # running EMA value per joint – lazily initialised on first read - self._ema: Dict[str, Optional[float]] = dict.fromkeys(self._buffers) + self._ema: dict[str, float | None] = dict.fromkeys(self._buffers) self._state: dict[str, float] | None = None self.new_state_event = threading.Event() @@ -217,9 +217,9 @@ class HomunculusArm(Teleoperator): return normalized_values - def _apply_ema(self, raw: Dict[str, int]) -> Dict[str, float]: + def _apply_ema(self, raw: dict[str, int]) -> dict[str, float]: """Update buffers & running EMA values; return smoothed dict.""" - smoothed: Dict[str, float] = {} + smoothed: dict[str, float] = {} for joint, value in raw.items(): # maintain raw history self._buffers[joint].append(value) diff --git 
a/src/lerobot/teleoperators/homunculus/homunculus_glove.py b/src/lerobot/teleoperators/homunculus/homunculus_glove.py index d367a2a7c..7b0ced9f6 100644 --- a/src/lerobot/teleoperators/homunculus/homunculus_glove.py +++ b/src/lerobot/teleoperators/homunculus/homunculus_glove.py @@ -18,7 +18,7 @@ import logging import threading from collections import deque from pprint import pformat -from typing import Deque, Dict, Optional +from typing import Deque import serial @@ -97,9 +97,9 @@ class HomunculusGlove(Teleoperator): self.n: int = n self.alpha: float = 2 / (n + 1) # one deque *per joint* so we can inspect raw history if needed - self._buffers: Dict[str, Deque[int]] = {joint: deque(maxlen=n) for joint in self.joints} + self._buffers: dict[str, Deque[int]] = {joint: deque(maxlen=n) for joint in self.joints} # running EMA value per joint – lazily initialised on first read - self._ema: Dict[str, Optional[float]] = dict.fromkeys(self._buffers) + self._ema: dict[str, float | None] = dict.fromkeys(self._buffers) self._state: dict[str, float] | None = None self.new_state_event = threading.Event() @@ -248,9 +248,9 @@ class HomunculusGlove(Teleoperator): return normalized_values - def _apply_ema(self, raw: Dict[str, int]) -> Dict[str, int]: + def _apply_ema(self, raw: dict[str, int]) -> dict[str, int]: """Update buffers & running EMA values; return smoothed dict as integers.""" - smoothed: Dict[str, int] = {} + smoothed: dict[str, int] = {} for joint, value in raw.items(): # maintain raw history self._buffers[joint].append(value) diff --git a/src/lerobot/teleoperators/teleoperator.py b/src/lerobot/teleoperators/teleoperator.py index 49f259c17..c360ee7bb 100644 --- a/src/lerobot/teleoperators/teleoperator.py +++ b/src/lerobot/teleoperators/teleoperator.py @@ -13,8 +13,9 @@ # limitations under the License. 
import abc +import builtins from pathlib import Path -from typing import Any, Type +from typing import Any import draccus @@ -37,7 +38,7 @@ class Teleoperator(abc.ABC): """ # Set these in ALL subclasses - config_class: Type[TeleoperatorConfig] + config_class: builtins.type[TeleoperatorConfig] name: str def __init__(self, config: TeleoperatorConfig): diff --git a/src/lerobot/templates/lerobot_modelcard_template.md b/src/lerobot/templates/lerobot_modelcard_template.md index 64ad7196c..7b7aaa84a 100644 --- a/src/lerobot/templates/lerobot_modelcard_template.md +++ b/src/lerobot/templates/lerobot_modelcard_template.md @@ -1,7 +1,8 @@ --- # For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 # Doc / guide: https://huggingface.co/docs/hub/model-cards -{{ card_data }} +# prettier-ignore +{{card_data}} --- # Model Card for {{ model_name | default("Model ID", true) }} @@ -53,7 +54,7 @@ python -m lerobot.scripts.train \ --wandb.enable=true ``` -*Writes checkpoints to `outputs/train//checkpoints/`.* +_Writes checkpoints to `outputs/train//checkpoints/`._ ### Evaluate the policy/run inference @@ -71,4 +72,4 @@ Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a ## Model Details -* **License:** {{ license | default("\[More Information Needed]", true) }} +- **License:** {{ license | default("\[More Information Needed]", true) }} diff --git a/src/lerobot/utils/benchmark.py b/src/lerobot/utils/benchmark.py index 4b08e6f6d..d9e5e62bb 100644 --- a/src/lerobot/utils/benchmark.py +++ b/src/lerobot/utils/benchmark.py @@ -46,11 +46,13 @@ class TimeBenchmark(ContextDecorator): benchmark = TimeBenchmark() + def context_manager_example(): with benchmark: time.sleep(0.01) print(f"Block took {benchmark.result_ms:.2f} milliseconds") + threads = [] for _ in range(3): t1 = threading.Thread(target=context_manager_example) diff --git a/src/lerobot/utils/buffer.py b/src/lerobot/utils/buffer.py index 
7f8d989dd..d9ffa899c 100644 --- a/src/lerobot/utils/buffer.py +++ b/src/lerobot/utils/buffer.py @@ -15,8 +15,9 @@ # limitations under the License. import functools +from collections.abc import Callable, Sequence from contextlib import suppress -from typing import Callable, Sequence, TypedDict +from typing import TypedDict import torch import torch.nn.functional as F # noqa: N812 diff --git a/src/lerobot/utils/hub.py b/src/lerobot/utils/hub.py index df7435c0f..566701b31 100644 --- a/src/lerobot/utils/hub.py +++ b/src/lerobot/utils/hub.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import builtins from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, Type, TypeVar +from typing import Any, TypeVar from huggingface_hub import HfApi from huggingface_hub.utils import validate_hf_hub_args @@ -85,7 +86,7 @@ class HubMixin: @classmethod @validate_hf_hub_args def from_pretrained( - cls: Type[T], + cls: builtins.type[T], pretrained_name_or_path: str | Path, *, force_download: bool = False, diff --git a/src/lerobot/utils/random_utils.py b/src/lerobot/utils/random_utils.py index 31fed1da6..da3ecf37f 100644 --- a/src/lerobot/utils/random_utils.py +++ b/src/lerobot/utils/random_utils.py @@ -14,9 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import random +from collections.abc import Generator from contextlib import contextmanager from pathlib import Path -from typing import Any, Generator +from typing import Any import numpy as np import torch diff --git a/src/lerobot/utils/utils.py b/src/lerobot/utils/utils.py index 7a9717dce..6e13646b0 100644 --- a/src/lerobot/utils/utils.py +++ b/src/lerobot/utils/utils.py @@ -185,10 +185,10 @@ def print_cuda_memory_usage(): gc.collect() # Also clear the cache if you want to fully release the memory torch.cuda.empty_cache() - print("Current GPU Memory Allocated: {:.2f} MB".format(torch.cuda.memory_allocated(0) / 1024**2)) - print("Maximum GPU Memory Allocated: {:.2f} MB".format(torch.cuda.max_memory_allocated(0) / 1024**2)) - print("Current GPU Memory Reserved: {:.2f} MB".format(torch.cuda.memory_reserved(0) / 1024**2)) - print("Maximum GPU Memory Reserved: {:.2f} MB".format(torch.cuda.max_memory_reserved(0) / 1024**2)) + print(f"Current GPU Memory Allocated: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB") + print(f"Maximum GPU Memory Allocated: {torch.cuda.max_memory_allocated(0) / 1024**2:.2f} MB") + print(f"Current GPU Memory Reserved: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB") + print(f"Maximum GPU Memory Reserved: {torch.cuda.max_memory_reserved(0) / 1024**2:.2f} MB") def capture_timestamp_utc(): diff --git a/tests/configs/test_plugin_loading.py b/tests/configs/test_plugin_loading.py index e81057c95..3ec60a485 100644 --- a/tests/configs/test_plugin_loading.py +++ b/tests/configs/test_plugin_loading.py @@ -15,9 +15,9 @@ # limitations under the License. import sys +from collections.abc import Generator from dataclasses import dataclass from pathlib import Path -from typing import Generator import pytest diff --git a/tests/mocks/mock_dynamixel.py b/tests/mocks/mock_dynamixel.py index 00403d146..84026fc34 100644 --- a/tests/mocks/mock_dynamixel.py +++ b/tests/mocks/mock_dynamixel.py @@ -15,7 +15,7 @@ # limitations under the License. 
import abc -from typing import Callable +from collections.abc import Callable import dynamixel_sdk as dxl import serial diff --git a/tests/mocks/mock_feetech.py b/tests/mocks/mock_feetech.py index 041c09421..33cbc41d6 100644 --- a/tests/mocks/mock_feetech.py +++ b/tests/mocks/mock_feetech.py @@ -15,7 +15,7 @@ # limitations under the License. import abc -from typing import Callable +from collections.abc import Callable import scservo_sdk as scs import serial diff --git a/tests/motors/test_dynamixel.py b/tests/motors/test_dynamixel.py index d990b5b0f..e0dbe713a 100644 --- a/tests/motors/test_dynamixel.py +++ b/tests/motors/test_dynamixel.py @@ -16,7 +16,7 @@ import re import sys -from typing import Generator +from collections.abc import Generator from unittest.mock import MagicMock, patch import pytest diff --git a/tests/motors/test_feetech.py b/tests/motors/test_feetech.py index d6ea1db20..31e4a9018 100644 --- a/tests/motors/test_feetech.py +++ b/tests/motors/test_feetech.py @@ -16,7 +16,7 @@ import re import sys -from typing import Generator +from collections.abc import Generator from unittest.mock import MagicMock, patch import pytest diff --git a/tests/utils/test_replay_buffer.py b/tests/utils/test_replay_buffer.py index 260276032..a53d7ba8c 100644 --- a/tests/utils/test_replay_buffer.py +++ b/tests/utils/test_replay_buffer.py @@ -15,7 +15,7 @@ # limitations under the License. 
import sys -from typing import Callable +from collections.abc import Callable import pytest import torch From 7e9f955b4023887439eba681f14bbb81b40c1357 Mon Sep 17 00:00:00 2001 From: Juan Pizarro Date: Thu, 17 Jul 2025 17:01:48 +0200 Subject: [PATCH 17/20] fix(hil-serl): drain queue on get_last_item_from_queue (#1524) * fix(hil-serl): drain queue on get_last_item_from_queue * parametrize queue tests * revert changes for Darwin * revert parametrize queue tests * add test_get_last_item_multiple_items_with_torch_queue * update test_get_last_item_multiple_items_with_torch_queue * update test_get_last_item_multiple_items_with_torch_queue --- src/lerobot/utils/queue.py | 21 +++++++++++++++++---- tests/utils/test_queue.py | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/lerobot/utils/queue.py b/src/lerobot/utils/queue.py index ceb30e2bf..864d798ac 100644 --- a/src/lerobot/utils/queue.py +++ b/src/lerobot/utils/queue.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import platform +from contextlib import suppress from queue import Empty from typing import Any @@ -30,10 +32,21 @@ def get_last_item_from_queue(queue: Queue, block=True, timeout: float = 0.1) -> item = None # Drain queue and keep only the most recent parameters - try: - while True: + if platform.system() == "Darwin": + # On Mac, avoid using `qsize` due to unreliable implementation. 
+ # There is a comment on `qsize` code in the Python source: + # Raises NotImplementedError on Mac OSX because of broken sem_getvalue() + try: + while True: + item = queue.get_nowait() + except Empty: + pass + + return item + + # Details about using qsize in https://github.com/huggingface/lerobot/issues/1523 + while queue.qsize() > 0: + with suppress(Empty): item = queue.get_nowait() - except Empty: - pass return item diff --git a/tests/utils/test_queue.py b/tests/utils/test_queue.py index 0a0d21770..6e42acdb7 100644 --- a/tests/utils/test_queue.py +++ b/tests/utils/test_queue.py @@ -18,6 +18,8 @@ import threading import time from queue import Queue +from torch.multiprocessing import Queue as TorchMPQueue + from lerobot.utils.queue import get_last_item_from_queue @@ -46,6 +48,20 @@ def test_get_last_item_multiple_items(): assert queue.empty() +def test_get_last_item_multiple_items_with_torch_queue(): + """Test getting the last item when queue has multiple items.""" + queue = TorchMPQueue() + items = ["first", "second", "third", "fourth", "last"] + + for item in items: + queue.put(item) + + result = get_last_item_from_queue(queue) + + assert result == "last" + assert queue.empty() + + def test_get_last_item_different_types(): """Test with different data types in the queue.""" queue = Queue() From 38d3737f09dc3cc8f777a57c9f5a89dd541b464c Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Thu, 17 Jul 2025 18:07:07 +0200 Subject: [PATCH 18/20] feat(ci): add new & clean dockerfiles (#1525) --- .github/workflows/test-docker-build.yml | 4 +- docker/Dockerfile.internal | 60 +++++++++++++++++++++++++ docker/Dockerfile.user | 50 +++++++++++++++++++++ pyproject.toml | 31 ++++++++++--- 4 files changed, 139 insertions(+), 6 deletions(-) create mode 100644 docker/Dockerfile.internal create mode 100644 docker/Dockerfile.user diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml index 7a1e93274..c33813418 100644 --- 
a/.github/workflows/test-docker-build.yml +++ b/.github/workflows/test-docker-build.yml @@ -20,7 +20,9 @@ on: pull_request: paths: # Run only when DockerFile files are modified - - "docker/**" + - "docker/lerobot-cpu/**" + - "docker/lerobot-gpu/**" + - "docker/lerobot-gpu-dev/**" permissions: {} diff --git a/docker/Dockerfile.internal b/docker/Dockerfile.internal new file mode 100644 index 000000000..051606449 --- /dev/null +++ b/docker/Dockerfile.internal @@ -0,0 +1,60 @@ +# Dockerfile.internal +# This Dockerfile is designed for HuggingFace internal CI environments +# that require GPU access. It starts from an NVIDIA CUDA base image. + +# docker build -f docker/Dockerfile.internal -t lerobot-ci . + +# Configure the base image for CI with GPU access +ARG CUDA_VERSION=12.9.1 +ARG OS_VERSION=24.04 +FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION} + +# Define Python version argument +ARG PYTHON_VERSION=3.10 + +# Configure environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + MUJOCO_GL="egl" \ + PATH="/lerobot/.venv/bin:$PATH" + +# Install Python, system dependencies, and uv (as root) +RUN apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common \ + build-essential git curl \ + libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ + libusb-1.0-0-dev \ + speech-dispatcher libgeos-dev \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-venv \ + python${PYTHON_VERSION}-dev \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv \ + && useradd --create-home --shell /bin/bash user_lerobot \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Create application directory and set permissions +WORKDIR /lerobot +RUN chown -R user_lerobot:user_lerobot /lerobot + +# Switch to the non-root user +USER user_lerobot + +# Create the virtual environment +# We use a 
virtual environment inside the container—even though the container itself \ +# provides isolation—to ensure compatibility with the cluster and to prevent \ +# issues with MuJoCo and OpenGL drivers. +RUN uv venv --python python${PYTHON_VERSION} + +# Install Python dependencies for caching +COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md ./ +COPY --chown=user_lerobot:user_lerobot src/ src/ +RUN uv pip install --no-cache ".[all]" + +# Copy the rest of the application source code +COPY --chown=user_lerobot:user_lerobot . . + +# Set the default command +CMD ["/bin/bash"] diff --git a/docker/Dockerfile.user b/docker/Dockerfile.user new file mode 100644 index 000000000..ce63f5530 --- /dev/null +++ b/docker/Dockerfile.user @@ -0,0 +1,50 @@ +# Dockerfile.user +# This Dockerfile is designed for a lerobot user who wants to +# experiment with the project. It starts from a Python Slim base image. + +# docker build -f docker/Dockerfile.user -t lerobot-user . +# docker run -it --rm lerobot-user + +# Configure the base image +ARG PYTHON_VERSION=3.10 +FROM python:${PYTHON_VERSION}-slim + +# Configure environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + MUJOCO_GL="egl" \ + PATH="/lerobot/.venv/bin:$PATH" + +# Install system dependencies and uv (as root) +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential git curl \ + libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ + libusb-1.0-0-dev \ + speech-dispatcher libgeos-dev \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv \ + && useradd --create-home --shell /bin/bash user_lerobot \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Create application directory and set permissions +WORKDIR /lerobot +RUN chown -R user_lerobot:user_lerobot /lerobot + +# Switch to the non-root user +USER user_lerobot + +# Create the virtual environment +# We use a virtual environment inside the container—even though the container itself \ +# 
provides isolation—to closely resemble local development and allow users to \ +# run other Python projects in the same container without dependency conflicts. +RUN uv venv + +# Install Python dependencies for caching +COPY --chown=user_lerobot:user_lerobot pyproject.toml README.md ./ +COPY --chown=user_lerobot:user_lerobot src/ src/ +RUN uv pip install --no-cache ".[all]" + +# Copy the rest of the application code +COPY --chown=user_lerobot:user_lerobot . . + +# Set the default command +CMD ["/bin/bash"] diff --git a/pyproject.toml b/pyproject.toml index e9539037b..e0d754f53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,11 +110,11 @@ intelrealsense = [ "pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'", "pyrealsense2-macosx>=2.54 ; sys_platform == 'darwin'", ] -stretch = [ - "hello-robot-stretch-body>=0.7.27 ; sys_platform == 'linux'", - "pyrender @ git+https://github.com/mmatl/pyrender.git ; sys_platform == 'linux'", - "pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'" -] # TODO: Currently not supported +# stretch = [ +# "hello-robot-stretch-body>=0.7.27 ; sys_platform == 'linux'", +# "pyrender @ git+https://github.com/mmatl/pyrender.git ; sys_platform == 'linux'", +# "pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'" +# ] # TODO: Currently not supported # Policies pi0 = ["lerobot[transformers-dep]"] @@ -135,6 +135,27 @@ aloha = ["gym-aloha>=0.1.1"] pusht = ["gym-pusht>=0.1.5", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead xarm = ["gym-xarm>=0.1.1"] +# All +all = [ + "lerobot[dynamixel]", + "lerobot[gamepad]", + "lerobot[hopejr]", + "lerobot[lekiwi]", + "lerobot[kinematics]", + "lerobot[intelrealsense]", + "lerobot[pi0]", + "lerobot[smolvla]", + "lerobot[hilserl]", + "lerobot[async]", + "lerobot[docs]", + "lerobot[dev]", + "lerobot[test]", + "lerobot[video_benchmark]", + "lerobot[aloha]", + "lerobot[pusht]", + "lerobot[xarm]" +] + # ---------------- Tool Configurations ---------------- 
[tool.setuptools.packages.find] where = ["src"] From 862a4439ea4ce4671b84b713f28e705d0b6e172b Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Thu, 17 Jul 2025 18:09:16 +0200 Subject: [PATCH 19/20] chore(examples): remove outdated examples (#1526) --- examples/advanced/1_add_image_transforms.py | 67 ----------- .../advanced/2_calculate_validation_loss.py | 104 ------------------ 2 files changed, 171 deletions(-) delete mode 100644 examples/advanced/1_add_image_transforms.py delete mode 100644 examples/advanced/2_calculate_validation_loss.py diff --git a/examples/advanced/1_add_image_transforms.py b/examples/advanced/1_add_image_transforms.py deleted file mode 100644 index 3760feabb..000000000 --- a/examples/advanced/1_add_image_transforms.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This script demonstrates how to use torchvision's image transformation with LeRobotDataset for data -augmentation purposes. The transformations are passed to the dataset as an argument upon creation, and -transforms are applied to the observation images before they are returned in the dataset's __getitem__. 
-""" - -from pathlib import Path - -from torchvision.transforms import ToPILImage, v2 - -from lerobot.datasets.lerobot_dataset import LeRobotDataset - -dataset_repo_id = "lerobot/aloha_static_screw_driver" - -# Create a LeRobotDataset with no transformations -dataset = LeRobotDataset(dataset_repo_id, episodes=[0]) -# This is equivalent to `dataset = LeRobotDataset(dataset_repo_id, image_transforms=None)` - -# Get the index of the first observation in the first episode -first_idx = dataset.episode_data_index["from"][0].item() - -# Get the frame corresponding to the first camera -frame = dataset[first_idx][dataset.meta.camera_keys[0]] - - -# Define the transformations -transforms = v2.Compose( - [ - v2.ColorJitter(brightness=(0.5, 1.5)), - v2.ColorJitter(contrast=(0.5, 1.5)), - v2.ColorJitter(hue=(-0.1, 0.1)), - v2.RandomAdjustSharpness(sharpness_factor=2, p=1), - ] -) - -# Create another LeRobotDataset with the defined transformations -transformed_dataset = LeRobotDataset(dataset_repo_id, episodes=[0], image_transforms=transforms) - -# Get a frame from the transformed dataset -transformed_frame = transformed_dataset[first_idx][transformed_dataset.meta.camera_keys[0]] - -# Create a directory to store output images -output_dir = Path("outputs/image_transforms") -output_dir.mkdir(parents=True, exist_ok=True) - -# Save the original frame -to_pil = ToPILImage() -to_pil(frame).save(output_dir / "original_frame.png", quality=100) -print(f"Original frame saved to {output_dir / 'original_frame.png'}.") - -# Save the transformed frame -to_pil(transformed_frame).save(output_dir / "transformed_frame.png", quality=100) -print(f"Transformed frame saved to {output_dir / 'transformed_frame.png'}.") diff --git a/examples/advanced/2_calculate_validation_loss.py b/examples/advanced/2_calculate_validation_loss.py deleted file mode 100644 index 9eeb1a2d9..000000000 --- a/examples/advanced/2_calculate_validation_loss.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2024 The HuggingFace 
Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This script demonstrates how to slice a dataset and calculate the loss on a subset of the data. - -This technique can be useful for debugging and testing purposes, as well as identifying whether a policy -is learning effectively. - -Furthermore, relying on validation loss to evaluate performance is generally not considered a good practice, -especially in the context of imitation learning. The most reliable approach is to evaluate the policy directly -on the target environment, whether that be in simulation or the real world. -""" - -import math - -import torch - -from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata -from lerobot.policies.diffusion.modeling_diffusion import DiffusionPolicy - - -def main(): - device = torch.device("cuda") - - # Download the diffusion policy for pusht environment - pretrained_policy_path = "lerobot/diffusion_pusht" - # OR uncomment the following to evaluate a policy from the local outputs/train folder. - # pretrained_policy_path = Path("outputs/train/example_pusht_diffusion") - - policy = DiffusionPolicy.from_pretrained(pretrained_policy_path) - policy.eval() - policy.to(device) - - # Set up the dataset. - delta_timestamps = { - # Load the previous image and state at -0.1 seconds before current frame, - # then load current image and state corresponding to 0.0 second. 
- "observation.image": [-0.1, 0.0], - "observation.state": [-0.1, 0.0], - # Load the previous action (-0.1), the next action to be executed (0.0), - # and 14 future actions with a 0.1 seconds spacing. All these actions will be - # used to calculate the loss. - "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4], - } - - # Load the last 10% of episodes of the dataset as a validation set. - # - Load dataset metadata - dataset_metadata = LeRobotDatasetMetadata("lerobot/pusht") - # - Calculate train and val episodes - total_episodes = dataset_metadata.total_episodes - episodes = list(range(dataset_metadata.total_episodes)) - num_train_episodes = math.floor(total_episodes * 90 / 100) - train_episodes = episodes[:num_train_episodes] - val_episodes = episodes[num_train_episodes:] - print(f"Number of episodes in full dataset: {total_episodes}") - print(f"Number of episodes in training dataset (90% subset): {len(train_episodes)}") - print(f"Number of episodes in validation dataset (10% subset): {len(val_episodes)}") - # - Load train and val datasets - train_dataset = LeRobotDataset( - "lerobot/pusht", episodes=train_episodes, delta_timestamps=delta_timestamps - ) - val_dataset = LeRobotDataset("lerobot/pusht", episodes=val_episodes, delta_timestamps=delta_timestamps) - print(f"Number of frames in training dataset (90% subset): {len(train_dataset)}") - print(f"Number of frames in validation dataset (10% subset): {len(val_dataset)}") - - # Create dataloader for evaluation. - val_dataloader = torch.utils.data.DataLoader( - val_dataset, - num_workers=4, - batch_size=64, - shuffle=False, - pin_memory=device != torch.device("cpu"), - drop_last=False, - ) - - # Run validation loop. 
- loss_cumsum = 0 - n_examples_evaluated = 0 - for batch in val_dataloader: - batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()} - loss, _ = policy.forward(batch) - - loss_cumsum += loss.item() - n_examples_evaluated += batch["index"].shape[0] - - # Calculate the average loss over the validation set. - average_loss = loss_cumsum / n_examples_evaluated - - print(f"Average loss on validation set: {average_loss:.4f}") - - -if __name__ == "__main__": - main() From e6e1f085d4f66f34e9e6bd7c7f00893e9e425f9e Mon Sep 17 00:00:00 2001 From: Xingdong Zuo Date: Fri, 18 Jul 2025 19:18:52 +0900 Subject: [PATCH 20/20] Feat: Add Batched Video Encoding for Faster Dataset Recording (#1390) * LeRobotDataset video encoding: updated `save_episode` method and added `batch_encode_videos` method to handle video encoding based on `batch_encoding_size`, allowing for both immediate and batched encoding. * LeRobotDataset video cleanup: Enabled individual episode cleanup and check for remaining PNG files before removing the `images` directory. * LeRobotDataset - VideoEncodingManager: added proper handling of pending episodes (encoding, cleaning) on exit or recording failures. * LeRobotDatasetMetadata: removed `update_video_info` to only update video info at episode index 0 encoding. * Adjusted the `record` function to utilize the new encoding management logic. * Removed `encode_videos` method from `LeRobotDataset` and `encode_episode_videos` outputs as they are nowhere used. 
--------- Signed-off-by: Xingdong Zuo Co-authored-by: Xingdong Zuo Co-authored-by: Caroline Pascal --- src/lerobot/datasets/lerobot_dataset.py | 98 ++++++++++++++++++------- src/lerobot/datasets/video_utils.py | 64 ++++++++++++++++ src/lerobot/record.py | 67 +++++++++-------- 3 files changed, 172 insertions(+), 57 deletions(-) diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 46feed2bf..72d1a722c 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -260,8 +260,6 @@ class LeRobotDatasetMetadata: self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"} self.info["total_videos"] += len(self.video_keys) - if len(self.video_keys) > 0: - self.update_video_info() write_info(self.info, self.root) @@ -342,6 +340,7 @@ class LeRobotDataset(torch.utils.data.Dataset): force_cache_sync: bool = False, download_videos: bool = True, video_backend: str | None = None, + batch_encoding_size: int = 1, ): """ 2 modes are available for instantiating this class, depending on 2 different use cases: @@ -443,6 +442,8 @@ class LeRobotDataset(torch.utils.data.Dataset): True. video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec when available int the platform; otherwise, defaults to 'pyav'. You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision. + batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos. + Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1. 
""" super().__init__() self.repo_id = repo_id @@ -454,6 +455,8 @@ class LeRobotDataset(torch.utils.data.Dataset): self.revision = revision if revision else CODEBASE_VERSION self.video_backend = video_backend if video_backend else get_safe_default_codec() self.delta_indices = None + self.batch_encoding_size = batch_encoding_size + self.episodes_since_last_encoding = 0 # Unused attributes self.image_writer = None @@ -811,6 +814,10 @@ class LeRobotDataset(torch.utils.data.Dataset): """ This will save to disk the current episode in self.episode_buffer. + Video encoding is handled automatically based on batch_encoding_size: + - If batch_encoding_size == 1: Videos are encoded immediately after each episode + - If batch_encoding_size > 1: Videos are encoded in batches. + Args: episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to @@ -850,14 +857,28 @@ class LeRobotDataset(torch.utils.data.Dataset): self._save_episode_table(episode_buffer, episode_index) ep_stats = compute_episode_stats(episode_buffer, self.features) - if len(self.meta.video_keys) > 0: - video_paths = self.encode_episode_videos(episode_index) - for key in self.meta.video_keys: - episode_buffer[key] = video_paths[key] + has_video_keys = len(self.meta.video_keys) > 0 + use_batched_encoding = self.batch_encoding_size > 1 - # `meta.save_episode` be executed after encoding the videos + if has_video_keys and not use_batched_encoding: + self.encode_episode_videos(episode_index) + + # `meta.save_episode` should be executed after encoding the videos self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats) + # Check if we should trigger batch encoding + if has_video_keys and use_batched_encoding: + self.episodes_since_last_encoding += 1 + if self.episodes_since_last_encoding == self.batch_encoding_size: + start_ep = self.num_episodes - 
self.batch_encoding_size + end_ep = self.num_episodes + logging.info( + f"Batch encoding {self.batch_encoding_size} videos for episodes {start_ep} to {end_ep - 1}" + ) + self.batch_encode_videos(start_ep, end_ep) + self.episodes_since_last_encoding = 0 + + # Episode data index and timestamp checking ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index]) ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()} check_timestamps_sync( @@ -868,16 +889,13 @@ class LeRobotDataset(torch.utils.data.Dataset): self.tolerance_s, ) - video_files = list(self.root.rglob("*.mp4")) - assert len(video_files) == self.num_episodes * len(self.meta.video_keys) - + # Verify that we have one parquet file per episode and the number of video files matches the number of encoded episodes parquet_files = list(self.root.rglob("*.parquet")) assert len(parquet_files) == self.num_episodes - - # delete images - img_dir = self.root / "images" - if img_dir.is_dir(): - shutil.rmtree(self.root / "images") + video_files = list(self.root.rglob("*.mp4")) + assert len(video_files) == (self.num_episodes - self.episodes_since_last_encoding) * len( + self.meta.video_keys + ) if not episode_data: # Reset the buffer self.episode_buffer = self.create_episode_buffer() @@ -894,6 +912,8 @@ class LeRobotDataset(torch.utils.data.Dataset): def clear_episode_buffer(self) -> None: episode_index = self.episode_buffer["episode_index"] + + # Clean up image files for the current episode buffer if self.image_writer is not None: for cam_key in self.meta.camera_keys: img_dir = self._get_image_file_path( @@ -930,25 +950,22 @@ class LeRobotDataset(torch.utils.data.Dataset): if self.image_writer is not None: self.image_writer.wait_until_done() - def encode_videos(self) -> None: + def encode_episode_videos(self, episode_index: int) -> None: """ Use ffmpeg to convert frames stored as png into mp4 videos. Note: `encode_video_frames` is a blocking call. 
Making it asynchronous shouldn't speedup encoding, since video encoding with ffmpeg is already using multithreading. - """ - for ep_idx in range(self.meta.total_episodes): - self.encode_episode_videos(ep_idx) - def encode_episode_videos(self, episode_index: int) -> dict: + This method handles video encoding steps: + - Video encoding via ffmpeg + - Video info updating in metadata + - Raw image cleanup + + Args: + episode_index (int): Index of the episode to encode. """ - Use ffmpeg to convert frames stored as png into mp4 videos. - Note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding, - since video encoding with ffmpeg is already using multithreading. - """ - video_paths = {} for key in self.meta.video_keys: video_path = self.root / self.meta.get_video_file_path(episode_index, key) - video_paths[key] = str(video_path) if video_path.is_file(): # Skip if video is already encoded. Could be the case when resuming data recording. continue @@ -956,8 +973,32 @@ class LeRobotDataset(torch.utils.data.Dataset): episode_index=episode_index, image_key=key, frame_index=0 ).parent encode_video_frames(img_dir, video_path, self.fps, overwrite=True) + shutil.rmtree(img_dir) - return video_paths + # Update video info (only needed when first episode is encoded since it reads from episode 0) + if len(self.meta.video_keys) > 0 and episode_index == 0: + self.meta.update_video_info() + write_info(self.meta.info, self.meta.root) # ensure video info always written properly + + def batch_encode_videos(self, start_episode: int = 0, end_episode: int | None = None) -> None: + """ + Batch encode videos for multiple episodes. + + Args: + start_episode: Starting episode index (inclusive) + end_episode: Ending episode index (exclusive). 
If None, encodes all episodes from start_episode + """ + if end_episode is None: + end_episode = self.meta.total_episodes + + logging.info(f"Starting batch video encoding for episodes {start_episode} to {end_episode - 1}") + + # Encode all episodes with cleanup enabled for individual episodes + for ep_idx in range(start_episode, end_episode): + logging.info(f"Encoding videos for episode {ep_idx}") + self.encode_episode_videos(ep_idx) + + logging.info("Batch video encoding completed") @classmethod def create( @@ -972,6 +1013,7 @@ class LeRobotDataset(torch.utils.data.Dataset): image_writer_processes: int = 0, image_writer_threads: int = 0, video_backend: str | None = None, + batch_encoding_size: int = 1, ) -> "LeRobotDataset": """Create a LeRobot Dataset from scratch in order to record data.""" obj = cls.__new__(cls) @@ -988,6 +1030,8 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.revision = None obj.tolerance_s = tolerance_s obj.image_writer = None + obj.batch_encoding_size = batch_encoding_size + obj.episodes_since_last_encoding = 0 if image_writer_processes or image_writer_threads: obj.start_image_writer(image_writer_processes, image_writer_threads) diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 3a77f36e4..b05edf6bd 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -16,6 +16,7 @@ import glob import importlib import logging +import shutil import warnings from dataclasses import dataclass, field from pathlib import Path @@ -451,3 +452,66 @@ def get_image_pixel_channels(image: Image): return 4 # RGBA else: raise ValueError("Unknown format") + + +class VideoEncodingManager: + """ + Context manager that ensures proper video encoding and data cleanup even if exceptions occur. 
+ + This manager handles: + - Batch encoding for any remaining episodes when recording interrupted + - Cleaning up temporary image files from interrupted episodes + - Removing empty image directories + + Args: + dataset: The LeRobotDataset instance + """ + + def __init__(self, dataset): + self.dataset = dataset + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Handle any remaining episodes that haven't been batch encoded + if self.dataset.episodes_since_last_encoding > 0: + if exc_type is not None: + logging.info("Exception occurred. Encoding remaining episodes before exit...") + else: + logging.info("Recording stopped. Encoding remaining episodes...") + + start_ep = self.dataset.num_episodes - self.dataset.episodes_since_last_encoding + end_ep = self.dataset.num_episodes + logging.info( + f"Encoding remaining {self.dataset.episodes_since_last_encoding} episodes, " + f"from episode {start_ep} to {end_ep - 1}" + ) + self.dataset.batch_encode_videos(start_ep, end_ep) + + # Clean up episode images if recording was interrupted + if exc_type is not None: + interrupted_episode_index = self.dataset.num_episodes + for key in self.dataset.meta.video_keys: + img_dir = self.dataset._get_image_file_path( + episode_index=interrupted_episode_index, image_key=key, frame_index=0 + ).parent + if img_dir.exists(): + logging.debug( + f"Cleaning up interrupted episode images for episode {interrupted_episode_index}, camera {key}" + ) + shutil.rmtree(img_dir) + + # Clean up any remaining images directory if it's empty + img_dir = self.dataset.root / "images" + # Check for any remaining PNG files + png_files = list(img_dir.rglob("*.png")) + if len(png_files) == 0: + # Only remove the images directory if no PNG files remain + if img_dir.exists(): + shutil.rmtree(img_dir) + logging.debug("Cleaned up empty images directory") + else: + logging.debug(f"Images directory is not empty, containing {len(png_files)} PNG files") + + return False # Don't 
suppress the original exception diff --git a/src/lerobot/record.py b/src/lerobot/record.py index 0b1af192e..d662efcab 100644 --- a/src/lerobot/record.py +++ b/src/lerobot/record.py @@ -73,6 +73,7 @@ from lerobot.configs.policies import PreTrainedConfig from lerobot.datasets.image_writer import safe_stop_image_writer from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.utils import build_dataset_frame, hw_to_dataset_features +from lerobot.datasets.video_utils import VideoEncodingManager from lerobot.policies.factory import make_policy from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.robots import ( # noqa: F401 @@ -145,6 +146,9 @@ class DatasetRecordConfig: # Too many threads might cause unstable teleoperation fps due to main thread being blocked. # Not enough threads might cause low camera fps. num_image_writer_threads_per_camera: int = 4 + # Number of episodes to record before batch encoding videos + # Set to 1 for immediate encoding (default behavior), or higher for batched encoding + video_encoding_batch_size: int = 1 def __post_init__(self): if self.single_task is None: @@ -298,6 +302,7 @@ def record(cfg: RecordConfig) -> LeRobotDataset: dataset = LeRobotDataset( cfg.dataset.repo_id, root=cfg.dataset.root, + batch_encoding_size=cfg.dataset.video_encoding_batch_size, ) if hasattr(robot, "cameras") and len(robot.cameras) > 0: @@ -318,6 +323,7 @@ def record(cfg: RecordConfig) -> LeRobotDataset: use_videos=cfg.dataset.video, image_writer_processes=cfg.dataset.num_image_writer_processes, image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras), + batch_encoding_size=cfg.dataset.video_encoding_batch_size, ) # Load pretrained policy @@ -329,46 +335,47 @@ def record(cfg: RecordConfig) -> LeRobotDataset: listener, events = init_keyboard_listener() - recorded_episodes = 0 - while recorded_episodes < cfg.dataset.num_episodes and not events["stop_recording"]: - log_say(f"Recording episode 
{dataset.num_episodes}", cfg.play_sounds) - record_loop( - robot=robot, - events=events, - fps=cfg.dataset.fps, - teleop=teleop, - policy=policy, - dataset=dataset, - control_time_s=cfg.dataset.episode_time_s, - single_task=cfg.dataset.single_task, - display_data=cfg.display_data, - ) - - # Execute a few seconds without recording to give time to manually reset the environment - # Skip reset for the last episode to be recorded - if not events["stop_recording"] and ( - (recorded_episodes < cfg.dataset.num_episodes - 1) or events["rerecord_episode"] - ): - log_say("Reset the environment", cfg.play_sounds) + with VideoEncodingManager(dataset): + recorded_episodes = 0 + while recorded_episodes < cfg.dataset.num_episodes and not events["stop_recording"]: + log_say(f"Recording episode {dataset.num_episodes}", cfg.play_sounds) record_loop( robot=robot, events=events, fps=cfg.dataset.fps, teleop=teleop, - control_time_s=cfg.dataset.reset_time_s, + policy=policy, + dataset=dataset, + control_time_s=cfg.dataset.episode_time_s, single_task=cfg.dataset.single_task, display_data=cfg.display_data, ) - if events["rerecord_episode"]: - log_say("Re-record episode", cfg.play_sounds) - events["rerecord_episode"] = False - events["exit_early"] = False - dataset.clear_episode_buffer() - continue + # Execute a few seconds without recording to give time to manually reset the environment + # Skip reset for the last episode to be recorded + if not events["stop_recording"] and ( + (recorded_episodes < cfg.dataset.num_episodes - 1) or events["rerecord_episode"] + ): + log_say("Reset the environment", cfg.play_sounds) + record_loop( + robot=robot, + events=events, + fps=cfg.dataset.fps, + teleop=teleop, + control_time_s=cfg.dataset.reset_time_s, + single_task=cfg.dataset.single_task, + display_data=cfg.display_data, + ) - dataset.save_episode() - recorded_episodes += 1 + if events["rerecord_episode"]: + log_say("Re-record episode", cfg.play_sounds) + events["rerecord_episode"] = False + 
events["exit_early"] = False + dataset.clear_episode_buffer() + continue + + dataset.save_episode() + recorded_episodes += 1 log_say("Stop recording", cfg.play_sounds, blocking=True)