add api examples

2026-05-11 14:49:43 +00:00 · 2025-10-20 18:53:37 +02:00
3 changed files with 113 additions and 2 deletions
@@ -0,0 +1,112 @@
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This script demonstrates how to evaluate pretrained vision-language-action (VLA) policies
+such as SmolVLA on Libero benchmark tasks using the LeRobot framework.
+
+It showcases the full evaluation pipeline — from environment creation to policy inference,
+visualization, and result logging — and is intended as a reference for benchmarking or
+integrating new robotic policies.
+
+Features included in this script:
+- loading Libero environments (e.g., libero_spatial, libero_object) via `make_env`.
+- initializing pretrained policies (e.g., SmolVLA) from Hugging Face using `make_policy`.
+- applying preprocessing and postprocessing transformations for model compatibility.
+- running evaluation rollouts and recording rendered frames from the simulator.
+- computing success metrics and saving rollout videos as MP4 for qualitative analysis.
+
+The script ends by saving a rollout video (`rollout.mp4`) and printing per-environment
+success indicators for quick visual and numerical evaluation.
+"""
+
+import os
+
+# Select MuJoCo's EGL rendering backend (used for headless rendering). This is set
+# before the remaining imports on purpose, so the value is already in place if any
+# of them loads MuJoCo at import time.
+os.environ["MUJOCO_GL"] = "egl"
+
+import imageio.v2 as imageio  # noqa: E402
+import numpy as np  # noqa: E402
+import torch  # noqa: E402
+
+from lerobot.envs.factory import make_env, make_env_config  # noqa: E402
+from lerobot.envs.utils import (  # noqa: E402
+    add_envs_task,
+    preprocess_observation,
+)
+from lerobot.policies.factory import (  # noqa: E402
+    make_policy,
+    make_policy_config,
+    make_pre_post_processors,
+)
+
+SMOLVLA_LIBERO_PATH = "HuggingFaceVLA/smolvla_libero"
+
+# Build the environment and policy configurations.
+LIBERO_CONFIG = make_env_config("libero", task="libero_spatial")
+POLICY_CONFIG = make_policy_config("smolvla", pretrained_path=SMOLVLA_LIBERO_PATH)
+
+# Initialize your policy; here we use SmolVLA. The environment config is passed along
+# as `env_cfg` when the policy is created.
+policy = make_policy(
+    cfg=POLICY_CONFIG,
+    env_cfg=LIBERO_CONFIG,
+)
+
+# Create the environments. The result is indexed by suite name first and then by an
+# integer index (see the lookup at the end of this section).
+libero_env = make_env(LIBERO_CONFIG)
+print(type(libero_env))  # <class 'dict'>
+print(libero_env.keys())  # the available suites, e.g. 'libero_spatial'
+
+policy.eval()
+preprocessor, postprocessor = make_pre_post_processors(
+    policy_cfg=POLICY_CONFIG,
+    pretrained_path=SMOLVLA_LIBERO_PATH,
+    # The inference device is set to the one the policy actually runs on, overriding
+    # any device setting saved at training time, to ensure compatibility.
+    preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
+)
+policy.reset()
+
+# For the sake of this example we only use a single env of the suite. Another suite,
+# if present in `libero_env`, would be selected the same way
+# (e.g. `libero_env["libero_object"][0]`).
+libero_spatial_env = libero_env["libero_spatial"][0]
+
+# Let's first run an evaluation rollout through the first task.
+observation, info = libero_spatial_env.reset()  # you can pass seeds
+max_steps = 220
+video_path = "rollout.mp4"
+video_fps = 30  # playback rate chosen for this example; adjust as needed
+num_envs = libero_spatial_env.num_envs
+all_images = []
+# Per-env flags that latch once set. They are initialised up front so the final
+# report is well defined even if the loop body never runs.
+done = np.zeros(num_envs, dtype=bool)
+successes = np.zeros(num_envs, dtype=bool)
+step = 0
+while not np.all(done) and step < max_steps:
+    observation = preprocess_observation(observation)
+    observation = add_envs_task(libero_spatial_env, observation)
+    observation = preprocessor(observation)
+    with torch.inference_mode():
+        action = policy.select_action(observation)
+    action = postprocessor(action)
+    # Convert to CPU / numpy.
+    action_numpy = action.to("cpu").numpy()
+
+    # Record the rendered frame of the first env before the action is applied.
+    all_images.append(libero_spatial_env.call("render")[0])
+
+    # Apply the next action.
+    observation, reward, terminated, truncated, info = libero_spatial_env.step(action_numpy)
+
+    if "final_info" in info:
+        final_info = info["final_info"]
+        if not isinstance(final_info, dict):
+            raise RuntimeError(
+                "Unsupported `final_info` format: expected dict (Gymnasium >= 1.0). "
+                "You're likely using an older version of gymnasium (< 1.0). Please upgrade."
+            )
+        # Accumulate instead of overwriting: a success reported on an earlier step
+        # must survive later steps taken while other envs are still running. Envs
+        # that were already done before this step are ignored, so only their first
+        # episode counts.
+        step_successes = np.asarray(final_info["is_success"], dtype=bool)
+        successes |= step_successes & ~done
+
+    # The `step < max_steps` loop condition enforces the step budget, so `done`
+    # only needs to track real terminations/truncations.
+    done = terminated | truncated | done
+    step += 1
+
+# Save the collected frames as a video for qualitative inspection.
+if all_images:
+    imageio.mimsave(video_path, all_images, fps=video_fps)
+
+# Release the simulator resources held by the environment.
+libero_spatial_env.close()
+
+print("The success: ", successes.tolist())
+
+
@@ -365,7 +365,7 @@ def make_policy(
    if not cfg.input_features:
        cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
    kwargs["config"] = cfg
-
+    breakpoint()
    if cfg.pretrained_path:
        # Load a pretrained policy and override the config if needed (for example, if there are inference-time
        # hyperparameters that we want to vary).
@@ -502,7 +502,6 @@ def eval_main(cfg: EvalPipelineConfig):
        cfg=cfg.policy,
        env_cfg=cfg.env,
    )
-
    policy.eval()
    preprocessor, postprocessor = make_pre_post_processors(
        policy_cfg=cfg.policy,