Compare commits

...

1 Commits

Author SHA1 Message Date
Jade Choghari ccc1a104db add api examples 2025-10-20 18:53:37 +02:00
3 changed files with 113 additions and 2 deletions
+112
@@ -0,0 +1,112 @@
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script demonstrates how to evaluate pretrained vision-language-action (VLA) policies
such as SmolVLA on Libero benchmark tasks using the LeRobot framework.
It showcases the full evaluation pipeline, from environment creation to policy inference,
visualization, and result logging, and is intended as a reference for benchmarking or
integrating new robotic policies.
Features included in this script:
- loading Libero environments (e.g., libero_spatial, libero_object) via `make_env`.
- initializing pretrained policies (e.g., SmolVLA) from Hugging Face using `make_policy`.
- applying preprocessing and postprocessing transformations for model compatibility.
- running evaluation rollouts and recording rendered frames from the simulator.
- computing success metrics and saving rollout videos as MP4 for qualitative analysis.
The script ends by saving a rollout video (`rollout.mp4`) and printing per-environment
success indicators for quick visual and numerical evaluation.
"""
import numpy as np
import torch
import imageio.v2 as imageio
from lerobot.envs.factory import make_env, make_env_config
from lerobot.policies.factory import make_policy, make_pre_post_processors
from lerobot.policies.factory import make_policy_config
from lerobot.envs.utils import (
    add_envs_task,
    preprocess_observation,
)
import os
os.environ["MUJOCO_GL"] = "egl"
SMOLVLA_LIBERO_PATH = "HuggingFaceVLA/smolvla_libero"
LIBERO_CONFIG = make_env_config("libero", task="libero_spatial")
POLICY_CONFIG = make_policy_config("smolvla", pretrained_path=SMOLVLA_LIBERO_PATH)
policy = make_policy(
    cfg=POLICY_CONFIG,
    env_cfg=LIBERO_CONFIG,
)
libero_env = make_env(LIBERO_CONFIG)
print(type(libero_env))  # <class 'dict'>
print(libero_env.keys())  # dict_keys(['libero_spatial', 'libero_object'])
# switch the policy (SmolVLA here) to evaluation mode for inference
policy.eval()
preprocessor, postprocessor = make_pre_post_processors(
    policy_cfg=POLICY_CONFIG,
    pretrained_path=SMOLVLA_LIBERO_PATH,
    # Override the device saved at training time so inference runs on the detected hardware.
    preprocessor_overrides={"device_processor": {"device": str(policy.config.device)}},
)
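# reset the policy's internal state (e.g. any cached action chunks) before starting a rollout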
policy.reset()
# for the sake of this example we only use one env from each task suite
libero_spatial_env = libero_env['libero_spatial'][0]
# libero_object_env = libero_env['libero_object'][0]
# let's first run an evaluation through the first task
observation, info = libero_spatial_env.reset() # you can pass seeds
max_steps = 220
step = 0
all_images = []
done = np.array([False] * libero_spatial_env.num_envs)
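# rollout loop: preprocess the observation, query the policy, render a frame, then step the env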
while not np.all(done) and step < max_steps:
    observation = preprocess_observation(observation)
    observation = add_envs_task(libero_spatial_env, observation)
    observation = preprocessor(observation)
    with torch.inference_mode():
        action = policy.select_action(observation)
    action = postprocessor(action)
    # Convert to CPU / numpy.
    action_numpy = action.to("cpu").numpy()
    # Render the current frame for the rollout video.
    image = libero_spatial_env.call("render")[0]
    all_images.append(image)
    # Apply the next action.
    observation, reward, terminated, truncated, info = libero_spatial_env.step(action_numpy)
    if "final_info" in info:
        final_info = info["final_info"]
        if not isinstance(final_info, dict):
            raise RuntimeError(
                "Unsupported `final_info` format: expected dict (Gymnasium >= 1.0). "
                "You're likely using an older version of gymnasium (< 1.0). Please upgrade."
            )
        successes = final_info["is_success"].tolist()
    else:
        successes = [False] * libero_spatial_env.num_envs
    done = terminated | truncated | done
    if step + 1 == max_steps:
        done = np.ones_like(done, dtype=bool)
    step += 1
print("The success: ", successes)
+1 -1
@@ -365,7 +365,7 @@ def make_policy(
if not cfg.input_features:
cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
kwargs["config"] = cfg
breakpoint()
if cfg.pretrained_path:
# Load a pretrained policy and override the config if needed (for example, if there are inference-time
# hyperparameters that we want to vary).
-1
@@ -502,7 +502,6 @@ def eval_main(cfg: EvalPipelineConfig):
cfg=cfg.policy,
env_cfg=cfg.env,
)
policy.eval()
preprocessor, postprocessor = make_pre_post_processors(
policy_cfg=cfg.policy,