From e5efb6b6dc7fb8c1d5bbd894a6cbb8d7b9c3722e Mon Sep 17 00:00:00 2001 From: "jade.choghari@huggingface.co" <“chogharijade@gmail.com”> Date: Tue, 18 Nov 2025 13:55:45 +0100 Subject: [PATCH] working changes --- eval.sh | 6 ++ src/lerobot/envs/configs.py | 9 ++- src/lerobot/envs/libero.py | 29 ++++++-- src/lerobot/envs/utils.py | 20 +++-- .../processor/observation_processor.py | 74 +++++++++++++++++++ src/lerobot/scripts/lerobot_eval.py | 1 + test2.py | 13 ++++ test3.py | 20 +++++ 8 files changed, 159 insertions(+), 13 deletions(-) create mode 100644 eval.sh create mode 100644 test3.py diff --git a/eval.sh b/eval.sh new file mode 100644 index 000000000..151a292d0 --- /dev/null +++ b/eval.sh @@ -0,0 +1,6 @@ +lerobot-eval \ + --policy.path="HuggingFaceVLA/smolvla_libero" \ + --env.type=libero \ + --env.task=libero_spatial \ + --eval.batch_size=1 \ + --eval.n_episodes=1 \ No newline at end of file diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index 029da95ac..a8e69330b 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -246,7 +246,14 @@ class LiberoEnv(EnvConfig): features_map: dict[str, str] = field( default_factory=lambda: { ACTION: ACTION, - "agent_pos": OBS_STATE, + "robot_state/eef/pos": f"{OBS_STATE}.eef_pos", + "robot_state/eef/quat": f"{OBS_STATE}.eef_quat", + "robot_state/eef/mat": f"{OBS_STATE}.eef_mat", + "robot_state/eef/axisangle": f"{OBS_STATE}.eef_axisangle", + "robot_state/gripper/qpos": f"{OBS_STATE}.gripper_qpos", + "robot_state/gripper/qvel": f"{OBS_STATE}.gripper_qvel", + "robot_state/joints/pos": f"{OBS_STATE}.joint_pos", + "robot_state/joints/vel": f"{OBS_STATE}.joint_vel", "pixels/agentview_image": f"{OBS_IMAGES}.image", "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2", } diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py index 5e692501a..a44f18881 100644 --- a/src/lerobot/envs/libero.py +++ b/src/lerobot/envs/libero.py @@ -175,15 +175,34 @@ class LiberoEnv(gym.Env): self.observation_space = spaces.Dict( { "pixels": spaces.Dict(images), - "agent_pos": spaces.Box( - low=AGENT_POS_LOW, - high=AGENT_POS_HIGH, - shape=(OBS_STATE_DIM,), - dtype=np.float64, + "robot_state": spaces.Dict( + { + "eef": spaces.Dict( + { + "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64), + "quat": spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64), + "mat": spaces.Box(low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64), + "axisangle": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64), + } + ), + "gripper": spaces.Dict( + { + "qpos": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64), + "qvel": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64), + } + ), + "joints": spaces.Dict( + { + "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64), + "vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64), + } + ), + } ), } ) + self.action_space = spaces.Box( low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32 ) diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py index 1c7f15427..e9eb17611 100644 --- a/src/lerobot/envs/utils.py +++ b/src/lerobot/envs/utils.py @@ -29,7 +29,7 @@ from torch import Tensor from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.envs.configs import EnvConfig -from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE +from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STR from lerobot.utils.utils import get_channel_first_image_shape @@ -78,12 +78,18 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten return_observations[OBS_ENV_STATE] = env_state - # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing - agent_pos = torch.from_numpy(observations["agent_pos"]).float() - if agent_pos.dim() == 1: - agent_pos = agent_pos.unsqueeze(0) - return_observations[OBS_STATE] = agent_pos - + if "agent_pos" in observations: + agent_pos = torch.from_numpy(observations["agent_pos"]).float() + if agent_pos.dim() == 1: + agent_pos = agent_pos.unsqueeze(0) + return_observations[OBS_STATE] = agent_pos + + if "robot_state" in observations: + # simply copy nested dict as-is + return_observations[f"{OBS_STR}.robot_state"] = { + k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v + for k, v in observations["robot_state"].items() + } return return_observations diff --git a/src/lerobot/processor/observation_processor.py b/src/lerobot/processor/observation_processor.py index d22d8fb96..cf314b781 100644 --- a/src/lerobot/processor/observation_processor.py +++ b/src/lerobot/processor/observation_processor.py @@ -25,6 +25,11 @@ from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_ST from .pipeline import ObservationProcessorStep, ProcessorStepRegistry +try: + from robosuite.utils.transform_utils import quat2axisangle +except ImportError: + quat2axisangle = None + @dataclass @ProcessorStepRegistry.register(name="observation_processor") @@ -204,3 +209,72 @@ class VanillaObservationProcessorStep(ObservationProcessorStep): new_features[src_ft][key] = feat return new_features + + +@dataclass +@ProcessorStepRegistry.register(name="libero_processor") +class LiberoProcessorStep(ObservationProcessorStep): + """ + Processes LIBERO observations into the LeRobot format. + + This step handles the specific observation structure from LIBERO environments, + which includes nested robot_state dictionaries and image observations. + + **State Processing:** + - Processes the `robot_state` dictionary which contains nested end-effector, + gripper, and joint information. + - Extracts and concatenates: + - End-effector position (3D) + - End-effector quaternion converted to axis-angle (3D) + - Gripper joint positions (2D) + - Maps the concatenated state to `"observation.state"`. + """ + + def _process_observation(self, observation): + """ + Processes both image and robot_state observations from LIBERO. + """ + if quat2axisangle is None: + raise ImportError( + "robosuite is required for LiberoProcessorStep. " + "Install it with: pip install robosuite" + ) + + processed_obs = observation.copy() + + # Process robot_state into a flat state vector + if "observation.robot_state" in processed_obs: + robot_state = processed_obs.pop("observation.robot_state") + + # Extract components + eef_pos = robot_state["eef"]["pos"] # (3,) + eef_quat = robot_state["eef"]["quat"] # (4,) + gripper_qpos = robot_state["gripper"]["qpos"] # (2,) + + # Convert quaternion to axis-angle + eef_axisangle = quat2axisangle(eef_quat.squeeze(0)) # (3,) + eef_axisangle = eef_axisangle[np.newaxis, :] # (1, 3) + + # Concatenate into a single state vector + state = np.concatenate((eef_pos, eef_axisangle, gripper_qpos), axis=1) + + # Convert to tensor + state_tensor = torch.from_numpy(state).float() + if state_tensor.dim() == 1: + state_tensor = state_tensor.unsqueeze(0) + + processed_obs[OBS_STATE] = state_tensor + return processed_obs + + def transform_features( + self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] + ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + """ + Transforms feature keys from the LIBERO format to the LeRobot standard. + """ + new_features: dict[PipelineFeatureType, dict[str, PolicyFeature]] = {ft: {} for ft in features} + return new_features + + def observation(self, observation): + return self._process_observation(observation) + diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py index 0d66fa1aa..76e6b1242 100644 --- a/src/lerobot/scripts/lerobot_eval.py +++ b/src/lerobot/scripts/lerobot_eval.py @@ -165,6 +165,7 @@ def rollout( # Infer "task" from attributes of environments. # TODO: works with SyncVectorEnv but not AsyncVectorEnv observation = add_envs_task(env, observation) + breakpoint() observation = preprocessor(observation) with torch.inference_mode(): action = policy.select_action(observation) diff --git a/test2.py b/test2.py index 9b2cf68a3..9684333e0 100644 --- a/test2.py +++ b/test2.py @@ -1,4 +1,7 @@ from lerobot.envs.factory import make_env, make_env_config +from lerobot.envs.utils import add_envs_task, preprocess_observation +from lerobot.processor.pipeline import PolicyProcessorPipeline +from lerobot.processor.observation_processor import LiberoProcessorStep config = make_env_config("libero", task="libero_spatial") envs_dict = make_env(config) env = envs_dict["libero_spatial"][0] @@ -7,3 +10,13 @@ seed = 42 # First rollout obs1, info1 = env.reset(seed=seed) + +observation = preprocess_observation(obs1) +observation = add_envs_task(env, observation) + +libero_preprocessor = PolicyProcessorPipeline( + steps=[ + LiberoProcessorStep(), + ] +) +observation = libero_preprocessor(observation) diff --git a/test3.py b/test3.py new file mode 100644 index 000000000..83b28ee2a --- /dev/null +++ b/test3.py @@ -0,0 +1,20 @@ +from lerobot.processor.observation_processor import VanillaObservationProcessorStep +from lerobot.processor.converters import create_transition +from lerobot.processor import TransitionKey +from lerobot.utils.constants import OBS_IMAGE +import numpy as np + +processor = VanillaObservationProcessorStep() + +# Create a mock image (H, W, C) format, uint8 +image = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8) + +observation = {"pixels": image} +transition = create_transition(observation=observation) +breakpoint() +result = processor(transition) +processed_obs = result[TransitionKey.OBSERVATION] + +# Check that the image was processed correctly +assert OBS_IMAGE in processed_obs +processed_img = processed_obs[OBS_IMAGE]