From e5efb6b6dc7fb8c1d5bbd894a6cbb8d7b9c3722e Mon Sep 17 00:00:00 2001
From: "jade.choghari@huggingface.co" <“chogharijade@gmail.com”>
Date: Tue, 18 Nov 2025 13:55:45 +0100
Subject: [PATCH] working changes

---
 eval.sh                                       |  6 ++
 src/lerobot/envs/configs.py                   |  9 ++-
 src/lerobot/envs/libero.py                    | 29 ++++++--
 src/lerobot/envs/utils.py                     | 20 +++--
 .../processor/observation_processor.py        | 74 +++++++++++++++++++
 src/lerobot/scripts/lerobot_eval.py           |  1 +
 test2.py                                      | 13 ++++
 test3.py                                      | 20 +++++
 8 files changed, 159 insertions(+), 13 deletions(-)
 create mode 100644 eval.sh
 create mode 100644 test3.py

diff --git a/eval.sh b/eval.sh
new file mode 100644
index 000000000..151a292d0
--- /dev/null
+++ b/eval.sh
@@ -0,0 +1,6 @@
+lerobot-eval \
+  --policy.path="HuggingFaceVLA/smolvla_libero" \
+  --env.type=libero \
+  --env.task=libero_spatial \
+  --eval.batch_size=1 \
+  --eval.n_episodes=1
\ No newline at end of file
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index 029da95ac..a8e69330b 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -246,7 +246,14 @@ class LiberoEnv(EnvConfig):
     features_map: dict[str, str] = field(
         default_factory=lambda: {
             ACTION: ACTION,
-            "agent_pos": OBS_STATE,
+            "robot_state/eef/pos": f"{OBS_STATE}.eef_pos",
+            "robot_state/eef/quat": f"{OBS_STATE}.eef_quat",
+            "robot_state/eef/mat": f"{OBS_STATE}.eef_mat",
+            "robot_state/eef/axisangle": f"{OBS_STATE}.eef_axisangle",
+            "robot_state/gripper/qpos": f"{OBS_STATE}.gripper_qpos",
+            "robot_state/gripper/qvel": f"{OBS_STATE}.gripper_qvel",
+            "robot_state/joints/pos": f"{OBS_STATE}.joint_pos",
+            "robot_state/joints/vel": f"{OBS_STATE}.joint_vel",
             "pixels/agentview_image": f"{OBS_IMAGES}.image",
             "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
         }
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index 5e692501a..a44f18881 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -175,15 +175,34 @@ class LiberoEnv(gym.Env):
             self.observation_space = spaces.Dict(
                 {
                     "pixels": spaces.Dict(images),
-                    "agent_pos": spaces.Box(
-                        low=AGENT_POS_LOW,
-                        high=AGENT_POS_HIGH,
-                        shape=(OBS_STATE_DIM,),
-                        dtype=np.float64,
+                    "robot_state": spaces.Dict(
+                        {
+                            "eef": spaces.Dict(
+                                {
+                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
+                                    "quat": spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64),
+                                    "mat": spaces.Box(low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64),
+                                    "axisangle": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
+                                }
+                            ),
+                            "gripper": spaces.Dict(
+                                {
+                                    "qpos": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64),
+                                    "qvel": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64),
+                                }
+                            ),
+                            "joints": spaces.Dict(
+                                {
+                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
+                                    "vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
+                                }
+                            ),
+                        }
                     ),
                 }
             )
 
+
         self.action_space = spaces.Box(
             low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
         )
diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py
index 1c7f15427..e9eb17611 100644
--- a/src/lerobot/envs/utils.py
+++ b/src/lerobot/envs/utils.py
@@ -29,7 +29,7 @@ from torch import Tensor
 
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.envs.configs import EnvConfig
-from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STR
 from lerobot.utils.utils import get_channel_first_image_shape
 
 
@@ -78,12 +78,18 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
 
         return_observations[OBS_ENV_STATE] = env_state
 
-    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
-    agent_pos = torch.from_numpy(observations["agent_pos"]).float()
-    if agent_pos.dim() == 1:
-        agent_pos = agent_pos.unsqueeze(0)
-    return_observations[OBS_STATE] = agent_pos
-
+    if "agent_pos" in observations:
+        agent_pos = torch.from_numpy(observations["agent_pos"]).float()
+        if agent_pos.dim() == 1:
+            agent_pos = agent_pos.unsqueeze(0)
+        return_observations[OBS_STATE] = agent_pos
+    
+    if "robot_state" in observations:
+        # simply copy nested dict as-is
+        return_observations[f"{OBS_STR}.robot_state"] = {
+            k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v
+            for k, v in observations["robot_state"].items()
+        }
     return return_observations
 
 
diff --git a/src/lerobot/processor/observation_processor.py b/src/lerobot/processor/observation_processor.py
index d22d8fb96..cf314b781 100644
--- a/src/lerobot/processor/observation_processor.py
+++ b/src/lerobot/processor/observation_processor.py
@@ -25,6 +25,11 @@ from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_ST
 
 from .pipeline import ObservationProcessorStep, ProcessorStepRegistry
 
+try:
+    from robosuite.utils.transform_utils import quat2axisangle
+except ImportError:
+    quat2axisangle = None
+
 
 @dataclass
 @ProcessorStepRegistry.register(name="observation_processor")
@@ -204,3 +209,72 @@ class VanillaObservationProcessorStep(ObservationProcessorStep):
                 new_features[src_ft][key] = feat
 
         return new_features
+
+
+@dataclass
+@ProcessorStepRegistry.register(name="libero_processor")
+class LiberoProcessorStep(ObservationProcessorStep):
+    """
+    Processes LIBERO observations into the LeRobot format.
+
+    This step handles the specific observation structure from LIBERO environments,
+    which includes nested robot_state dictionaries and image observations.
+
+    **State Processing:**
+    -   Processes the `robot_state` dictionary which contains nested end-effector,
+        gripper, and joint information.
+    -   Extracts and concatenates:
+        - End-effector position (3D)
+        - End-effector quaternion converted to axis-angle (3D)
+        - Gripper joint positions (2D)
+    -   Maps the concatenated state to `"observation.state"`.
+    """
+
+    def _process_observation(self, observation):
+        """
+        Processes both image and robot_state observations from LIBERO.
+        """
+        if quat2axisangle is None:
+            raise ImportError(
+                "robosuite is required for LiberoProcessorStep. "
+                "Install it with: pip install robosuite"
+            )
+
+        processed_obs = observation.copy()
+
+        # Process robot_state into a flat state vector
+        if "observation.robot_state" in processed_obs:
+            robot_state = processed_obs.pop("observation.robot_state")
+
+            # Extract components
+            eef_pos = robot_state["eef"]["pos"]  # (3,)
+            eef_quat = robot_state["eef"]["quat"]  # (4,)
+            gripper_qpos = robot_state["gripper"]["qpos"]  # (2,)
+
+            # Convert quaternion to axis-angle
+            eef_axisangle = quat2axisangle(eef_quat.squeeze(0))  # (3,)
+            eef_axisangle = eef_axisangle[np.newaxis, :]         # (1, 3)
+
+            # Concatenate into a single state vector
+            state = np.concatenate((eef_pos, eef_axisangle, gripper_qpos), axis=1)
+
+            # Convert to tensor
+            state_tensor = torch.from_numpy(state).float()
+            if state_tensor.dim() == 1:
+                state_tensor = state_tensor.unsqueeze(0)
+
+            processed_obs[OBS_STATE] = state_tensor
+        return processed_obs
+
+    def transform_features(
+        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
+    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
+        """
+        Transforms feature keys from the LIBERO format to the LeRobot standard.
+        """
+        new_features: dict[PipelineFeatureType, dict[str, PolicyFeature]] = {ft: {} for ft in features}
+        return new_features
+    
+    def observation(self, observation):
+        return self._process_observation(observation)
+
diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py
index 0d66fa1aa..76e6b1242 100644
--- a/src/lerobot/scripts/lerobot_eval.py
+++ b/src/lerobot/scripts/lerobot_eval.py
@@ -165,6 +165,7 @@ def rollout(
         # Infer "task" from attributes of environments.
         # TODO: works with SyncVectorEnv but not AsyncVectorEnv
         observation = add_envs_task(env, observation)
+        breakpoint()
         observation = preprocessor(observation)
         with torch.inference_mode():
             action = policy.select_action(observation)
diff --git a/test2.py b/test2.py
index 9b2cf68a3..9684333e0 100644
--- a/test2.py
+++ b/test2.py
@@ -1,4 +1,7 @@
 from lerobot.envs.factory import make_env, make_env_config
+from lerobot.envs.utils import add_envs_task, preprocess_observation
+from lerobot.processor.pipeline import PolicyProcessorPipeline
+from lerobot.processor.observation_processor import LiberoProcessorStep
 config = make_env_config("libero", task="libero_spatial")
 envs_dict = make_env(config)
 env = envs_dict["libero_spatial"][0]
@@ -7,3 +10,13 @@ seed = 42
 
 # First rollout
 obs1, info1 = env.reset(seed=seed)
+
+observation = preprocess_observation(obs1)
+observation = add_envs_task(env, observation)
+
+libero_preprocessor = PolicyProcessorPipeline(
+    steps=[
+        LiberoProcessorStep(),
+    ]
+)
+observation = libero_preprocessor(observation)
diff --git a/test3.py b/test3.py
new file mode 100644
index 000000000..83b28ee2a
--- /dev/null
+++ b/test3.py
@@ -0,0 +1,20 @@
+from lerobot.processor.observation_processor import VanillaObservationProcessorStep
+from lerobot.processor.converters import create_transition
+from lerobot.processor import TransitionKey
+from lerobot.utils.constants import OBS_IMAGE
+import numpy as np
+
+processor = VanillaObservationProcessorStep()
+
+# Create a mock image (H, W, C) format, uint8
+image = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
+
+observation = {"pixels": image}
+transition = create_transition(observation=observation)
+breakpoint()
+result = processor(transition)
+processed_obs = result[TransitionKey.OBSERVATION]
+
+# Check that the image was processed correctly
+assert OBS_IMAGE in processed_obs
+processed_img = processed_obs[OBS_IMAGE]