working changes

2026-07-24 18:26:11 +00:00 · 2025-11-18 13:55:45 +01:00
parent 4c67330430
commit e5efb6b6dc
8 changed files with 159 additions and 13 deletions
@@ -0,0 +1,6 @@
+lerobot-eval \
+  --policy.path="HuggingFaceVLA/smolvla_libero" \
+  --env.type=libero \
+  --env.task=libero_spatial \
+  --eval.batch_size=1 \
+  --eval.n_episodes=1
@@ -246,7 +246,14 @@ class LiberoEnv(EnvConfig):
    features_map: dict[str, str] = field(
        default_factory=lambda: {
            ACTION: ACTION,
-            "agent_pos": OBS_STATE,
+            "robot_state/eef/pos": f"{OBS_STATE}.eef_pos",
+            "robot_state/eef/quat": f"{OBS_STATE}.eef_quat",
+            "robot_state/eef/mat": f"{OBS_STATE}.eef_mat",
+            "robot_state/eef/axisangle": f"{OBS_STATE}.eef_axisangle",
+            "robot_state/gripper/qpos": f"{OBS_STATE}.gripper_qpos",
+            "robot_state/gripper/qvel": f"{OBS_STATE}.gripper_qvel",
+            "robot_state/joints/pos": f"{OBS_STATE}.joint_pos",
+            "robot_state/joints/vel": f"{OBS_STATE}.joint_vel",
            "pixels/agentview_image": f"{OBS_IMAGES}.image",
            "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
        }
@@ -175,15 +175,34 @@ class LiberoEnv(gym.Env):
            self.observation_space = spaces.Dict(
                {
                    "pixels": spaces.Dict(images),
-                    "agent_pos": spaces.Box(
-                        low=AGENT_POS_LOW,
-                        high=AGENT_POS_HIGH,
-                        shape=(OBS_STATE_DIM,),
-                        dtype=np.float64,
+                    "robot_state": spaces.Dict(
+                        {
+                            "eef": spaces.Dict(
+                                {
+                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
+                                    "quat": spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64),
+                                    "mat": spaces.Box(low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64),
+                                    "axisangle": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
+                                }
+                            ),
+                            "gripper": spaces.Dict(
+                                {
+                                    "qpos": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64),
+                                    "qvel": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64),
+                                }
+                            ),
+                            "joints": spaces.Dict(
+                                {
+                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
+                                    "vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
+                                }
+                            ),
+                        }
                    ),
                }
            )

+
        self.action_space = spaces.Box(
            low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
        )
@@ -29,7 +29,7 @@ from torch import Tensor

 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.envs.configs import EnvConfig
-from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STR
 from lerobot.utils.utils import get_channel_first_image_shape


@@ -78,12 +78,18 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten

        return_observations[OBS_ENV_STATE] = env_state

-    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
-    agent_pos = torch.from_numpy(observations["agent_pos"]).float()
-    if agent_pos.dim() == 1:
-        agent_pos = agent_pos.unsqueeze(0)
-    return_observations[OBS_STATE] = agent_pos
+    if "agent_pos" in observations:
+        agent_pos = torch.from_numpy(observations["agent_pos"]).float()
+        if agent_pos.dim() == 1:
+            agent_pos = agent_pos.unsqueeze(0)
+        return_observations[OBS_STATE] = agent_pos
    
+    if "robot_state" in observations:
+        # simply copy nested dict as-is
+        return_observations[f"{OBS_STR}.robot_state"] = {
+            k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v
+            for k, v in observations["robot_state"].items()
+        }
    return return_observations


@@ -25,6 +25,11 @@ from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_ST

 from .pipeline import ObservationProcessorStep, ProcessorStepRegistry

+try:
+    from robosuite.utils.transform_utils import quat2axisangle
+except ImportError:
+    quat2axisangle = None
+

@dataclass
@ProcessorStepRegistry.register(name="observation_processor")
@@ -204,3 +209,72 @@ class VanillaObservationProcessorStep(ObservationProcessorStep):
                new_features[src_ft][key] = feat

        return new_features
+
+
+@dataclass
+@ProcessorStepRegistry.register(name="libero_processor")
+class LiberoProcessorStep(ObservationProcessorStep):
+    """
+    Processes LIBERO observations into the LeRobot format.
+
+    This step handles the specific observation structure from LIBERO environments,
+    which includes nested robot_state dictionaries and image observations.
+
+    **State Processing:**
+    -   Processes the `robot_state` dictionary which contains nested end-effector,
+        gripper, and joint information.
+    -   Extracts and concatenates:
+        - End-effector position (3D)
+        - End-effector quaternion converted to axis-angle (3D)
+        - Gripper joint positions (2D)
+    -   Maps the concatenated state to `"observation.state"`.
+    """
+
+    def _process_observation(self, observation):
+        """
+        Processes both image and robot_state observations from LIBERO.
+        """
+        if quat2axisangle is None:
+            raise ImportError(
+                "robosuite is required for LiberoProcessorStep. "
+                "Install it with: pip install robosuite"
+            )
+
+        processed_obs = observation.copy()
+
+        # Process robot_state into a flat state vector
+        if "observation.robot_state" in processed_obs:
+            robot_state = processed_obs.pop("observation.robot_state")
+
+            # Extract components
+            eef_pos = robot_state["eef"]["pos"]  # (3,)
+            eef_quat = robot_state["eef"]["quat"]  # (4,)
+            gripper_qpos = robot_state["gripper"]["qpos"]  # (2,)
+
+            # Convert quaternion to axis-angle
+            eef_axisangle = quat2axisangle(eef_quat.squeeze(0))  # (3,)
+            eef_axisangle = eef_axisangle[np.newaxis, :]         # (1, 3)
+
+            # Concatenate into a single state vector
+            state = np.concatenate((eef_pos, eef_axisangle, gripper_qpos), axis=1)
+
+            # Convert to tensor
+            state_tensor = torch.from_numpy(state).float()
+            if state_tensor.dim() == 1:
+                state_tensor = state_tensor.unsqueeze(0)
+
+            processed_obs[OBS_STATE] = state_tensor
+        return processed_obs
+
+    def transform_features(
+        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
+    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
+        """
+        Transforms feature keys from the LIBERO format to the LeRobot standard.
+        """
+        new_features: dict[PipelineFeatureType, dict[str, PolicyFeature]] = {ft: {} for ft in features}
+        return new_features
+    
+    def observation(self, observation):
+        return self._process_observation(observation)
+
@@ -165,6 +165,7 @@ def rollout(
        # Infer "task" from attributes of environments.
        # TODO: works with SyncVectorEnv but not AsyncVectorEnv
        observation = add_envs_task(env, observation)
+        breakpoint()
        observation = preprocessor(observation)
        with torch.inference_mode():
            action = policy.select_action(observation)
@@ -1,4 +1,7 @@
 from lerobot.envs.factory import make_env, make_env_config
+from lerobot.envs.utils import add_envs_task, preprocess_observation
+from lerobot.processor.pipeline import PolicyProcessorPipeline
+from lerobot.processor.observation_processor import LiberoProcessorStep
 config = make_env_config("libero", task="libero_spatial")
 envs_dict = make_env(config)
 env = envs_dict["libero_spatial"][0]
@@ -7,3 +10,13 @@ seed = 42

 # First rollout
 obs1, info1 = env.reset(seed=seed)
+
+observation = preprocess_observation(obs1)
+observation = add_envs_task(env, observation)
+
+libero_preprocessor = PolicyProcessorPipeline(
+    steps=[
+        LiberoProcessorStep(),
+    ]
+)
+observation = libero_preprocessor(observation)
@@ -0,0 +1,20 @@
+from lerobot.processor.observation_processor import VanillaObservationProcessorStep
+from lerobot.processor.converters import create_transition
+from lerobot.processor import TransitionKey
+from lerobot.utils.constants import OBS_IMAGE
+import numpy as np
+
+processor = VanillaObservationProcessorStep()
+
+# Create a mock image (H, W, C) format, uint8
+image = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
+
+observation = {"pixels": image}
+transition = create_transition(observation=observation)
+breakpoint()
+result = processor(transition)
+processed_obs = result[TransitionKey.OBSERVATION]
+
+# Check that the image was processed correctly
+assert OBS_IMAGE in processed_obs
+processed_img = processed_obs[OBS_IMAGE]