working changes

2026-07-23 09:46:00 +00:00 · 2025-11-18 13:55:45 +01:00
parent 4c67330430
commit e5efb6b6dc
8 changed files with 159 additions and 13 deletions
@@ -0,0 +1,6 @@
 lerobot-eval \
  --policy.path="HuggingFaceVLA/smolvla_libero" \
  --env.type=libero \
  --env.task=libero_spatial \
  --eval.batch_size=1 \
  --eval.n_episodes=1
@@ -246,7 +246,14 @@ class LiberoEnv(EnvConfig):
    features_map: dict[str, str] = field(
        default_factory=lambda: {
            ACTION: ACTION,
-            "agent_pos": OBS_STATE,
+            "robot_state/eef/pos": f"{OBS_STATE}.eef_pos",
            "robot_state/eef/quat": f"{OBS_STATE}.eef_quat",
            "robot_state/eef/mat": f"{OBS_STATE}.eef_mat",
            "robot_state/eef/axisangle": f"{OBS_STATE}.eef_axisangle",
            "robot_state/gripper/qpos": f"{OBS_STATE}.gripper_qpos",
            "robot_state/gripper/qvel": f"{OBS_STATE}.gripper_qvel",
            "robot_state/joints/pos": f"{OBS_STATE}.joint_pos",
            "robot_state/joints/vel": f"{OBS_STATE}.joint_vel",
            "pixels/agentview_image": f"{OBS_IMAGES}.image",
            "pixels/robot0_eye_in_hand_image": f"{OBS_IMAGES}.image2",
        }
@@ -175,15 +175,34 @@ class LiberoEnv(gym.Env):
            self.observation_space = spaces.Dict(
                {
                    "pixels": spaces.Dict(images),
-                    "agent_pos": spaces.Box(
+                    "robot_state": spaces.Dict(
-                        low=AGENT_POS_LOW,
+                        {
-                        high=AGENT_POS_HIGH,
+                            "eef": spaces.Dict(
-                        shape=(OBS_STATE_DIM,),
+                                {
-                        dtype=np.float64,
+                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
                                    "quat": spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64),
                                    "mat": spaces.Box(low=-np.inf, high=np.inf, shape=(3, 3), dtype=np.float64),
                                    "axisangle": spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64),
                                }
                            ),
                            "gripper": spaces.Dict(
                                {
                                    "qpos": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64),
                                    "qvel": spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64),
                                }
                            ),
                            "joints": spaces.Dict(
                                {
                                    "pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
                                    "vel": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
                                }
                            ),
                        }
                    ),
                }
            )
        self.action_space = spaces.Box(
            low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
        )
@@ -29,7 +29,7 @@ from torch import Tensor
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.envs.configs import EnvConfig
-from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STR
 from lerobot.utils.utils import get_channel_first_image_shape
@@ -78,12 +78,18 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
        return_observations[OBS_ENV_STATE] = env_state
-    # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
+    if "agent_pos" in observations:
-    agent_pos = torch.from_numpy(observations["agent_pos"]).float()
+        agent_pos = torch.from_numpy(observations["agent_pos"]).float()
-    if agent_pos.dim() == 1:
+        if agent_pos.dim() == 1:
-        agent_pos = agent_pos.unsqueeze(0)
+            agent_pos = agent_pos.unsqueeze(0)
-    return_observations[OBS_STATE] = agent_pos
+        return_observations[OBS_STATE] = agent_pos
    if "robot_state" in observations:
        # simply copy nested dict as-is
        return_observations[f"{OBS_STR}.robot_state"] = {
            k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v
            for k, v in observations["robot_state"].items()
        }
    return return_observations
@@ -25,6 +25,11 @@ from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_ST
 from .pipeline import ObservationProcessorStep, ProcessorStepRegistry
 try:
    from robosuite.utils.transform_utils import quat2axisangle
 except ImportError:
    quat2axisangle = None
@dataclass
@ProcessorStepRegistry.register(name="observation_processor")
@@ -204,3 +209,72 @@ class VanillaObservationProcessorStep(ObservationProcessorStep):
                new_features[src_ft][key] = feat
        return new_features
@dataclass
@ProcessorStepRegistry.register(name="libero_processor")
 class LiberoProcessorStep(ObservationProcessorStep):
    """
    Processes LIBERO observations into the LeRobot format.
    This step handles the specific observation structure from LIBERO environments,
    which includes nested robot_state dictionaries and image observations.
    **State Processing:**
    -   Processes the `robot_state` dictionary which contains nested end-effector,
        gripper, and joint information.
    -   Extracts and concatenates:
        - End-effector position (3D)
        - End-effector quaternion converted to axis-angle (3D)
        - Gripper joint positions (2D)
    -   Maps the concatenated state to `"observation.state"`.
    """
    def _process_observation(self, observation):
        """
        Processes both image and robot_state observations from LIBERO.
        """
        if quat2axisangle is None:
            raise ImportError(
                "robosuite is required for LiberoProcessorStep. "
                "Install it with: pip install robosuite"
            )
        processed_obs = observation.copy()
        # Process robot_state into a flat state vector
        if "observation.robot_state" in processed_obs:
            robot_state = processed_obs.pop("observation.robot_state")
            # Extract components
            eef_pos = robot_state["eef"]["pos"]  # (3,)
            eef_quat = robot_state["eef"]["quat"]  # (4,)
            gripper_qpos = robot_state["gripper"]["qpos"]  # (2,)
            # Convert quaternion to axis-angle
            eef_axisangle = quat2axisangle(eef_quat.squeeze(0))  # (3,)
            eef_axisangle = eef_axisangle[np.newaxis, :]         # (1, 3)
            # Concatenate into a single state vector
            state = np.concatenate((eef_pos, eef_axisangle, gripper_qpos), axis=1)
            # Convert to tensor
            state_tensor = torch.from_numpy(state).float()
            if state_tensor.dim() == 1:
                state_tensor = state_tensor.unsqueeze(0)
            processed_obs[OBS_STATE] = state_tensor
        return processed_obs
    def transform_features(
        self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]]
    ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]:
        """
        Transforms feature keys from the LIBERO format to the LeRobot standard.
        """
        new_features: dict[PipelineFeatureType, dict[str, PolicyFeature]] = {ft: {} for ft in features}
        return new_features
    def observation(self, observation):
        return self._process_observation(observation)
@@ -165,6 +165,7 @@ def rollout(
        # Infer "task" from attributes of environments.
        # TODO: works with SyncVectorEnv but not AsyncVectorEnv
        observation = add_envs_task(env, observation)
        breakpoint()
        observation = preprocessor(observation)
        with torch.inference_mode():
            action = policy.select_action(observation)
@@ -1,4 +1,7 @@
 from lerobot.envs.factory import make_env, make_env_config
 from lerobot.envs.utils import add_envs_task, preprocess_observation
 from lerobot.processor.pipeline import PolicyProcessorPipeline
 from lerobot.processor.observation_processor import LiberoProcessorStep
 config = make_env_config("libero", task="libero_spatial")
 envs_dict = make_env(config)
 env = envs_dict["libero_spatial"][0]
@@ -7,3 +10,13 @@ seed = 42
 # First rollout
 obs1, info1 = env.reset(seed=seed)
 observation = preprocess_observation(obs1)
 observation = add_envs_task(env, observation)
 libero_preprocessor = PolicyProcessorPipeline(
    steps=[
        LiberoProcessorStep(),
    ]
 )
 observation = libero_preprocessor(observation)
@@ -0,0 +1,20 @@
 from lerobot.processor.observation_processor import VanillaObservationProcessorStep
 from lerobot.processor.converters import create_transition
 from lerobot.processor import TransitionKey
 from lerobot.utils.constants import OBS_IMAGE
 import numpy as np
 processor = VanillaObservationProcessorStep()
 # Create a mock image (H, W, C) format, uint8
 image = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
 observation = {"pixels": image}
 transition = create_transition(observation=observation)
 breakpoint()
 result = processor(transition)
 processed_obs = result[TransitionKey.OBSERVATION]
 # Check that the image was processed correctly
 assert OBS_IMAGE in processed_obs
 processed_img = processed_obs[OBS_IMAGE]