make it work

Jade Choghari
2025-09-17 11:16:42 +02:00
parent d31283cc5d
commit 2c17433f4d
3 changed files with 98 additions and 66 deletions
+58
@@ -0,0 +1,58 @@
#!/bin/bash
# storage / caches
RAID=/raid/jade
export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
export HF_HOME=$RAID/.cache/huggingface
export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p "$TMPDIR"

# offline / runtime settings
export WANDB_MODE=offline
export HF_DATASETS_OFFLINE=1
unset HF_HUB_OFFLINE  # allow hub access for this run
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl
export CUDA_VISIBLE_DEVICES=2

# CONFIGURATION
# POLICY_PATH="/raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model"
POLICY_PATH="/raid/jade/models/smolvla_pipe"
TASK=libero_spatial
ENV_TYPE="libero"
BATCH_SIZE=1
N_EPISODES=1
N_ACTION_STEPS=1
# RUN EVALUATION
python src/lerobot/scripts/eval.py \
--policy.path="$POLICY_PATH" \
--env.type="$ENV_TYPE" \
--eval.batch_size="$BATCH_SIZE" \
--eval.n_episodes="$N_EPISODES" \
--env.task=$TASK \
    --env.max_parallel_tasks=10
# python examples/evaluate_libero.py \
# --policy_path "$POLICY_PATH" \
# --task_suite_name "$TASK" \
# --num_steps_wait 10 \
# --num_trials_per_task 10 \
# --video_out_path "data/libero/videos" \
# --device "cuda" \
# --seed 7
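
The script forces MUJOCO_GL=egl so LIBERO can render off-screen on a headless node. If rendering fails, a quick standalone check like the sketch below can confirm that an EGL context is actually available before launching a long evaluation. This is a hypothetical snippet, not part of the repo; it only assumes the mujoco Python bindings are installed.

import os

os.environ.setdefault("MUJOCO_GL", "egl")  # same backend the eval script exports

import mujoco

# Tiny empty scene: enough to exercise off-screen rendering without any assets.
model = mujoco.MjModel.from_xml_string("<mujoco><worldbody/></mujoco>")
data = mujoco.MjData(model)
mujoco.mj_forward(model, data)

renderer = mujoco.Renderer(model)  # raises if an EGL context cannot be created
renderer.update_scene(data)
frame = renderer.render()
renderer.close()
print("off-screen rendering OK, frame shape:", frame.shape)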
+33 -59
@@ -162,71 +162,33 @@ class XarmEnv(EnvConfig):
@dataclass
class ImagePreprocessingConfig:
    crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
    resize_size: tuple[int, int] | None = None


@dataclass
class VideoRecordConfig:
    """Configuration for video recording in ManiSkill environments."""
    enabled: bool = False
    record_dir: str = "videos"
    trajectory_name: str = "trajectory"

@dataclass
class RewardClassifierConfig:
    """Configuration for reward classification."""
    pretrained_path: str | None = None
    success_threshold: float = 0.5
    success_reward: float = 1.0


@dataclass
class InverseKinematicsConfig:
    """Configuration for inverse kinematics processing."""
    urdf_path: str | None = None
    target_frame_name: str | None = None
    end_effector_bounds: dict[str, list[float]] | None = None
    end_effector_step_sizes: dict[str, float] | None = None

@dataclass
class ObservationConfig:
    """Configuration for observation processing."""


@dataclass
class EnvTransformConfig:
    """Configuration for environment wrappers."""
    # ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig)
    control_mode: str = "gamepad"
    display_cameras: bool = False
    add_joint_velocity_to_observation: bool = False
    add_current_to_observation: bool = False
    add_ee_pose_to_observation: bool = False
    display_cameras: bool = False

@dataclass
class GripperConfig:
    """Configuration for gripper control and penalties."""
    use_gripper: bool = True
    gripper_penalty: float = 0.0
    gripper_penalty_in_reward: bool = False


@dataclass
class ResetConfig:
    """Configuration for environment reset behavior."""
    crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
    resize_size: tuple[int, int] | None = None
    control_time_s: float = 20.0
    fixed_reset_joint_positions: Any | None = None
    reset_time_s: float = 5.0
    control_time_s: float = 20.0
    terminate_on_success: bool = True

@dataclass
class HILSerlProcessorConfig:
    """Configuration for environment processing pipeline."""
    control_mode: str = "gamepad"
    observation: ObservationConfig | None = None
    image_preprocessing: ImagePreprocessingConfig | None = None
    gripper: GripperConfig | None = None
    reset: ResetConfig | None = None
    inverse_kinematics: InverseKinematicsConfig | None = None
    reward_classifier: RewardClassifierConfig | None = None
    max_gripper_pos: float | None = 100.0
    use_gripper: bool = True
    gripper_quantization_threshold: float | None = 0.8
    gripper_penalty: float = 0.0
    gripper_penalty_in_reward: bool = False

@EnvConfig.register_subclass(name="gym_manipulator")
@@ -236,9 +198,21 @@ class HILSerlRobotEnvConfig(EnvConfig):
    robot: RobotConfig | None = None
    teleop: TeleoperatorConfig | None = None
    processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig)
    wrapper: EnvTransformConfig | None = None
    fps: int = 10
    name: str = "real_robot"
    mode: str | None = None  # Either "record", "replay", None
    repo_id: str | None = None
    dataset_root: str | None = None
    task: str | None = ""
    num_episodes: int = 10  # only for record mode
    episode: int = 0
    device: str = "cuda"
    push_to_hub: bool = True
    pretrained_policy_name_or_path: str | None = None
    reward_classifier_pretrained_path: str | None = None
    # For the reward classifier, to record more positive examples after a success
    number_of_steps_after_success: int = 0

    @property
    def gym_kwargs(self) -> dict:
@@ -349,4 +323,4 @@ class LiberoEnv(EnvConfig):
        return {
            "obs_type": self.obs_type,
            "render_mode": self.render_mode,
        }
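
For reference, here is a minimal sketch of how the nested configs shown above might be composed. It is purely illustrative: it assumes the dataclasses are importable from the same configs module and that the base EnvConfig has no other required fields; the camera key and crop/resize values are made-up examples, not repo defaults.

# Illustrative only; field names come from the diff above, values are examples.
processor = HILSerlProcessorConfig(
    control_mode="gamepad",
    image_preprocessing=ImagePreprocessingConfig(
        crop_params_dict={"observation.images.front": (0, 0, 224, 224)},  # example key/box
        resize_size=(224, 224),
    ),
    gripper=GripperConfig(use_gripper=True, gripper_penalty=0.0),
    reset=ResetConfig(reset_time_s=5.0, control_time_s=20.0, terminate_on_success=True),
    reward_classifier=RewardClassifierConfig(pretrained_path=None, success_threshold=0.5),
)

env_cfg = HILSerlRobotEnvConfig(
    processor=processor,
    fps=10,
    task="",
    device="cuda",
)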
+7 -7
@@ -130,7 +130,6 @@ def rollout(
        The dictionary described above.
    """
    assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
    device = get_device_from_parameters(policy)

    # Reset the policy and environments.
    policy.reset()
@@ -161,10 +160,11 @@ def rollout(
        if return_observations:
            all_observations.append(deepcopy(observation))

        observation = preprocessor(observation)

        # Infer "task" from attributes of environments.
        # TODO: works with SyncVectorEnv but not AsyncVectorEnv
        observation = add_envs_task(env, observation)
        observation = preprocessor(observation)

        with torch.inference_mode():
            action = policy.select_action(observation)

        action = postprocessor(action)
@@ -232,12 +232,12 @@ def eval_policy(
    env: gym.vector.VectorEnv,
    policy: PreTrainedPolicy,
    n_episodes: int,
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]] | None = None,
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction] | None = None,
    max_episodes_rendered: int = 0,
    videos_dir: Path | None = None,
    return_episode_data: bool = False,
    start_seed: int | None = None,
    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
) -> dict:
    """
    Args:
@@ -498,7 +498,7 @@ def eval_main(cfg: EvalPipelineConfig):
    )

    with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
        info = eval_policy_all(
            env=env,
            envs=envs,
            policy=policy,
            preprocessor=preprocessor,
            postprocessor=postprocessor,
@@ -587,7 +587,7 @@ def eval_policy_all(
            env=env,
            policy=policy,
            preprocessor=preprocessor,
            postprocessor=postprocessor
            postprocessor=postprocessor,
            n_episodes=n_episodes,
            max_episodes_rendered=max_episodes_rendered,
            videos_dir=task_videos_dir,
@@ -689,7 +689,7 @@ def eval_policy_all(
            videos_dir=videos_dir,
            return_episode_data=return_episode_data,
            start_seed=start_seed,
            max_parallel_tasks,
            max_parallel_tasks=max_parallel_tasks,
        )

    # single accumulator path on the main thread
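
With preprocessor and postprocessor now required, a call to eval_policy looks roughly like the sketch below. This is a hedged example: env, policy, preprocessor, and postprocessor are assumed to be built elsewhere (e.g. by eval_main above), and the episode count and seed are illustrative; only the keyword names come from the signature in the diff.

from pathlib import Path

info = eval_policy(
    env,
    policy,
    n_episodes=1,
    max_episodes_rendered=0,
    videos_dir=Path("outputs/eval/videos"),
    return_episode_data=False,
    start_seed=1000,
    preprocessor=preprocessor,    # now mandatory: applied to each observation after add_envs_task()
    postprocessor=postprocessor,  # now mandatory: applied to each selected action
)
print(info)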