From 2c17433f4df3e947ba4a2d919f351b3fb515a277 Mon Sep 17 00:00:00 2001
From: Jade Choghari
Date: Wed, 17 Sep 2025 11:16:42 +0200
Subject: [PATCH] Make LIBERO evaluation work end-to-end

- Add examples/test.sh, a scratch script that runs eval.py on a LIBERO
  task suite with local HF caches and offline W&B.
- Flatten the HIL-SERL env configuration: replace the nested processor
  dataclasses with a single EnvTransformConfig and move record/replay
  options onto HILSerlRobotEnvConfig.
- eval.py: make the pre/postprocessors optional keyword arguments, apply
  the preprocessor after add_envs_task() so it sees the inferred task,
  and fix two call-site syntax errors (a missing comma and a positional
  max_parallel_tasks after keyword arguments).
---
 examples/test.sh            | 44 ++++++++++++++++++++++
 src/lerobot/envs/configs.py | 90 +++++++++++++------------------------
 src/lerobot/scripts/eval.py | 14 +++---
 3 files changed, 83 insertions(+), 65 deletions(-)
 create mode 100644 examples/test.sh

diff --git a/examples/test.sh b/examples/test.sh
new file mode 100644
index 000000000..a34f85719
--- /dev/null
+++ b/examples/test.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# storage / caches
+RAID=/raid/jade
+export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
+export HF_HOME=$RAID/.cache/huggingface
+export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
+export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
+export WANDB_CACHE_DIR=$RAID/.cache/wandb
+export TMPDIR=$RAID/.cache/tmp
+mkdir -p "$TMPDIR"
+export WANDB_MODE=offline
+export HF_DATASETS_OFFLINE=1
+export TOKENIZERS_PARALLELISM=false
+export MUJOCO_GL=egl
+export CUDA_VISIBLE_DEVICES=2
+
+# CONFIGURATION
+# POLICY_PATH="/raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model"
+POLICY_PATH="/raid/jade/models/smolvla_pipe"
+TASK=libero_spatial
+ENV_TYPE="libero"
+BATCH_SIZE=1
+N_EPISODES=1
+N_ACTION_STEPS=1
+
+# RUN EVALUATION
+python src/lerobot/scripts/eval.py \
+    --policy.path="$POLICY_PATH" \
+    --env.type="$ENV_TYPE" \
+    --eval.batch_size="$BATCH_SIZE" \
+    --eval.n_episodes="$N_EPISODES" \
+    --env.task="$TASK" \
+    --env.max_parallel_tasks=10
+
+# Alternative: standalone LIBERO evaluation script
+# python examples/evaluate_libero.py \
+#     --policy_path "$POLICY_PATH" \
+#     --task_suite_name "$TASK" \
+#     --num_steps_wait 10 \
+#     --num_trials_per_task 10 \
+#     --video_out_path "data/libero/videos" \
+#     --device "cuda" \
+#     --seed 7
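NOTE: the dotted flags above (--env.type, --env.task, ...) map onto the EnvConfig
dataclasses touched in the next file: --env.type="libero" selects the subclass
registered under that name (the same mechanism as
@EnvConfig.register_subclass(name="gym_manipulator") below), and the remaining
--env.* flags override its fields. A minimal, self-contained sketch of this
registry pattern, as an illustration only (not lerobot's actual implementation;
LiberoEnvSketch and its fields are hypothetical):

    from dataclasses import dataclass

    _ENV_REGISTRY: dict[str, type] = {}

    def register_subclass(name: str):
        """Register a config dataclass under the name used by --env.type."""
        def wrap(cls: type) -> type:
            _ENV_REGISTRY[name] = cls
            return cls
        return wrap

    @register_subclass("libero")
    @dataclass
    class LiberoEnvSketch:  # hypothetical stand-in for the real LiberoEnv
        task: str = "libero_spatial"
        max_parallel_tasks: int = 1

    # What --env.type=libero --env.task=$TASK --env.max_parallel_tasks=10 resolve to:
    cfg = _ENV_REGISTRY["libero"](task="libero_spatial", max_parallel_tasks=10)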
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index 14f722d80..c3fdbbff5 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -162,71 +162,33 @@ class XarmEnv(EnvConfig):
 
 @dataclass
-class ImagePreprocessingConfig:
-    crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
-    resize_size: tuple[int, int] | None = None
+class VideoRecordConfig:
+    """Configuration for video recording in ManiSkill environments."""
+
+    enabled: bool = False
+    record_dir: str = "videos"
+    trajectory_name: str = "trajectory"
 
 
 @dataclass
-class RewardClassifierConfig:
-    """Configuration for reward classification."""
-
-    pretrained_path: str | None = None
-    success_threshold: float = 0.5
-    success_reward: float = 1.0
-
-
-@dataclass
-class InverseKinematicsConfig:
-    """Configuration for inverse kinematics processing."""
-
-    urdf_path: str | None = None
-    target_frame_name: str | None = None
-    end_effector_bounds: dict[str, list[float]] | None = None
-    end_effector_step_sizes: dict[str, float] | None = None
-
-
-@dataclass
-class ObservationConfig:
-    """Configuration for observation processing."""
+class EnvTransformConfig:
+    """Configuration for environment wrappers."""
 
+    # ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig)
+    control_mode: str = "gamepad"
+    display_cameras: bool = False
     add_joint_velocity_to_observation: bool = False
     add_current_to_observation: bool = False
     add_ee_pose_to_observation: bool = False
-    display_cameras: bool = False
-
-
-@dataclass
-class GripperConfig:
-    """Configuration for gripper control and penalties."""
-
-    use_gripper: bool = True
-    gripper_penalty: float = 0.0
-    gripper_penalty_in_reward: bool = False
-
-
-@dataclass
-class ResetConfig:
-    """Configuration for environment reset behavior."""
-
+    crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
+    resize_size: tuple[int, int] | None = None
+    control_time_s: float = 20.0
     fixed_reset_joint_positions: Any | None = None
     reset_time_s: float = 5.0
-    control_time_s: float = 20.0
-    terminate_on_success: bool = True
-
-
-@dataclass
-class HILSerlProcessorConfig:
-    """Configuration for environment processing pipeline."""
-
-    control_mode: str = "gamepad"
-    observation: ObservationConfig | None = None
-    image_preprocessing: ImagePreprocessingConfig | None = None
-    gripper: GripperConfig | None = None
-    reset: ResetConfig | None = None
-    inverse_kinematics: InverseKinematicsConfig | None = None
-    reward_classifier: RewardClassifierConfig | None = None
-    max_gripper_pos: float | None = 100.0
+    use_gripper: bool = True
+    gripper_quantization_threshold: float | None = 0.8
+    gripper_penalty: float = 0.0
+    gripper_penalty_in_reward: bool = False
 
 
 @EnvConfig.register_subclass(name="gym_manipulator")
@@ -236,9 +198,21 @@ class HILSerlRobotEnvConfig(EnvConfig):
     robot: RobotConfig | None = None
     teleop: TeleoperatorConfig | None = None
-    processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig)
-
+    wrapper: EnvTransformConfig | None = None
+    fps: int = 10
     name: str = "real_robot"
+    mode: str | None = None  # Either "record", "replay", None
+    repo_id: str | None = None
+    dataset_root: str | None = None
+    task: str | None = ""
+    num_episodes: int = 10  # only for record mode
+    episode: int = 0
+    device: str = "cuda"
+    push_to_hub: bool = True
+    pretrained_policy_name_or_path: str | None = None
+    reward_classifier_pretrained_path: str | None = None
+    # For the reward classifier, to record more positive examples after a success
+    number_of_steps_after_success: int = 0
 
 
     @property
     def gym_kwargs(self) -> dict:
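NOTE: the refactor above collapses the nested HIL-SERL processor dataclasses
(observation, image preprocessing, gripper, reset, and their
HILSerlProcessorConfig container) into one flat EnvTransformConfig, drops the
inverse-kinematics sub-config, and moves record/replay bookkeeping plus the
reward-classifier path directly onto HILSerlRobotEnvConfig: one less level of
indirection at the cost of a wider config surface. A hedged construction
sketch follows; the field names come from the diff above and the import path
is the module being patched, but every value here is illustrative:

    from lerobot.envs.configs import EnvTransformConfig, HILSerlRobotEnvConfig

    wrapper = EnvTransformConfig(
        control_mode="gamepad",
        resize_size=(128, 128),           # wrapper-side image resize
        control_time_s=20.0,              # episode time budget in seconds
        use_gripper=True,
        gripper_penalty=-0.02,            # illustrative value; the default is 0.0
    )

    env_cfg = HILSerlRobotEnvConfig(
        wrapper=wrapper,
        fps=10,
        mode="record",                    # "record", "replay", or None per the inline comment
        repo_id="user/pick_place_demos",  # hypothetical dataset repo id
        num_episodes=5,                   # only read in record mode
    )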
diff --git a/src/lerobot/scripts/eval.py b/src/lerobot/scripts/eval.py
index 1c0e78c04..0600ddb19 100644
--- a/src/lerobot/scripts/eval.py
+++ b/src/lerobot/scripts/eval.py
@@ -130,7 +130,6 @@ def rollout(
         The dictionary described above.
     """
     assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
-    device = get_device_from_parameters(policy)
 
     # Reset the policy and environments.
     policy.reset()
@@ -161,10 +160,11 @@ def rollout(
         if return_observations:
             all_observations.append(deepcopy(observation))
 
-        observation = preprocessor(observation)
         # Infer "task" from attributes of environments.
        # TODO: works with SyncVectorEnv but not AsyncVectorEnv
         observation = add_envs_task(env, observation)
+
+        observation = preprocessor(observation)
         with torch.inference_mode():
             action = policy.select_action(observation)
         action = postprocessor(action)
@@ -232,12 +232,12 @@ def eval_policy(
     env: gym.vector.VectorEnv,
     policy: PreTrainedPolicy,
     n_episodes: int,
+    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]] | None = None,
+    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction] | None = None,
     max_episodes_rendered: int = 0,
     videos_dir: Path | None = None,
     return_episode_data: bool = False,
     start_seed: int | None = None,
-    preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
-    postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
 ) -> dict:
     """
     Args:
@@ -498,7 +498,7 @@ def eval_main(cfg: EvalPipelineConfig):
     )
     with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
         info = eval_policy_all(
-            env=env,
+            envs=envs,
             policy=policy,
             preprocessor=preprocessor,
             postprocessor=postprocessor,
@@ -587,7 +587,7 @@ def eval_policy_all(
             env=env,
             policy=policy,
             preprocessor=preprocessor,
-            postprocessor=postprocessor
+            postprocessor=postprocessor,
             n_episodes=n_episodes,
             max_episodes_rendered=max_episodes_rendered,
             videos_dir=task_videos_dir,
@@ -689,7 +689,7 @@
             videos_dir=videos_dir,
             return_episode_data=return_episode_data,
             start_seed=start_seed,
-            max_parallel_tasks,
+            max_parallel_tasks=max_parallel_tasks,
         )
 
     # single accumulator path on the main thread
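NOTE: two things worth calling out in the eval.py hunks. First, moving
preprocessor(observation) below add_envs_task(env, observation) means the
preprocessor now runs on an observation that already carries the inferred task
string, which matters for pipelines that tokenize the task. Second, eval_policy
now defaults preprocessor and postprocessor to None, but rollout still calls
them unconditionally, so callers should keep passing real (possibly identity)
pipelines until rollout guards against None. A minimal call sketch against the
new signature; it assumes env, policy, preprocessor, and postprocessor have
already been built elsewhere (their constructors are outside this patch):

    from pathlib import Path

    # env, policy, preprocessor, postprocessor: assumed to exist already,
    # e.g. the way eval_main() assembles them from an EvalPipelineConfig.
    info = eval_policy(
        env=env,
        policy=policy,
        n_episodes=1,                            # N_EPISODES in examples/test.sh
        preprocessor=preprocessor,               # optional keyword as of this patch
        postprocessor=postprocessor,             # optional keyword as of this patch
        max_episodes_rendered=0,
        videos_dir=Path("outputs/eval/videos"),  # hypothetical output directory
        start_seed=1000,                         # illustrative seed
    )
    print(info)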