mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 12:09:42 +00:00
make it work
This commit is contained in:
@@ -0,0 +1,58 @@
|
|||||||
|
#!/bin/bash
#
# Evaluate a pretrained policy on a LIBERO task suite by running
# src/lerobot/scripts/eval.py, with all caches and temp files redirected
# to the RAID volume.

# ---------------------------------------------------------------------------
# storage / caches
# ---------------------------------------------------------------------------
RAID=/raid/jade
export HF_HOME="$RAID/.cache/huggingface"
export TRANSFORMERS_CACHE="$HF_HOME/transformers"
export HF_DATASETS_CACHE="$HF_HOME/datasets"
export HF_LEROBOT_HOME="$HF_HOME/lerobot"
export WANDB_CACHE_DIR="$RAID/.cache/wandb"
export TMPDIR="$RAID/.cache/tmp"
mkdir -p "$TMPDIR"

# ---------------------------------------------------------------------------
# runtime environment
# ---------------------------------------------------------------------------
export WANDB_MODE=offline
# NOTE(review): the earlier revision of this script exported
# HF_DATASETS_OFFLINE=1 and only commented the export out in a second,
# duplicated block, so the variable stayed set. That effective behaviour is
# kept here; remove the line if dataset downloads are meant to be allowed.
export HF_DATASETS_OFFLINE=1
# The hub must be reachable: clear HF_HUB_OFFLINE even if it is inherited
# from the calling shell.
unset HF_HUB_OFFLINE
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl
export CUDA_VISIBLE_DEVICES=2

# ---------------------------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------------------------
# Alternative checkpoint that was previously listed (and overridden) here:
#   /raid/jade/logs/lerobot/lerobot_2_HuggingFaceVLA_libero_smolvla_lr1e-4bs32steps100000/checkpoints/100000/pretrained_model
POLICY_PATH="/raid/jade/models/smolvla_pipe"
TASK=libero_spatial
ENV_TYPE="libero"
BATCH_SIZE=1
N_EPISODES=1
# NOTE(review): N_ACTION_STEPS is defined but not passed to eval.py below.
N_ACTION_STEPS=1

# ---------------------------------------------------------------------------
# RUN EVALUATION
# ---------------------------------------------------------------------------
# The last argument deliberately has no trailing backslash so the command
# cannot run on into whatever line follows it.
python src/lerobot/scripts/eval.py \
    --policy.path="$POLICY_PATH" \
    --env.type="$ENV_TYPE" \
    --eval.batch_size="$BATCH_SIZE" \
    --eval.n_episodes="$N_EPISODES" \
    --env.task="$TASK" \
    --env.max_parallel_tasks=10

# Alternative entry point, kept for reference:
# python examples/evaluate_libero.py \
#     --policy_path "$POLICY_PATH" \
#     --task_suite_name "$TASK" \
#     --num_steps_wait 10 \
#     --num_trials_per_task 10 \
#     --video_out_path "data/libero/videos" \
#     --device "cuda" \
#     --seed 7
|
||||||
+33
-59
@@ -162,71 +162,33 @@ class XarmEnv(EnvConfig):
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ImagePreprocessingConfig:
|
class VideoRecordConfig:
|
||||||
crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
|
"""Configuration for video recording in ManiSkill environments."""
|
||||||
resize_size: tuple[int, int] | None = None
|
|
||||||
|
enabled: bool = False
|
||||||
|
record_dir: str = "videos"
|
||||||
|
trajectory_name: str = "trajectory"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class RewardClassifierConfig:
|
class EnvTransformConfig:
|
||||||
"""Configuration for reward classification."""
|
"""Configuration for environment wrappers."""
|
||||||
|
|
||||||
pretrained_path: str | None = None
|
|
||||||
success_threshold: float = 0.5
|
|
||||||
success_reward: float = 1.0
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class InverseKinematicsConfig:
|
|
||||||
"""Configuration for inverse kinematics processing."""
|
|
||||||
|
|
||||||
urdf_path: str | None = None
|
|
||||||
target_frame_name: str | None = None
|
|
||||||
end_effector_bounds: dict[str, list[float]] | None = None
|
|
||||||
end_effector_step_sizes: dict[str, float] | None = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ObservationConfig:
|
|
||||||
"""Configuration for observation processing."""
|
|
||||||
|
|
||||||
|
# ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig)
|
||||||
|
control_mode: str = "gamepad"
|
||||||
|
display_cameras: bool = False
|
||||||
add_joint_velocity_to_observation: bool = False
|
add_joint_velocity_to_observation: bool = False
|
||||||
add_current_to_observation: bool = False
|
add_current_to_observation: bool = False
|
||||||
add_ee_pose_to_observation: bool = False
|
add_ee_pose_to_observation: bool = False
|
||||||
display_cameras: bool = False
|
crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
|
||||||
|
resize_size: tuple[int, int] | None = None
|
||||||
|
control_time_s: float = 20.0
|
||||||
@dataclass
|
|
||||||
class GripperConfig:
|
|
||||||
"""Configuration for gripper control and penalties."""
|
|
||||||
|
|
||||||
use_gripper: bool = True
|
|
||||||
gripper_penalty: float = 0.0
|
|
||||||
gripper_penalty_in_reward: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ResetConfig:
|
|
||||||
"""Configuration for environment reset behavior."""
|
|
||||||
|
|
||||||
fixed_reset_joint_positions: Any | None = None
|
fixed_reset_joint_positions: Any | None = None
|
||||||
reset_time_s: float = 5.0
|
reset_time_s: float = 5.0
|
||||||
control_time_s: float = 20.0
|
use_gripper: bool = True
|
||||||
terminate_on_success: bool = True
|
gripper_quantization_threshold: float | None = 0.8
|
||||||
|
gripper_penalty: float = 0.0
|
||||||
|
gripper_penalty_in_reward: bool = False
|
||||||
@dataclass
|
|
||||||
class HILSerlProcessorConfig:
|
|
||||||
"""Configuration for environment processing pipeline."""
|
|
||||||
|
|
||||||
control_mode: str = "gamepad"
|
|
||||||
observation: ObservationConfig | None = None
|
|
||||||
image_preprocessing: ImagePreprocessingConfig | None = None
|
|
||||||
gripper: GripperConfig | None = None
|
|
||||||
reset: ResetConfig | None = None
|
|
||||||
inverse_kinematics: InverseKinematicsConfig | None = None
|
|
||||||
reward_classifier: RewardClassifierConfig | None = None
|
|
||||||
max_gripper_pos: float | None = 100.0
|
|
||||||
|
|
||||||
|
|
||||||
@EnvConfig.register_subclass(name="gym_manipulator")
|
@EnvConfig.register_subclass(name="gym_manipulator")
|
||||||
@@ -236,9 +198,21 @@ class HILSerlRobotEnvConfig(EnvConfig):
|
|||||||
|
|
||||||
robot: RobotConfig | None = None
|
robot: RobotConfig | None = None
|
||||||
teleop: TeleoperatorConfig | None = None
|
teleop: TeleoperatorConfig | None = None
|
||||||
processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig)
|
wrapper: EnvTransformConfig | None = None
|
||||||
|
fps: int = 10
|
||||||
name: str = "real_robot"
|
name: str = "real_robot"
|
||||||
|
mode: str | None = None # Either "record", "replay", None
|
||||||
|
repo_id: str | None = None
|
||||||
|
dataset_root: str | None = None
|
||||||
|
task: str | None = ""
|
||||||
|
num_episodes: int = 10 # only for record mode
|
||||||
|
episode: int = 0
|
||||||
|
device: str = "cuda"
|
||||||
|
push_to_hub: bool = True
|
||||||
|
pretrained_policy_name_or_path: str | None = None
|
||||||
|
reward_classifier_pretrained_path: str | None = None
|
||||||
|
# For the reward classifier, to record more positive examples after a success
|
||||||
|
number_of_steps_after_success: int = 0
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def gym_kwargs(self) -> dict:
|
def gym_kwargs(self) -> dict:
|
||||||
@@ -349,4 +323,4 @@ class LiberoEnv(EnvConfig):
|
|||||||
return {
|
return {
|
||||||
"obs_type": self.obs_type,
|
"obs_type": self.obs_type,
|
||||||
"render_mode": self.render_mode,
|
"render_mode": self.render_mode,
|
||||||
}
|
}
|
||||||
@@ -130,7 +130,6 @@ def rollout(
|
|||||||
The dictionary described above.
|
The dictionary described above.
|
||||||
"""
|
"""
|
||||||
assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
|
assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
|
||||||
device = get_device_from_parameters(policy)
|
|
||||||
|
|
||||||
# Reset the policy and environments.
|
# Reset the policy and environments.
|
||||||
policy.reset()
|
policy.reset()
|
||||||
@@ -161,10 +160,11 @@ def rollout(
|
|||||||
if return_observations:
|
if return_observations:
|
||||||
all_observations.append(deepcopy(observation))
|
all_observations.append(deepcopy(observation))
|
||||||
|
|
||||||
observation = preprocessor(observation)
|
|
||||||
# Infer "task" from attributes of environments.
|
# Infer "task" from attributes of environments.
|
||||||
# TODO: works with SyncVectorEnv but not AsyncVectorEnv
|
# TODO: works with SyncVectorEnv but not AsyncVectorEnv
|
||||||
observation = add_envs_task(env, observation)
|
observation = add_envs_task(env, observation)
|
||||||
|
|
||||||
|
observation = preprocessor(observation)
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
action = policy.select_action(observation)
|
action = policy.select_action(observation)
|
||||||
action = postprocessor(action)
|
action = postprocessor(action)
|
||||||
@@ -232,12 +232,12 @@ def eval_policy(
|
|||||||
env: gym.vector.VectorEnv,
|
env: gym.vector.VectorEnv,
|
||||||
policy: PreTrainedPolicy,
|
policy: PreTrainedPolicy,
|
||||||
n_episodes: int,
|
n_episodes: int,
|
||||||
|
preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]] | None = None,
|
||||||
|
postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction] | None = None,
|
||||||
max_episodes_rendered: int = 0,
|
max_episodes_rendered: int = 0,
|
||||||
videos_dir: Path | None = None,
|
videos_dir: Path | None = None,
|
||||||
return_episode_data: bool = False,
|
return_episode_data: bool = False,
|
||||||
start_seed: int | None = None,
|
start_seed: int | None = None,
|
||||||
preprocessor: PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
|
|
||||||
postprocessor: PolicyProcessorPipeline[PolicyAction, PolicyAction],
|
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
@@ -498,7 +498,7 @@ def eval_main(cfg: EvalPipelineConfig):
|
|||||||
)
|
)
|
||||||
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
|
with torch.no_grad(), torch.autocast(device_type=device.type) if cfg.policy.use_amp else nullcontext():
|
||||||
info = eval_policy_all(
|
info = eval_policy_all(
|
||||||
env=env,
|
envs=envs,
|
||||||
policy=policy,
|
policy=policy,
|
||||||
preprocessor=preprocessor,
|
preprocessor=preprocessor,
|
||||||
postprocessor=postprocessor,
|
postprocessor=postprocessor,
|
||||||
@@ -587,7 +587,7 @@ def eval_policy_all(
|
|||||||
env=env,
|
env=env,
|
||||||
policy=policy,
|
policy=policy,
|
||||||
preprocessor=preprocessor,
|
preprocessor=preprocessor,
|
||||||
postprocessor=postprocessor
|
postprocessor=postprocessor,
|
||||||
n_episodes=n_episodes,
|
n_episodes=n_episodes,
|
||||||
max_episodes_rendered=max_episodes_rendered,
|
max_episodes_rendered=max_episodes_rendered,
|
||||||
videos_dir=task_videos_dir,
|
videos_dir=task_videos_dir,
|
||||||
@@ -689,7 +689,7 @@ def eval_policy_all(
|
|||||||
videos_dir=videos_dir,
|
videos_dir=videos_dir,
|
||||||
return_episode_data=return_episode_data,
|
return_episode_data=return_episode_data,
|
||||||
start_seed=start_seed,
|
start_seed=start_seed,
|
||||||
max_parallel_tasks,
|
max_parallel_tasks=max_parallel_tasks,
|
||||||
)
|
)
|
||||||
|
|
||||||
# single accumulator path on the main thread
|
# single accumulator path on the main thread
|
||||||
|
|||||||
Reference in New Issue
Block a user