Refactored hilserl config

This commit is contained in:
Michel Aractingi
2025-08-05 01:24:46 +02:00
parent b292dbbc55
commit 9effc5214f
4 changed files with 289 additions and 104 deletions
+82 -42
View File
@@ -56,27 +56,38 @@ pip install -e ".[hilserl]"
### Understanding Configuration ### Understanding Configuration
The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. Which is defined as: The training process begins with proper configuration for the HILSerl environment. The configuration class of interest is `HILSerlRobotEnvConfig` in `lerobot/envs/configs.py`. The configuration is now organized into focused, nested sub-configs:
<!-- prettier-ignore-start --> <!-- prettier-ignore-start -->
```python ```python
class HILSerlRobotEnvConfig(EnvConfig): class HILSerlRobotEnvConfig(EnvConfig):
robot: RobotConfig | None = None # Main robot agent (defined in `lerobot/robots`) robot: RobotConfig | None = None # Main robot agent (defined in `lerobot/robots`)
teleop: TeleoperatorConfig | None = None # Teleoperator agent, e.g., gamepad or leader arm, (defined in `lerobot/teleoperators`) teleop: TeleoperatorConfig | None = None # Teleoperator agent, e.g., gamepad or leader arm
wrapper: EnvTransformConfig | None = None # Environment wrapper settings; check `lerobot/scripts/server/gym_manipulator.py` processor: HILSerlProcessorConfig # Processing pipeline configuration (nested)
fps: int = 10 # Control frequency dataset: DatasetConfig # Dataset recording/replay configuration (nested)
name: str = "real_robot" # Environment name name: str = "real_robot" # Environment name
mode: str = None # "record", "replay", or None (for training) mode: str = None # "record", "replay", or None (for training)
device: str = "cuda" # Compute device
# Nested processor configuration
class HILSerlProcessorConfig:
control_mode: str = "gamepad" # Control mode
observation: ObservationConfig # Observation processing settings
image_preprocessing: ImagePreprocessingConfig # Image crop/resize settings
gripper: GripperConfig # Gripper control and penalty settings
reset: ResetConfig # Environment reset and timing settings
inverse_kinematics: InverseKinematicsConfig # IK processing settings
reward_classifier: RewardClassifierConfig # Reward classifier settings
# Dataset configuration
class DatasetConfig:
repo_id: str | None = None # LeRobot dataset repository ID repo_id: str | None = None # LeRobot dataset repository ID
dataset_root: str | None = None # Local dataset root (optional) dataset_root: str | None = None # Local dataset root (optional)
task: str = "" # Task identifier task: str = "" # Task identifier
num_episodes: int = 10 # Number of episodes for recording num_episodes: int = 10 # Number of episodes for recording
episode: int = 0 # episode index for replay episode: int = 0 # Episode index for replay
device: str = "cuda" # Compute device push_to_hub: bool = True # Whether to push datasets to Hub
push_to_hub: bool = True # Whether to push the recorded datasets to Hub fps: int = 10 # Control frequency
pretrained_policy_name_or_path: str | None = None # For policy loading
reward_classifier_pretrained_path: str | None = None # For reward model
number_of_steps_after_success: int = 0 # For reward classifier, collect more positive examples after a success to train a classifier
``` ```
<!-- prettier-ignore-end --> <!-- prettier-ignore-end -->
@@ -133,19 +144,22 @@ Create a configuration file for recording demonstrations (or edit an existing on
1. Set `mode` to `"record"` 1. Set `mode` to `"record"`
2. Specify a unique `repo_id` for your dataset (e.g., "username/task_name") 2. Specify a unique `repo_id` for your dataset (e.g., "username/task_name")
3. Set `num_episodes` to the number of demonstrations you want to collect 3. Set `num_episodes` to the number of demonstrations you want to collect
4. Set `crop_params_dict` to `null` initially (we'll determine crops later) 4. Set `processor.image_preprocessing.crop_params_dict` to `{}` initially (we'll determine crops later)
5. Configure `robot`, `cameras`, and other hardware settings 5. Configure `robot`, `cameras`, and other hardware settings
Example configuration section: Example configuration section:
```json ```json
"mode": "record", "mode": "record",
"repo_id": "username/pick_lift_cube", "dataset": {
"dataset_root": null, "repo_id": "username/pick_lift_cube",
"task": "pick_and_lift", "dataset_root": null,
"num_episodes": 15, "task": "pick_and_lift",
"episode": 0, "num_episodes": 15,
"push_to_hub": true "episode": 0,
"push_to_hub": true,
"fps": 10
}
``` ```
### Using a Teleoperation Device ### Using a Teleoperation Device
@@ -191,10 +205,13 @@ The gamepad provides a very convenient way to control the robot and the episode
To setup the gamepad, you need to set the `control_mode` to `"gamepad"` and define the `teleop` section in the configuration file. To setup the gamepad, you need to set the `control_mode` to `"gamepad"` and define the `teleop` section in the configuration file.
```json ```json
"teleop": { "teleop": {
"type": "gamepad", "type": "gamepad",
"use_gripper": true "use_gripper": true
}, },
"processor": {
"control_mode": "gamepad"
}
``` ```
<p align="center"> <p align="center">
@@ -216,11 +233,14 @@ The SO101 leader arm has reduced gears that allows it to move and track the foll
To setup the SO101 leader, you need to set the `control_mode` to `"leader"` and define the `teleop` section in the configuration file. To setup the SO101 leader, you need to set the `control_mode` to `"leader"` and define the `teleop` section in the configuration file.
```json ```json
"teleop": { "teleop": {
"type": "so101_leader", "type": "so101_leader",
"port": "/dev/tty.usbmodem585A0077921", # check your port number "port": "/dev/tty.usbmodem585A0077921",
"use_degrees": true "use_degrees": true
}, },
"processor": {
"control_mode": "leader"
}
``` ```
In order to annotate the success/failure of the episode, **you will need** to use a keyboard to press `s` for success, `esc` for failure. In order to annotate the success/failure of the episode, **you will need** to use a keyboard to press `s` for success, `esc` for failure.
@@ -251,7 +271,7 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/e
During recording: During recording:
1. The robot will reset to the initial position defined in the configuration file `fixed_reset_joint_positions` 1. The robot will reset to the initial position defined in the configuration file `processor.reset.fixed_reset_joint_positions`
2. Complete the task successfully 2. Complete the task successfully
3. The episode ends with a reward of 1 when you press the "success" button 3. The episode ends with a reward of 1 when you press the "success" button
4. If the time limit is reached, or the fail button is pressed, the episode ends with a reward of 0 4. If the time limit is reached, or the fail button is pressed, the episode ends with a reward of 0
@@ -310,11 +330,15 @@ observation.images.front: [180, 250, 120, 150]
Add these crop parameters to your training configuration: Add these crop parameters to your training configuration:
```json ```json
"crop_params_dict": { "processor": {
"observation.images.side": [180, 207, 180, 200], "image_preprocessing": {
"observation.images.front": [180, 250, 120, 150] "crop_params_dict": {
}, "observation.images.side": [180, 207, 180, 200],
"resize_size": [128, 128] "observation.images.front": [180, 250, 120, 150]
},
"resize_size": [128, 128]
}
}
``` ```
**Recommended image resolution** **Recommended image resolution**
@@ -346,23 +370,29 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/r
- **mode**: set it to `"record"` to collect a dataset - **mode**: set it to `"record"` to collect a dataset
- **repo_id**: `"hf_username/dataset_name"`, name of the dataset and repo on the hub - **repo_id**: `"hf_username/dataset_name"`, name of the dataset and repo on the hub
- **num_episodes**: Number of episodes to record - **num_episodes**: Number of episodes to record
- **number_of_steps_after_success**: Number of additional frames to record after a success (reward=1) is detected - **processor.reset.number_of_steps_after_success**: Number of additional frames to record after a success (reward=1) is detected
- **fps**: Number of frames per second to record - **fps**: Number of frames per second to record
- **push_to_hub**: Whether to push the dataset to the hub - **push_to_hub**: Whether to push the dataset to the hub
The `number_of_steps_after_success` parameter is crucial as it allows you to collect more positive examples. When a success is detected, the system will continue recording for the specified number of steps while maintaining the reward=1 label. Otherwise, there won't be enough states in the dataset labeled to 1 to train a good classifier. The `processor.reset.number_of_steps_after_success` parameter is crucial as it allows you to collect more positive examples. When a success is detected, the system will continue recording for the specified number of steps while maintaining the reward=1 label. Otherwise, there won't be enough states in the dataset labeled to 1 to train a good classifier.
Example configuration section for data collection: Example configuration section for data collection:
```json ```json
{ {
"mode": "record", "mode": "record",
"repo_id": "hf_username/dataset_name", "dataset": {
"dataset_root": "data/your_dataset", "repo_id": "hf_username/dataset_name",
"num_episodes": 20, "dataset_root": "data/your_dataset",
"push_to_hub": true, "num_episodes": 20,
"fps": 10, "push_to_hub": true,
"number_of_steps_after_success": 15 "fps": 10
},
"processor": {
"reset": {
"number_of_steps_after_success": 15
}
}
} }
``` ```
@@ -422,7 +452,11 @@ To use your trained reward classifier, configure the `HILSerlRobotEnvConfig` to
<!-- prettier-ignore-start --> <!-- prettier-ignore-start -->
```python ```python
env_config = HILSerlRobotEnvConfig( env_config = HILSerlRobotEnvConfig(
reward_classifier_pretrained_path="path_to_your_pretrained_trained_model", processor=HILSerlProcessorConfig(
reward_classifier=RewardClassifierConfig(
pretrained_path="path_to_your_pretrained_trained_model"
)
),
# Other environment parameters # Other environment parameters
) )
``` ```
@@ -432,7 +466,13 @@ or set the argument in the json config file.
```json ```json
{ {
"reward_classifier_pretrained_path": "path_to_your_pretrained_model" "processor": {
"reward_classifier": {
"pretrained_path": "path_to_your_pretrained_model",
"success_threshold": 0.7,
"success_reward": 1.0
}
}
} }
``` ```
+73 -35
View File
@@ -161,33 +161,36 @@ class XarmEnv(EnvConfig):
@dataclass @dataclass
class VideoRecordConfig: class ImagePreprocessingConfig:
"""Configuration for video recording in ManiSkill environments.""" crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None
resize_size: tuple[int, int] | None = None
enabled: bool = False
record_dir: str = "videos"
trajectory_name: str = "trajectory"
@dataclass @dataclass
class HILSerlProcessorConfig: class DatasetConfig:
"""Configuration for environment wrappers.""" """Configuration for dataset recording and replay."""
# ee_action_space_params: EEActionSpaceConfig = field(default_factory=EEActionSpaceConfig) repo_id: str | None = None
control_mode: str = "gamepad" dataset_root: str | None = None
display_cameras: bool = False task: str | None = ""
add_joint_velocity_to_observation: bool = False num_episodes: int = 10
add_current_to_observation: bool = False episode: int = 0 # for replay mode
add_ee_pose_to_observation: bool = False push_to_hub: bool = True
crop_params_dict: dict[str, tuple[int, int, int, int]] | None = None fps: int = 10
resize_size: tuple[int, int] | None = None
control_time_s: float = 20.0
fixed_reset_joint_positions: Any | None = None @dataclass
reset_time_s: float = 5.0 class RewardClassifierConfig:
use_gripper: bool = True """Configuration for reward classification."""
gripper_quantization_threshold: float | None = 0.8
gripper_penalty: float = 0.0 pretrained_path: str | None = None
gripper_penalty_in_reward: bool = False success_threshold: float = 0.5
success_reward: float = 1.0
@dataclass
class InverseKinematicsConfig:
"""Configuration for inverse kinematics processing."""
urdf_path: str | None = None urdf_path: str | None = None
target_frame_name: str | None = None target_frame_name: str | None = None
@@ -196,6 +199,50 @@ class HILSerlProcessorConfig:
max_gripper_pos: float | None = None max_gripper_pos: float | None = None
@dataclass
class ObservationConfig:
"""Configuration for observation processing."""
add_joint_velocity_to_observation: bool = False
add_current_to_observation: bool = False
add_ee_pose_to_observation: bool = False
display_cameras: bool = False
@dataclass
class GripperConfig:
"""Configuration for gripper control and penalties."""
use_gripper: bool = True
gripper_quantization_threshold: float | None = 0.8
gripper_penalty: float = 0.0
gripper_penalty_in_reward: bool = False
@dataclass
class ResetConfig:
"""Configuration for environment reset behavior."""
fixed_reset_joint_positions: Any | None = None
reset_time_s: float = 5.0
control_time_s: float = 20.0
terminate_on_success: bool = True
number_of_steps_after_success: int = 0
@dataclass
class HILSerlProcessorConfig:
"""Configuration for environment processing pipeline."""
control_mode: str = "gamepad"
observation: ObservationConfig = field(default_factory=ObservationConfig)
image_preprocessing: ImagePreprocessingConfig = field(default_factory=ImagePreprocessingConfig)
gripper: GripperConfig = field(default_factory=GripperConfig)
reset: ResetConfig = field(default_factory=ResetConfig)
inverse_kinematics: InverseKinematicsConfig = field(default_factory=InverseKinematicsConfig)
reward_classifier: RewardClassifierConfig = field(default_factory=RewardClassifierConfig)
@EnvConfig.register_subclass(name="gym_manipulator") @EnvConfig.register_subclass(name="gym_manipulator")
@dataclass @dataclass
class HILSerlRobotEnvConfig(EnvConfig): class HILSerlRobotEnvConfig(EnvConfig):
@@ -203,21 +250,12 @@ class HILSerlRobotEnvConfig(EnvConfig):
robot: RobotConfig | None = None robot: RobotConfig | None = None
teleop: TeleoperatorConfig | None = None teleop: TeleoperatorConfig | None = None
processor: HILSerlProcessorConfig | None = None processor: HILSerlProcessorConfig = field(default_factory=HILSerlProcessorConfig)
fps: int = 10 dataset: DatasetConfig = field(default_factory=DatasetConfig)
name: str = "real_robot" name: str = "real_robot"
mode: str | None = None # Either "record", "replay", None mode: str | None = None # Either "record", "replay", None
repo_id: str | None = None
dataset_root: str | None = None
task: str | None = ""
num_episodes: int = 10 # only for record mode
episode: int = 0
device: str = "cuda" device: str = "cuda"
push_to_hub: bool = True
pretrained_policy_name_or_path: str | None = None
reward_classifier_pretrained_path: str | None = None
# For the reward classifier, to record more positive examples after a success
number_of_steps_after_success: int = 0
@property @property
def gym_kwargs(self) -> dict: def gym_kwargs(self) -> dict:
+88
View File
@@ -1,3 +1,4 @@
import time
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any from typing import Any
@@ -241,3 +242,90 @@ class InterventionActionProcessor:
def feature_contract(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: def feature_contract(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
return features return features
@dataclass
@ProcessorStepRegistry.register("reward_classifier_processor")
class RewardClassifierProcessor:
"""Apply reward classification to image observations.
This processor runs a trained reward classifier on image observations
to predict rewards and success states, potentially terminating episodes
when success is achieved.
"""
pretrained_path: str = None
device: str = "cpu"
success_threshold: float = 0.5
success_reward: float = 1.0
terminate_on_success: bool = True
reward_classifier: Any = None
def __post_init__(self):
"""Initialize the reward classifier after dataclass initialization."""
if self.pretrained_path is not None:
from lerobot.policies.sac.reward_model.modeling_classifier import Classifier
self.reward_classifier = Classifier.from_pretrained(self.pretrained_path)
self.reward_classifier.to(self.device)
self.reward_classifier.eval()
def __call__(self, transition: EnvTransition) -> EnvTransition:
observation = transition.get(TransitionKey.OBSERVATION)
if observation is None or self.reward_classifier is None:
return transition
# Extract images from observation
images = {key: value for key, value in observation.items() if "image" in key}
if not images:
return transition
# Run reward classifier
start_time = time.perf_counter()
with torch.inference_mode():
success = self.reward_classifier.predict_reward(images, threshold=self.success_threshold)
classifier_frequency = 1 / (time.perf_counter() - start_time)
# Calculate reward and termination
reward = transition.get(TransitionKey.REWARD, 0.0)
terminated = transition.get(TransitionKey.DONE, False)
if success == 1.0:
reward = self.success_reward
if self.terminate_on_success:
terminated = True
# Update transition
new_transition = transition.copy()
new_transition[TransitionKey.REWARD] = reward
new_transition[TransitionKey.DONE] = terminated
# Update info with classifier frequency
info = new_transition.get(TransitionKey.INFO, {})
info["reward_classifier_frequency"] = classifier_frequency
new_transition[TransitionKey.INFO] = info
return new_transition
def get_config(self) -> dict[str, Any]:
return {
"device": self.device,
"success_threshold": self.success_threshold,
"success_reward": self.success_reward,
"terminate_on_success": self.terminate_on_success,
}
def state_dict(self) -> dict[str, torch.Tensor]:
return {}
def load_state_dict(self, state: dict[str, torch.Tensor]) -> None:
pass
def reset(self) -> None:
pass
def feature_contract(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
return features
+46 -27
View File
@@ -36,6 +36,7 @@ from lerobot.processor.hil_processor import (
GripperPenaltyProcessor, GripperPenaltyProcessor,
ImageCropResizeProcessor, ImageCropResizeProcessor,
InterventionActionProcessor, InterventionActionProcessor,
RewardClassifierProcessor,
TimeLimitProcessor, TimeLimitProcessor,
) )
from lerobot.processor.pipeline import TransitionKey from lerobot.processor.pipeline import TransitionKey
@@ -308,9 +309,9 @@ def make_robot_env(cfg: EnvConfig) -> tuple[gym.Env, Any]:
# Create base environment # Create base environment
env = RobotEnv( env = RobotEnv(
robot=robot, robot=robot,
use_gripper=cfg.processor.use_gripper, use_gripper=cfg.processor.gripper.use_gripper,
display_cameras=cfg.processor.display_cameras, display_cameras=cfg.processor.observation.display_cameras,
reset_pose=cfg.processor.fixed_reset_joint_positions, reset_pose=cfg.processor.reset.fixed_reset_joint_positions,
) )
return env, teleop_device return env, teleop_device
@@ -330,33 +331,48 @@ def make_processors(env, cfg):
env_pipeline_steps = [ env_pipeline_steps = [
ImageProcessor(), ImageProcessor(),
StateProcessor(), StateProcessor(),
JointVelocityProcessor(dt=1.0 / cfg.fps), JointVelocityProcessor(dt=1.0 / cfg.dataset.fps),
MotorCurrentProcessor(env=env), MotorCurrentProcessor(env=env),
ImageCropResizeProcessor( ImageCropResizeProcessor(
crop_params_dict=cfg.processor.crop_params_dict, resize_size=cfg.processor.resize_size crop_params_dict=cfg.processor.image_preprocessing.crop_params_dict,
resize_size=cfg.processor.image_preprocessing.resize_size,
), ),
TimeLimitProcessor(max_episode_steps=int(cfg.processor.control_time_s * cfg.fps)), TimeLimitProcessor(max_episode_steps=int(cfg.processor.reset.control_time_s * cfg.dataset.fps)),
] ]
if cfg.processor.use_gripper: if cfg.processor.gripper.use_gripper:
env_pipeline_steps.append( env_pipeline_steps.append(
GripperPenaltyProcessor( GripperPenaltyProcessor(
penalty=cfg.processor.gripper_penalty, max_gripper_pos=cfg.processor.max_gripper_pos penalty=cfg.processor.gripper.gripper_penalty,
max_gripper_pos=cfg.processor.inverse_kinematics.max_gripper_pos,
) )
) )
# Add reward classifier processor if configured
if cfg.processor.reward_classifier.pretrained_path is not None:
env_pipeline_steps.append(
RewardClassifierProcessor(
pretrained_path=cfg.processor.reward_classifier.pretrained_path,
device=cfg.device,
success_threshold=cfg.processor.reward_classifier.success_threshold,
success_reward=cfg.processor.reward_classifier.success_reward,
terminate_on_success=cfg.processor.reward_classifier.terminate_on_success,
)
)
env_pipeline_steps.append(DeviceProcessor(device=cfg.device)) env_pipeline_steps.append(DeviceProcessor(device=cfg.device))
env_processor = RobotProcessor(steps=env_pipeline_steps) env_processor = RobotProcessor(steps=env_pipeline_steps)
action_pipeline_steps = [ action_pipeline_steps = [
InterventionActionProcessor( InterventionActionProcessor(
use_gripper=cfg.processor.use_gripper, use_gripper=cfg.processor.gripper.use_gripper,
), ),
InverseKinematicsProcessor( InverseKinematicsProcessor(
urdf_path=cfg.processor.urdf_path, urdf_path=cfg.processor.inverse_kinematics.urdf_path,
target_frame_name=cfg.processor.target_frame_name, target_frame_name=cfg.processor.inverse_kinematics.target_frame_name,
end_effector_step_sizes=cfg.processor.end_effector_step_sizes, end_effector_step_sizes=cfg.processor.inverse_kinematics.end_effector_step_sizes,
end_effector_bounds=cfg.processor.end_effector_bounds, end_effector_bounds=cfg.processor.inverse_kinematics.end_effector_bounds,
max_gripper_pos=cfg.processor.max_gripper_pos, max_gripper_pos=cfg.processor.inverse_kinematics.max_gripper_pos,
), ),
] ]
action_processor = RobotProcessor(steps=action_pipeline_steps) action_processor = RobotProcessor(steps=action_pipeline_steps)
@@ -435,9 +451,9 @@ def step_env_and_process_transition(
def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvConfig): def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvConfig):
dt = 1.0 / cfg.fps dt = 1.0 / cfg.dataset.fps
print(f"Starting control loop at {cfg.fps} FPS") print(f"Starting control loop at {cfg.dataset.fps} FPS")
print("Controls:") print("Controls:")
print("- Use gamepad/teleop device for intervention") print("- Use gamepad/teleop device for intervention")
print("- When not intervening, robot will stay still") print("- When not intervening, robot will stay still")
@@ -460,7 +476,7 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
"next.reward": {"dtype": "float32", "shape": (1,), "names": None}, "next.reward": {"dtype": "float32", "shape": (1,), "names": None},
"next.done": {"dtype": "bool", "shape": (1,), "names": None}, "next.done": {"dtype": "bool", "shape": (1,), "names": None},
} }
if cfg.processor.use_gripper: if cfg.processor.gripper.use_gripper:
features["complementary_info.discrete_penalty"] = { features["complementary_info.discrete_penalty"] = {
"dtype": "float32", "dtype": "float32",
"shape": (1,), "shape": (1,),
@@ -483,9 +499,9 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
# Create dataset # Create dataset
dataset = LeRobotDataset.create( dataset = LeRobotDataset.create(
cfg.repo_id, cfg.dataset.repo_id,
cfg.fps, cfg.dataset.fps,
root=cfg.dataset_root, root=cfg.dataset.dataset_root,
use_videos=True, use_videos=True,
image_writer_threads=4, image_writer_threads=4,
image_writer_processes=0, image_writer_processes=0,
@@ -496,12 +512,12 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
episode_step = 0 episode_step = 0
episode_start_time = time.perf_counter() episode_start_time = time.perf_counter()
while episode_idx < cfg.num_episodes: while episode_idx < cfg.dataset.num_episodes:
step_start_time = time.perf_counter() step_start_time = time.perf_counter()
# Create a neutral action (no movement) # Create a neutral action (no movement)
neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32) neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
if hasattr(env, "use_gripper") and env.use_gripper: if cfg.processor.gripper.use_gripper:
neutral_action = torch.cat([neutral_action, torch.tensor([1.0])]) # Gripper stay neutral_action = torch.cat([neutral_action, torch.tensor([1.0])]) # Gripper stay
# Use the new step function # Use the new step function
@@ -524,11 +540,11 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
"next.reward": np.array([transition[TransitionKey.REWARD]], dtype=np.float32), "next.reward": np.array([transition[TransitionKey.REWARD]], dtype=np.float32),
"next.done": np.array([terminated or truncated], dtype=bool), "next.done": np.array([terminated or truncated], dtype=bool),
} }
if cfg.processor.use_gripper: if cfg.processor.gripper.use_gripper:
frame["complementary_info.discrete_penalty"] = np.array( frame["complementary_info.discrete_penalty"] = np.array(
[transition[TransitionKey.COMPLEMENTARY_DATA]["discrete_penalty"]], dtype=np.float32 [transition[TransitionKey.COMPLEMENTARY_DATA]["discrete_penalty"]], dtype=np.float32
) )
dataset.add_frame(frame, task=cfg.task) dataset.add_frame(frame, task=cfg.dataset.task)
episode_step += 1 episode_step += 1
@@ -562,14 +578,17 @@ def control_loop(env, env_processor, action_processor, teleop_device, cfg: EnvCo
# Maintain fps timing # Maintain fps timing
busy_wait(dt - (time.perf_counter() - step_start_time)) busy_wait(dt - (time.perf_counter() - step_start_time))
if cfg.mode == "record" and cfg.push_to_hub: if cfg.mode == "record" and cfg.dataset.push_to_hub:
logging.info("Pushing dataset to hub") logging.info("Pushing dataset to hub")
dataset.push_to_hub() dataset.push_to_hub()
def replay_trajectory(env, action_processor, cfg): def replay_trajectory(env, action_processor, cfg):
dataset = LeRobotDataset( dataset = LeRobotDataset(
cfg.repo_id, root=cfg.dataset_root, episodes=[cfg.episode], download_videos=False cfg.dataset.repo_id,
root=cfg.dataset.dataset_root,
episodes=[cfg.dataset.episode],
download_videos=False,
) )
dataset_actions = dataset.hf_dataset.select_columns(["action"]) dataset_actions = dataset.hf_dataset.select_columns(["action"])
_, info = env.reset() _, info = env.reset()
@@ -581,7 +600,7 @@ def replay_trajectory(env, action_processor, cfg):
) )
transition = action_processor(transition) transition = action_processor(transition)
env.step(transition[TransitionKey.ACTION]) env.step(transition[TransitionKey.ACTION])
busy_wait(1 / cfg.fps - (time.perf_counter() - start_time)) busy_wait(1 / cfg.dataset.fps - (time.perf_counter() - start_time))
@parser.wrap() @parser.wrap()