fix(examples): wrap all of them into a main function (#2524)

2026-07-12 12:32:02 +00:00 · 2025-11-26 14:28:04 +01:00
parent 87bee86640
commit 17581a9449
24 changed files with 1672 additions and 1532 deletions
@@ -20,6 +20,8 @@ from lerobot.teleoperators.utils import TeleopEvents

 LOG_EVERY = 10
 SEND_EVERY = 10
+MAX_EPISODES = 5
+MAX_STEPS_PER_EPISODE = 20


 def run_learner(
@@ -223,123 +225,123 @@ def make_policy_obs(obs, device: torch.device = "cpu"):
    }


-"""Main function - coordinates actor and learner processes."""
+def main():
+    """Main function - coordinates actor and learner processes."""

-device = "mps"  # or "cuda" or "cpu"
-output_directory = Path("outputs/robot_learning_tutorial/hil_serl")
-output_directory.mkdir(parents=True, exist_ok=True)
+    device = "mps"  # or "cuda" or "cpu"
+    output_directory = Path("outputs/robot_learning_tutorial/hil_serl")
+    output_directory.mkdir(parents=True, exist_ok=True)

-# find ports using lerobot-find-port
-follower_port = ...
-leader_port = ...
+    # find ports using lerobot-find-port
+    follower_port = ...
+    leader_port = ...

-# the robot ids are used the load the right calibration files
-follower_id = ...
-leader_id = ...
+    # the robot ids are used to load the right calibration files
+    follower_id = ...
+    leader_id = ...

-# A pretrained model (to be used in-distribution!)
-reward_classifier_id = "fracapuano/reward_classifier_hil_serl_example"
-reward_classifier = Classifier.from_pretrained(reward_classifier_id)
+    # A pretrained model (to be used in-distribution!)
+    reward_classifier_id = "<user>/reward_classifier_hil_serl_example"
+    reward_classifier = Classifier.from_pretrained(reward_classifier_id)

-reward_classifier.to(device)
-reward_classifier.eval()
+    reward_classifier.to(device)
+    reward_classifier.eval()

-MAX_EPISODES = 5
-MAX_STEPS_PER_EPISODE = 20
+    # Robot and environment configuration
+    robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id)
+    teleop_cfg = SO100LeaderConfig(port=leader_port, id=leader_id)
+    processor_cfg = HILSerlProcessorConfig(control_mode="leader")

-# Robot and environment configuration
-robot_cfg = SO100FollowerConfig(port=follower_port, id=follower_id)
-teleop_cfg = SO100LeaderConfig(port=leader_port, id=leader_id)
-processor_cfg = HILSerlProcessorConfig(control_mode="leader")
+    env_cfg = HILSerlRobotEnvConfig(robot=robot_cfg, teleop=teleop_cfg, processor=processor_cfg)

-env_cfg = HILSerlRobotEnvConfig(robot=robot_cfg, teleop=teleop_cfg, processor=processor_cfg)
+    # Create robot environment
+    env, teleop_device = make_robot_env(env_cfg)

-# Create robot environment
-env, teleop_device = make_robot_env(env_cfg)
+    obs_features = hw_to_dataset_features(env.robot.observation_features, "observation")
+    action_features = hw_to_dataset_features(env.robot.action_features, "action")

-obs_features = hw_to_dataset_features(env.robot.observation_features, "observation")
-action_features = hw_to_dataset_features(env.robot.action_features, "action")
+    # Create SAC policy for action selection
+    policy_cfg = SACConfig(
+        device=device,
+        input_features=obs_features,
+        output_features=action_features,
+    )

-# Create SAC policy for action selection
-policy_cfg = SACConfig(
-    device=device,
-    input_features=obs_features,
-    output_features=action_features,
-)
+    policy_actor = SACPolicy(policy_cfg)
+    policy_learner = SACPolicy(policy_cfg)

-policy_actor = SACPolicy(policy_cfg)
-policy_learner = SACPolicy(policy_cfg)
+    demonstrations_repo_id = "lerobot/example_hil_serl_dataset"
+    offline_dataset = LeRobotDataset(repo_id=demonstrations_repo_id)

-demonstrations_repo_id = "lerobot/example_hil_serl_dataset"
-offline_dataset = LeRobotDataset(repo_id=demonstrations_repo_id)
+    # Online buffer: initialized from scratch
+    online_replay_buffer = ReplayBuffer(device=device, state_keys=list(obs_features.keys()))
+    # Offline buffer: created from the dataset (pre-populated with demonstrations)
+    offline_replay_buffer = ReplayBuffer.from_lerobot_dataset(
+        lerobot_dataset=offline_dataset, device=device, state_keys=list(obs_features.keys())
+    )

-# Online buffer: initialized from scratch
-online_replay_buffer = ReplayBuffer(device=device, state_keys=list(obs_features.keys()))
-# Offline buffer: Created from dataset (pre-populated it with demonstrations)
-offline_replay_buffer = ReplayBuffer.from_lerobot_dataset(
-    lerobot_dataset=offline_dataset, device=device, state_keys=list(obs_features.keys())
-)
+    # Create communication channels between learner and actor processes
+    transitions_queue = mp.Queue(maxsize=10)
+    parameters_queue = mp.Queue(maxsize=2)
+    shutdown_event = mp.Event()

-# Create communication channels between learner and actor processes
-transitions_queue = mp.Queue(maxsize=10)
-parameters_queue = mp.Queue(maxsize=2)
-shutdown_event = mp.Event()
+    # Signal handler for graceful shutdown; Python invokes handlers as handler(signum, frame)
+    def signal_handler(sig, frame=None):
+        print(f"\nSignal {sig} received, shutting down...")
+        shutdown_event.set()
+
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    # Create processes
+    learner_process = mp.Process(
+        target=run_learner,
+        args=(
+            transitions_queue,
+            parameters_queue,
+            shutdown_event,
+            policy_learner,
+            online_replay_buffer,
+            offline_replay_buffer,
+        ),
+        kwargs={"device": device},  # can run on accelerated hardware for training
+    )
+
+    actor_process = mp.Process(
+        target=run_actor,
+        args=(
+            transitions_queue,
+            parameters_queue,
+            shutdown_event,
+            policy_actor,
+            reward_classifier,
+            env_cfg,
+            output_directory,
+        ),
+        kwargs={"device": "cpu"},  # actor is frozen, can run on CPU or accelerate for inference
+    )
+
+    learner_process.start()
+    actor_process.start()
+
+    try:
+        # Wait for actor to finish (it controls the episode loop)
+        actor_process.join()
+        shutdown_event.set()
+        learner_process.join(timeout=10)
+
+    except KeyboardInterrupt:
+        print("Main process interrupted")
+        shutdown_event.set()
+        actor_process.join(timeout=5)
+        learner_process.join(timeout=10)
+
+    finally:
+        if learner_process.is_alive():
+            learner_process.terminate()
+        if actor_process.is_alive():
+            actor_process.terminate()


-# Signal handler for graceful shutdown
-def signal_handler(sig):
-    print(f"\nSignal {sig} received, shutting down...")
-    shutdown_event.set()
-
-
-signal.signal(signal.SIGINT, signal_handler)
-signal.signal(signal.SIGTERM, signal_handler)
-
-# Create processes
-learner_process = mp.Process(
-    target=run_learner,
-    args=(
-        transitions_queue,
-        parameters_queue,
-        shutdown_event,
-        policy_learner,
-        online_replay_buffer,
-        offline_replay_buffer,
-    ),
-    kwargs={"device": device},  # can run on accelerated hardware for training
-)
-
-actor_process = mp.Process(
-    target=run_actor,
-    args=(
-        transitions_queue,
-        parameters_queue,
-        shutdown_event,
-        policy_actor,
-        reward_classifier,
-        env_cfg,
-        output_directory,
-    ),
-    kwargs={"device": "cpu"},  # actor is frozen, can run on CPU or accelerate for inference
-)
-
-learner_process.start()
-actor_process.start()
-
-try:
-    # Wait for actor to finish (it controls the episode loop)
-    actor_process.join()
-    shutdown_event.set()
-    learner_process.join(timeout=10)
-
-except KeyboardInterrupt:
-    print("Main process interrupted")
-    shutdown_event.set()
-    actor_process.join(timeout=5)
-    learner_process.join(timeout=10)
-
-finally:
-    if learner_process.is_alive():
-        learner_process.terminate()
-    if actor_process.is_alive():
-        actor_process.terminate()
+if __name__ == "__main__":
+    main()
@@ -4,59 +4,64 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.policies.factory import make_policy, make_pre_post_processors
 from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig

-# Device to use for training
-device = "mps"  # or "cuda", or "cpu"

-# Load the dataset used for training
-repo_id = "lerobot/example_hil_serl_dataset"
-dataset = LeRobotDataset(repo_id)
+def main():
+    # Device to use for training
+    device = "mps"  # or "cuda", or "cpu"

-# Configure the policy to extract features from the image frames
-camera_keys = dataset.meta.camera_keys
+    # Load the dataset used for training
+    repo_id = "lerobot/example_hil_serl_dataset"
+    dataset = LeRobotDataset(repo_id)

-config = RewardClassifierConfig(
-    num_cameras=len(camera_keys),
-    device=device,
-    # backbone model to extract features from the image frames
-    model_name="microsoft/resnet-18",
-)
+    # Configure the policy to extract features from the image frames
+    camera_keys = dataset.meta.camera_keys

-# Make policy, preprocessor, and optimizer
-policy = make_policy(config, ds_meta=dataset.meta)
-optimizer = config.get_optimizer_preset().build(policy.parameters())
-preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats)
+    config = RewardClassifierConfig(
+        num_cameras=len(camera_keys),
+        device=device,
+        # backbone model to extract features from the image frames
+        model_name="microsoft/resnet-18",
+    )
+
+    # Make policy, preprocessor, and optimizer
+    policy = make_policy(config, ds_meta=dataset.meta)
+    optimizer = config.get_optimizer_preset().build(policy.parameters())
+    preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats)
+
+    classifier_id = "<user>/reward_classifier_hil_serl_example"
+
+    # Instantiate a dataloader
+    dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)
+
+    # Training loop
+    num_epochs = 5
+    for epoch in range(num_epochs):
+        total_loss = 0
+        total_accuracy = 0
+        for batch in dataloader:
+            # Preprocess the batch and move it to the correct device.
+            batch = preprocessor(batch)
+
+            # Forward pass
+            loss, output_dict = policy.forward(batch)
+
+            # Backward pass and optimization
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            total_loss += loss.item()
+            total_accuracy += output_dict["accuracy"]
+
+        avg_loss = total_loss / len(dataloader)
+        avg_accuracy = total_accuracy / len(dataloader)
+        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%")
+
+    print("Training finished!")
+
+    # You can now save the trained policy.
+    policy.push_to_hub(classifier_id)


-classifier_id = "fracapuano/reward_classifier_hil_serl_example"
-
-# Instantiate a dataloader
-dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)
-
-# Training loop
-num_epochs = 5
-for epoch in range(num_epochs):
-    total_loss = 0
-    total_accuracy = 0
-    for batch in dataloader:
-        # Preprocess the batch and move it to the correct device.
-        batch = preprocessor(batch)
-
-        # Forward pass
-        loss, output_dict = policy.forward(batch)
-
-        # Backward pass and optimization
-        optimizer.zero_grad()
-        loss.backward()
-        optimizer.step()
-
-        total_loss += loss.item()
-        total_accuracy += output_dict["accuracy"]
-
-    avg_loss = total_loss / len(dataloader)
-    avg_accuracy = total_accuracy / len(dataloader)
-    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%")
-
-print("Training finished!")
-
-# You can now save the trained policy.
-policy.push_to_hub(classifier_id)
+if __name__ == "__main__":
+    main()