From ef8bfffbd72e9d0951de576553f89c7c281315de Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Sun, 26 Apr 2026 23:09:33 +0200 Subject: [PATCH] fix(rl): enhance intervention handling in actor and learner --- src/lerobot/rl/actor.py | 7 ++++++- src/lerobot/rl/learner.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index 2a274da0b..d598bf015 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -338,7 +338,8 @@ def act_with_policy( # Check for intervention from transition info intervention_info = new_transition[TransitionKey.INFO] - if intervention_info.get(TeleopEvents.IS_INTERVENTION, False): + is_intervention = bool(intervention_info.get(TeleopEvents.IS_INTERVENTION, False)) + if is_intervention: episode_intervention = True episode_intervention_steps += 1 @@ -346,6 +347,10 @@ def act_with_policy( "discrete_penalty": torch.tensor( [new_transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)] ), + # Forward the intervention flag so the learner can route this transition + # into the offline replay buffer (see `process_transitions` in learner.py). + # Use the plain string key so the payload survives torch.load(weights_only=True). + TeleopEvents.IS_INTERVENTION.value: is_intervention, } # Create transition for learner (convert to old format) list_transition_to_send_to_learner.append( diff --git a/src/lerobot/rl/learner.py b/src/lerobot/rl/learner.py index af910d314..452116697 100644 --- a/src/lerobot/rl/learner.py +++ b/src/lerobot/rl/learner.py @@ -1168,7 +1168,7 @@ def process_transitions( # Add to offline buffer if it's an intervention if dataset_repo_id is not None and transition.get("complementary_info", {}).get( - TeleopEvents.IS_INTERVENTION + TeleopEvents.IS_INTERVENTION.value ): offline_replay_buffer.add(**transition)