fix(rl): enhance intervention handling in actor and learner

2026-05-11 14:49:43 +00:00 · 2026-04-26 23:09:33 +02:00
parent f887ab3f6a
commit ef8bfffbd7
2 changed files with 7 additions and 2 deletions
@@ -338,7 +338,8 @@ def act_with_policy(

        # Check for intervention from transition info
        intervention_info = new_transition[TransitionKey.INFO]
-        if intervention_info.get(TeleopEvents.IS_INTERVENTION, False):
+        is_intervention = bool(intervention_info.get(TeleopEvents.IS_INTERVENTION, False))
+        if is_intervention:
            episode_intervention = True
            episode_intervention_steps += 1

@@ -346,6 +347,10 @@ def act_with_policy(
            "discrete_penalty": torch.tensor(
                [new_transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)]
            ),
+            # Forward the intervention flag so the learner can route this transition
+            # into the offline replay buffer (see `process_transitions` in learner.py).
+            # Use the plain string key so the payload survives torch.load(weights_only=True).
+            TeleopEvents.IS_INTERVENTION.value: is_intervention,
        }
        # Create transition for learner (convert to old format)
        list_transition_to_send_to_learner.append(
@@ -1168,7 +1168,7 @@ def process_transitions(

            # Add to offline buffer if it's an intervention
            if dataset_repo_id is not None and transition.get("complementary_info", {}).get(
-                TeleopEvents.IS_INTERVENTION
+                TeleopEvents.IS_INTERVENTION.value
            ):
                offline_replay_buffer.add(**transition)