fix(rl): enhance intervention handling in actor and learner

Khalil Meftah
2026-04-26 23:09:33 +02:00
parent f887ab3f6a
commit ef8bfffbd7
2 changed files with 7 additions and 2 deletions
+6 -1
@@ -338,7 +338,8 @@ def act_with_policy(
             # Check for intervention from transition info
             intervention_info = new_transition[TransitionKey.INFO]
-            if intervention_info.get(TeleopEvents.IS_INTERVENTION, False):
+            is_intervention = bool(intervention_info.get(TeleopEvents.IS_INTERVENTION, False))
+            if is_intervention:
                 episode_intervention = True
                 episode_intervention_steps += 1
@@ -346,6 +347,10 @@ def act_with_policy(
                 "discrete_penalty": torch.tensor(
                     [new_transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)]
                 ),
+                # Forward the intervention flag so the learner can route this transition
+                # into the offline replay buffer (see `process_transitions` in learner.py).
+                # Use the plain string key so the payload survives torch.load(weights_only=True).
+                TeleopEvents.IS_INTERVENTION.value: is_intervention,
             }
             # Create transition for learner (convert to old format)
             list_transition_to_send_to_learner.append(
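The comment in the second hunk leans on a PyTorch behavior worth spelling out: torch.load(weights_only=True) restricts unpickling to an allowlist of safe types (tensors, containers, primitives), so a payload dict keyed by a custom Enum member is rejected at load time, while the same dict keyed by the enum's plain string value round-trips. The first hunk captures the flag once as a real bool so the same value can drive both the branch and the payload. A minimal sketch of the serialization point, with a stand-in TeleopEvents rather than the actual lerobot class:

import io
from enum import Enum

import torch


class TeleopEvents(Enum):  # stand-in for the real lerobot enum
    IS_INTERVENTION = "is_intervention"


def roundtrip(payload):
    """Serialize, then deserialize with the restricted unpickler."""
    buf = io.BytesIO()
    torch.save(payload, buf)
    buf.seek(0)
    return torch.load(buf, weights_only=True)


# Plain string key: only allowlisted types in the payload, loads fine.
print(roundtrip({TeleopEvents.IS_INTERVENTION.value: True}))

# Enum key: the custom Enum class is not on the allowlist, so loading raises
# (typically an UnpicklingError naming the unsupported global).
try:
    roundtrip({TeleopEvents.IS_INTERVENTION: True})
except Exception as exc:
    print(f"rejected: {type(exc).__name__}")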
+1 -1
@@ -1168,7 +1168,7 @@ def process_transitions(
         # Add to offline buffer if it's an intervention
         if dataset_repo_id is not None and transition.get("complementary_info", {}).get(
-            TeleopEvents.IS_INTERVENTION
+            TeleopEvents.IS_INTERVENTION.value
         ):
             offline_replay_buffer.add(**transition)
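The learner-side hunk is the matching half of the fix. Assuming TeleopEvents is a plain Enum (not a str mixin), the member and its string value are distinct dictionary keys, so the old lookup with the bare member always missed the string-keyed payload the actor now sends, and intervention transitions never reached the offline buffer. A small illustration with the same stand-in enum and a hypothetical transition dict:

from enum import Enum


class TeleopEvents(Enum):  # stand-in; assumes a plain Enum, not a str mixin
    IS_INTERVENTION = "is_intervention"


# Hypothetical transition shaped like the actor's payload above.
transition = {"complementary_info": {TeleopEvents.IS_INTERVENTION.value: True}}
info = transition.get("complementary_info", {})

# Old lookup: the Enum member neither equals nor hashes like its string value,
# so .get() silently returns None and the intervention is dropped.
print(info.get(TeleopEvents.IS_INTERVENTION))        # None
# Fixed lookup: matches the string key actually stored by the actor.
print(info.get(TeleopEvents.IS_INTERVENTION.value))  # True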