From 4b779b1e99ff5426204a7d203fbf988edc3e4e2f Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Mon, 22 Jun 2026 14:39:45 +0200 Subject: [PATCH] feat(recap): add advantage conditioning recipe YAMLs --- .../configs/recipes/recap_advantage.yaml | 30 ++++ .../recipes/recap_advantage_blend.yaml | 41 +++++ tests/configs/test_recap_recipe.py | 145 ++++++++++++++++++ 3 files changed, 216 insertions(+) create mode 100644 src/lerobot/configs/recipes/recap_advantage.yaml create mode 100644 src/lerobot/configs/recipes/recap_advantage_blend.yaml create mode 100644 tests/configs/test_recap_recipe.py diff --git a/src/lerobot/configs/recipes/recap_advantage.yaml b/src/lerobot/configs/recipes/recap_advantage.yaml new file mode 100644 index 000000000..de4558bf1 --- /dev/null +++ b/src/lerobot/configs/recipes/recap_advantage.yaml @@ -0,0 +1,30 @@ +# RECAP advantage-conditioned recipe. +# +# Composes task + advantage indicator into the prompt for conditional SFT. +# The advantage binding resolves to "positive" or "negative" from the +# language_persistent column (written by lerobot-annotate --advantage). +# When advantage is absent (30% dropout), the advantage turn is skipped +# entirely via if_present, training the unconditional branch for CFG. +# +# This recipe is policy-agnostic: any VLA that consumes chat-style messages +# can use it. Override bindings or add blend components for task-specific needs. +# +# Paper: pi*0.6, Section IV-B (conditional policy training with I_t). + +bindings: + advantage: "active_at(t, style=advantage)" + +messages: + - role: user + content: "${task}" + stream: high_level + + - role: user + content: "Advantage: ${advantage}" + stream: high_level + if_present: advantage + + - role: assistant + content: "${subtask}" + stream: low_level + target: true diff --git a/src/lerobot/configs/recipes/recap_advantage_blend.yaml b/src/lerobot/configs/recipes/recap_advantage_blend.yaml new file mode 100644 index 000000000..bb32e7487 --- /dev/null +++ b/src/lerobot/configs/recipes/recap_advantage_blend.yaml @@ -0,0 +1,41 @@ +# RECAP full recipe with advantage conditioning and subtask blending. +# +# Blend of two training modes: +# 1. advantage_conditioned (70%): Task + advantage indicator → action +# 2. unconditional (30%): Task only → action (no advantage, trains CFG baseline) +# +# This achieves the same effect as per-frame dropout in the annotation module +# but at the recipe level, giving explicit control over the conditioning ratio. +# Use this instead of annotation-level dropout if you want a fixed split. +# +# Paper: pi*0.6, Appendix E (classifier-free guidance requires both branches). + +blend: + advantage_conditioned: + weight: 0.7 + messages: + - role: user + content: "${task}\nAdvantage: ${advantage}" + stream: high_level + if_present: advantage + + - role: user + content: "${task}" + stream: high_level + + - role: assistant + content: "${subtask}" + stream: low_level + target: true + + unconditional: + weight: 0.3 + messages: + - role: user + content: "${task}" + stream: high_level + + - role: assistant + content: "${subtask}" + stream: low_level + target: true diff --git a/tests/configs/test_recap_recipe.py b/tests/configs/test_recap_recipe.py new file mode 100644 index 000000000..99828d606 --- /dev/null +++ b/tests/configs/test_recap_recipe.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python + +# Copyright 2026 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for RECAP advantage conditioning recipes.""" + +from __future__ import annotations + +from pathlib import Path + +from lerobot.configs.recipe import load_recipe +from lerobot.datasets.language_render import render_sample + +RECIPES_DIR = Path(__file__).resolve().parents[2] / "src" / "lerobot" / "configs" / "recipes" + + +def _persistent_rows(advantage: str | None = None): + """Build minimal persistent rows with optional advantage.""" + rows = [ + { + "role": "user", + "content": "pick up the cup", + "style": "task_aug", + "timestamp": 0.0, + "camera": None, + "tool_calls": None, + }, + { + "role": "assistant", + "content": "reaching for the cup", + "style": "subtask", + "timestamp": 0.0, + "camera": None, + "tool_calls": None, + }, + ] + if advantage is not None: + rows.append( + { + "role": "user", + "content": advantage, + "style": "advantage", + "timestamp": 0.0, + "camera": None, + "tool_calls": None, + } + ) + return rows + + +def test_recap_advantage_recipe_loads(): + """The recap_advantage.yaml recipe loads without errors.""" + recipe = load_recipe(RECIPES_DIR / "recap_advantage.yaml") + assert recipe.messages is not None + assert len(recipe.messages) == 3 + assert recipe.bindings == {"advantage": "active_at(t, style=advantage)"} + + +def test_advantage_present_renders_indicator(): + """When advantage annotation exists, the prompt includes 'Advantage: positive'.""" + recipe = load_recipe(RECIPES_DIR / "recap_advantage.yaml") + result = render_sample( + recipe=recipe, + persistent=_persistent_rows(advantage="positive"), + events=[], + t=0.5, + sample_idx=0, + task="pick up the cup", + ) + assert result is not None + messages = result["messages"] + assert len(messages) == 3 + assert messages[1]["content"] == "Advantage: positive" + + +def test_advantage_negative_renders_indicator(): + """Negative advantage also appears in the prompt.""" + recipe = load_recipe(RECIPES_DIR / "recap_advantage.yaml") + result = render_sample( + recipe=recipe, + persistent=_persistent_rows(advantage="negative"), + events=[], + t=0.5, + sample_idx=0, + task="pick up the cup", + ) + assert result is not None + messages = result["messages"] + assert messages[1]["content"] == "Advantage: negative" + + +def test_advantage_absent_skips_turn(): + """When no advantage annotation exists (dropout), the advantage turn is skipped.""" + recipe = load_recipe(RECIPES_DIR / "recap_advantage.yaml") + result = render_sample( + recipe=recipe, + persistent=_persistent_rows(advantage=None), + events=[], + t=0.5, + sample_idx=0, + task="pick up the cup", + ) + assert result is not None + messages = result["messages"] + # Only task + subtask, no advantage turn + assert len(messages) == 2 + assert messages[0]["content"] == "pick up the cup" + assert messages[1]["content"] == "reaching for the cup" + + +def test_advantage_absent_still_has_target(): + """Even without advantage, the target message (subtask) is preserved.""" + recipe = load_recipe(RECIPES_DIR / "recap_advantage.yaml") + result = render_sample( + recipe=recipe, + persistent=_persistent_rows(advantage=None), + events=[], + t=0.5, + sample_idx=0, + task="pick up the cup", + ) + assert result is not None + assert result["target_message_indices"] == [1] + + +def test_blend_recipe_loads(): + """The blend recipe has two components with correct weights.""" + recipe = load_recipe(RECIPES_DIR / "recap_advantage_blend.yaml") + assert recipe.blend is not None + assert "advantage_conditioned" in recipe.blend + assert "unconditional" in recipe.blend + assert recipe.blend["advantage_conditioned"].weight == 0.7 + assert recipe.blend["unconditional"].weight == 0.3