diff --git a/examples/annotation/run_hf_job.py b/examples/annotation/run_hf_job.py
index 49af5da16..913815153 100644
--- a/examples/annotation/run_hf_job.py
+++ b/examples/annotation/run_hf_job.py
@@ -23,18 +23,6 @@
 token = os.environ.get("HF_TOKEN") or get_token()
 if not token:
     raise RuntimeError("No HF token. Run `huggingface-cli login` or `export HF_TOKEN=hf_...`")
-# --- Diversity knobs (Pi0.7-style prompt expansion) -----------------------
-# Bumped roughly 3x across the board to fight memorization on small datasets.
-# A single dataset trained for many epochs with deterministic atom wording
-# converges to perfect recall on training prompts but produces JSON-token
-# garbage at inference for any wording that drifts slightly. More atom
-# variants per episode + higher sampling temperature widens the training
-# distribution so the model has to actually use its language head, not
-# just memorize.
-#
-# Pushes to a *new* hub repo (``_tool3``) so the previous annotation pass
-# (``_tool2``) stays intact — re-train from scratch on the new dataset and
-# compare loss-curve shapes to verify the diversity bump is doing something.
 CMD = (
     "apt-get update -qq && apt-get install -y -qq git ffmpeg && "
     "pip install --no-deps "
diff --git a/src/lerobot/configs/recipes/pi052_hirobot.yaml b/src/lerobot/configs/recipes/pi052_hirobot.yaml
index 40a20387d..c2f80da71 100644
--- a/src/lerobot/configs/recipes/pi052_hirobot.yaml
+++ b/src/lerobot/configs/recipes/pi052_hirobot.yaml
@@ -1,51 +1,13 @@
-# π0.5 v2 (pi052) — Hi-Robot / MEM / ECoT blend.
+# π0.5 v2 (pi052) Hi-Robot blend.
 #
-# Architecturally mirrors ``smolvla2_hirobot.yaml`` — same two
-# flavors, same sub-recipes — but the rendered messages are fed
-# to PaliGemma (PaliGemma is not chat-pretrained, so the
-# ``PI052TextTokenizerStep`` concatenates them as ``Role: content``
-# plain text rather than calling ``apply_chat_template``).
-#
-# Two flavors
-# -----------
-#
-# Flavor 1 — ``action_execution`` (~60% weight)
-#     The main always-on recipe. Fuses all available context
-#     (task + plan + memory) into a unified user prompt, and
-#     uses the current subtask as the assistant target. This
-#     single recipe supervises *both*:
-#       * subtask prediction (text CE on the assistant span,
-#         lm_head), and
-#       * action chunks (flow MSE on the action expert via
-#         ``stream: low_level, target: true``, plus the FAST
-#         CE on the action tokens when enabled).
-#     Pi 0.7 §V.A — subtask in the prompt + flow on actions.
-#
-# Flavor 2 — event-driven text-only recipes
-#     ``ask_vqa_*``. Each handles a specific high-level event
-#     with a TEXT output. ``if_present`` guards keep them from
-#     firing on frames without the relevant annotation.
-#
-# Memory updates are folded INTO ``action_execution`` as a
-# conditional second target gated on boundary frames — see
-# ``smolvla2_hirobot.yaml`` for the rationale. The
-# ``user_interjection_response`` recipe was dropped — the
-# current datasets don't include interjection / say() annotations.
+# Same shape as ``smolvla2_hirobot.yaml`` — see that file for the
+# flavor breakdown. The only difference here is the backbone:
+# PaliGemma isn't chat-pretrained, so ``PI052TextTokenizerStep``
+# concatenates messages as ``Role: content`` plain text instead
+# of calling ``apply_chat_template``.
 blend:
-  # ----------------------------------------------------------
-  # FLAVOR 1: action_execution (main path)
-  #
-  # Bundles memory updates inline. On most frames the binding
-  # ``new_memory: emitted_at(t, style=memory)`` returns None and
-  # only the subtask is supervised. On *boundary* frames (the
-  # exact timestamp a new memory was annotated — i.e. when a
-  # subtask just completed) the binding fires and the recipe
-  # supervises the new memory as a follow-up assistant turn,
-  # with a "Completed subtask: …" user message in between to
-  # separate the two outputs in the rendered prefix.
-  # ----------------------------------------------------------
   action_execution:
     weight: 0.85
     bindings:
@@ -55,17 +17,10 @@ blend:
         stream: high_level
         content: "${task}\nPlan: ${plan}\nMemory: ${memory}"
       - {role: assistant, content: "${subtask}", stream: low_level, target: true, if_present: subtask}
-      # Memory-update tail — only renders at boundary frames where
-      # ``new_memory`` fires. The new memory is appended as a second
-      # assistant turn right after the subtask, with no intervening
-      # user filler: at a subtask boundary the model emits the new
-      # subtask AND the updated memory in one forward pass.
+      # Boundary-frame tail: at a subtask transition, predict the
+      # new memory as a second assistant turn (same forward pass).
       - {role: assistant, content: "${new_memory}", stream: high_level, target: true, if_present: new_memory}
-  # ----------------------------------------------------------
-  # FLAVOR 2: event-driven text-only paths
-  # ----------------------------------------------------------
-
   ask_vqa_top:
     weight: 0.075
     bindings:
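Aside (not part of the patch): a minimal Python sketch of the "Role: content" flattening the pi052 header describes. It is illustrative only, assuming messages are plain {role, content} dicts; the real ``PI052TextTokenizerStep`` lives in the tokenizer pipeline and ``render_plain`` is a hypothetical helper.

    # Hypothetical sketch of the plain-text rendering used instead of
    # apply_chat_template for a backbone (PaliGemma) that is not
    # chat-pretrained. Not the actual PI052TextTokenizerStep.
    def render_plain(messages: list[dict]) -> str:
        """Concatenate chat messages as 'Role: content' lines."""
        return "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)

    messages = [
        {"role": "user", "content": "set the table\nPlan: ...\nMemory: ..."},
        {"role": "assistant", "content": "pick up the fork"},
    ]
    print(render_plain(messages))
    # User: set the table
    # Plan: ...
    # Memory: ...
    # Assistant: pick up the fork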
diff --git a/src/lerobot/configs/recipes/smolvla2_hirobot.yaml b/src/lerobot/configs/recipes/smolvla2_hirobot.yaml
index d96bd168d..8579d9622 100644
--- a/src/lerobot/configs/recipes/smolvla2_hirobot.yaml
+++ b/src/lerobot/configs/recipes/smolvla2_hirobot.yaml
@@ -1,68 +1,13 @@
-# SmolVLA2 canonical training recipe — Hi Robot / MEM / ECoT blend.
+# SmolVLA2 Hi-Robot blend — two flavors:
 #
-# Inspired by Pi 0.7 §V (Diversifying the Prompt) and Pi 0.5's
-# hierarchical subtask training. The blend has **two flavors**:
-#
-# Flavor 1 — ``action_execution`` (~60% weight)
-#     The main always-on recipe. Fuses all available context
-#     (task + plan + memory) into a unified user prompt, and
-#     uses the current subtask as the assistant target. This
-#     single recipe supervises *both*:
-#       * subtask prediction (text CE on the assistant span,
-#         lm_head), and
-#       * action chunks (flow MSE on the action expert via
-#         ``stream: low_level, target: true``, plus the FAST
-#         CE on the action tokens when enabled).
-#     At inference, the same prompt structure is used:
-#       * the high-level loop calls ``select_message`` with the
-#         user prompt only → generates the next subtask.
-#       * the low-level loop calls ``predict_action_chunk`` with
-#         the user prompt + the generated subtask as the
-#         assistant turn → generates the action chunk.
-#     Replaces what used to be three separate recipes
-#     (``high_level_subtask`` + ``low_level_execution`` + the
-#     implicit subtask-in-prompt context) in earlier drafts.
-#     Pi 0.7's §V.A "Subtask instructions" pattern.
-#
-# Flavor 2 — event-driven text-only recipes
-#     Each handles a specific high-level event with a TEXT
-#     output (no action supervision). They fire when the
-#     binding for the event resolves to non-None:
-#       * ``ask_vqa_top`` / ``ask_vqa_wrist``: answer a
-#         camera-grounded visual question.
-#     All use ``stream: high_level`` (no flow loss) and rely on
-#     ``if_present`` guards so they only fire on frames where
-#     the relevant event annotation is present.
-#
-# ``memory_update`` is folded into Flavor 1 (gated on the
-# ``new_memory`` binding at boundary frames).
-# ``user_interjection_response`` was dropped — the current
-# datasets don't include interjection / say() annotations.
-#
-# How the chat tokenizer interprets the flavor split
-# ---------------------------------------------------
-# * predict_actions = bool(targets_by_stream.get("low_level"))
-#   → True only for Flavor 1 (action_execution).
-# * text_labels supervises whatever assistant turns are marked
-#   target=true. For action_execution, this is the subtask
-#   string. For Flavor 2, it's the corresponding text output.
+# 1. action_execution — fused (task + plan + memory) prompt;
+#    supervises the current subtask (low_level: flow + text CE)
+#    and, at memory-boundary frames, the new memory too.
+# 2. ask_vqa_{top,wrist} — text-only VQA on a camera image,
+#    gated by ``if_present`` so they only fire on annotated frames.
 blend:
-  # ----------------------------------------------------------
-  # FLAVOR 1: action_execution (main path)
-  #
-  # Bundles memory updates inline. On most frames the binding
-  # ``new_memory: emitted_at(t, style=memory)`` returns None and
-  # only the subtask is supervised. On *boundary* frames (the
-  # exact timestamp a new memory was annotated — i.e. when a
-  # subtask just completed) the binding fires and the recipe
-  # supervises the new memory as a follow-up assistant turn,
-  # with a "Completed subtask: …" user message in between to
-  # separate the two outputs in the chat sequence. Mirrors the
-  # behaviour of the old standalone ``memory_update`` recipe
-  # but keeps everything inside the unified action_execution.
-  # ----------------------------------------------------------
   action_execution:
     weight: 0.85
     bindings:
@@ -72,17 +17,10 @@ blend:
         stream: high_level
         content: "${task}\nPlan: ${plan}\nMemory: ${memory}"
       - {role: assistant, content: "${subtask}", stream: low_level, target: true, if_present: subtask}
-      # Memory-update tail — only renders at boundary frames where
-      # ``new_memory`` fires. The new memory is appended as a second
-      # assistant turn right after the subtask, with no intervening
-      # user filler: at a subtask boundary the model emits the new
-      # subtask AND the updated memory in one forward pass.
+      # Boundary-frame tail: at a subtask transition, predict the
+      # new memory as a second assistant turn (same forward pass).
       - {role: assistant, content: "${new_memory}", stream: high_level, target: true, if_present: new_memory}
-  # ----------------------------------------------------------
-  # FLAVOR 2: event-driven text-only paths
-  # ----------------------------------------------------------
-
   ask_vqa_top:
     weight: 0.075
     bindings:
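Aside (not part of the patch): a minimal sketch of the ``if_present`` gating and boundary-frame tail the comments above describe. Binding names (subtask, new_memory) mirror the YAML; the ``render`` helper, the simplified RECIPE, and the example bindings are hypothetical, not the actual recipe renderer.

    # Hypothetical sketch: messages whose if_present binding resolves to
    # None are dropped, so the new_memory assistant turn only renders on
    # boundary frames. Streams and targets are omitted for brevity.
    from string import Template

    RECIPE = [
        {"role": "user", "content": "${task}\nPlan: ${plan}\nMemory: ${memory}"},
        {"role": "assistant", "content": "${subtask}", "if_present": "subtask"},
        {"role": "assistant", "content": "${new_memory}", "if_present": "new_memory"},
    ]

    def render(bindings: dict) -> list[dict]:
        """Drop any message whose if_present binding is None, then fill
        the ${...} placeholders from the remaining bindings."""
        out = []
        for msg in RECIPE:
            guard = msg.get("if_present")
            if guard is not None and bindings.get(guard) is None:
                continue  # e.g. new_memory is None on non-boundary frames
            out.append({"role": msg["role"],
                        "content": Template(msg["content"]).safe_substitute(bindings)})
        return out

    # Ordinary frame: user prompt + subtask assistant turn only.
    ordinary = render({"task": "set the table", "plan": "...", "memory": "...",
                       "subtask": "pick up the fork"})
    # Boundary frame: the new memory renders as a second assistant turn.
    boundary = render({"task": "set the table", "plan": "...", "memory": "...",
                       "subtask": "place the fork", "new_memory": "fork is on the table"})
    assert len(ordinary) == 2 and len(boundary) == 3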