diff --git a/src/lerobot/configs/recipes/hirobot.yaml b/src/lerobot/configs/recipes/hirobot.yaml
index 8eb21cc3c..3cf5bc573 100644
--- a/src/lerobot/configs/recipes/hirobot.yaml
+++ b/src/lerobot/configs/recipes/hirobot.yaml
@@ -1,14 +1,13 @@
 # Hi-Robot blend — shared between SmolVLA2 (SmolVLM2 backbone) and
-# PI052 (PaliGemma backbone). π0.5-style split:
+# PI052 (PaliGemma backbone).
 #
-#   The action expert is conditioned on (images, state, subtask) only.
-#   Hierarchical context (task + plan + memory) only flows into the
-#   high-level text head.
+#   Text supervision covers two things only: subtasks and VQA (the
+#   action flow loss is trained alongside). Plan and memory are left
+#   out for now, which keeps the prompt short and the training surface
+#   small while the core subtask + action loop is validated.
 #
-#     high_level_subtask  — predict subtask from (task+plan+memory),
-#                           and the new memory at boundary frames.
+#     high_level_subtask  — predict the subtask from the task.
 #     low_level_execution — flow loss with [images, subtask, state].
-#     plan_generation     — task → plan.
 #     ask_vqa_{top,wrist} — camera-grounded VQA.
 #
 # Each backbone's text tokenizer renders these messages differently
@@ -18,20 +17,13 @@
 blend:
 
   high_level_subtask:
-    weight: 0.50
-    bindings:
-      new_memory: "emitted_at(t, style=memory)"
+    weight: 0.40
     messages:
-      - role: user
-        stream: high_level
-        content: "${task}\nPlan: ${plan}\nMemory: ${memory}"
+      - {role: user, content: "${task}", stream: high_level}
       - {role: assistant, content: "${subtask}", stream: high_level, target: true, if_present: subtask}
-      # Boundary-frame tail: at a subtask transition, also predict
-      # the new memory in the same forward pass.
-      - {role: assistant, content: "${new_memory}", stream: high_level, target: true, if_present: new_memory}
 
   low_level_execution:
-    weight: 0.30
+    weight: 0.40
     messages:
       # π0.5-style action conditioning. The action expert sees only
       # [images, this user turn (= bare subtask), state]. No text-CE
@@ -40,16 +32,8 @@ blend:
       # flow loss fires.
       - {role: user, content: "${subtask}", stream: low_level, if_present: subtask}
 
-  plan_generation:
-    weight: 0.10
-    bindings:
-      current_plan: "active_at(t, style=plan)"
-    messages:
-      - {role: user, content: "${task}", stream: high_level}
-      - {role: assistant, content: "${current_plan}", stream: high_level, target: true, if_present: current_plan}
-
   ask_vqa_top:
-    weight: 0.05
+    weight: 0.10
     bindings:
       vqa_query: "emitted_at(t, style=vqa, role=user, camera=observation.images.front)"
       vqa: "emitted_at(t, style=vqa, role=assistant, camera=observation.images.front)"
@@ -63,7 +47,7 @@ blend:
       - {role: assistant, content: "${vqa}", stream: high_level, target: true, if_present: vqa}
 
   ask_vqa_wrist:
-    weight: 0.05
+    weight: 0.10
     bindings:
       vqa_query: "emitted_at(t, style=vqa, role=user, camera=observation.images.wrist)"
       vqa: "emitted_at(t, style=vqa, role=assistant, camera=observation.images.wrist)"
diff --git a/src/lerobot/policies/smolvla2/inference/runtime.py b/src/lerobot/policies/smolvla2/inference/runtime.py
index 3d76015ab..24b32a940 100644
--- a/src/lerobot/policies/smolvla2/inference/runtime.py
+++ b/src/lerobot/policies/smolvla2/inference/runtime.py
@@ -30,11 +30,8 @@ from .steps import (
     AskVQAFwd,
     DispatchAction,
     DispatchToolCalls,
-    HighLevelSubtaskFwd,
     InferenceStep,
     LowLevelForward,
-    MemoryUpdateFwd,
-    UserInterjectionFwd,
 )
 from .triggers import HzTrigger, TickClock
 
@@ -69,31 +66,24 @@ class SmolVLA2Runtime:
     _stop: bool = field(default=False, init=False)
 
     def __post_init__(self) -> None:
-        # Pipeline order matters. Both ``HighLevelSubtaskFwd`` and
-        # ``LowLevelForward`` are gated on "action queue is empty" so
-        # the slow LLM call (select_message) doesn't starve dispatch.
-        # If LowLevelForward runs first, it refills the queue and the
-        # high-level step never sees ``queue == 0`` afterwards.
+        # Reduced pipeline (current scope): VQA + low-level control.
+        # Plan and memory are out of the training recipe for now, and
+        # this runtime also omits the high-level subtask, memory-update
+        # and interjection steps. The remaining loop is:
         #
-        # Order is therefore: high-level steps that read state (subtask,
-        # memory, interjection, vqa) → low-level chunk refresh → action
-        # dispatch → tool dispatch. So on an empty-queue tick the
-        # subtask refreshes first, the new subtask string flows into
-        # the next chunk's prompt, and DispatchAction drains.
+        #   AskVQAFwd         → answer camera-grounded questions on stdin
+        #   LowLevelForward   → action chunk, conditioned on the task
+        #                       string because no subtask is generated
+        #                       (``current_subtask or task`` fallback).
+        #                       NOTE(review): recipe trains on ${subtask}.
+        #   DispatchAction    → drain the chunk to the robot
+        #   DispatchToolCalls → fire any pending tool calls
+        #
+        # ``HighLevelSubtaskFwd`` / ``MemoryUpdateFwd`` /
+        # ``UserInterjectionFwd`` are still importable from
+        # ``inference.steps`` — re-add them here once plan / memory /
+        # subtask generation is back in scope.
         self.pipeline = [
-            HighLevelSubtaskFwd(
-                trigger=HzTrigger(self.high_level_hz),
-                policy=self.policy,
-                observation_provider=self.observation_provider,
-            ),
-            MemoryUpdateFwd(
-                policy=self.policy,
-                observation_provider=self.observation_provider,
-            ),
-            UserInterjectionFwd(
-                policy=self.policy,
-                observation_provider=self.observation_provider,
-            ),
             AskVQAFwd(
                 policy=self.policy,
                 observation_provider=self.observation_provider,