diff --git a/src/lerobot/configs/recipes/hirobot.yaml b/src/lerobot/configs/recipes/hirobot.yaml
index 3ee88690a..2fae907e1 100644
--- a/src/lerobot/configs/recipes/hirobot.yaml
+++ b/src/lerobot/configs/recipes/hirobot.yaml
@@ -3,11 +3,11 @@
 #
 #   Trains two things only: subtasks and VQA. Plan and memory are
 #   intentionally left out for now — keeps the prompt short and the
-#   training surface small while the core action loop is validated.
+#   training surface small while the core subtask + action loop is
+#   validated.
 #
-#     high_level_subtask  — predict the subtask from the task (text
-#                           head only; not on the inference path yet).
-#     low_level_execution — flow loss with [images, task, state].
+#     high_level_subtask  — predict the subtask from the task.
+#     low_level_execution — flow loss with [images, subtask, state].
 #     ask_vqa_{top,wrist} — camera-grounded VQA.
 #
 # Each backbone's text tokenizer renders these messages differently
@@ -25,15 +25,14 @@ blend:
   low_level_execution:
     weight: 0.40
     messages:
-      # The action expert is conditioned on the TASK (not the subtask).
-      # The task is always available at inference with no high-level
-      # generation loop, so this removes the train/inference mismatch
-      # that a subtask-conditioned action head would have while there
-      # is no reliable runtime subtask source. ``high_level_subtask``
-      # still trains the text head to predict subtasks for later use.
-      # ``stream: low_level`` flips ``predict_actions=True`` so the
-      # flow loss fires; no text-CE target here.
-      - {role: user, content: "${task}", stream: low_level}
+      # The action expert is conditioned on the SUBTASK — at inference
+      # the high-level loop (``HighLevelSubtaskFwd``) generates the
+      # subtask via the LM head and feeds it here. The action expert's
+      # prefix is [images, subtask, state]. ``stream: low_level`` flips
+      # ``predict_actions=True`` so the flow loss fires; no text-CE
+      # target here (subtask prediction is owned by
+      # ``high_level_subtask``).
+      - {role: user, content: "${subtask}", stream: low_level, if_present: subtask}
 
   ask_vqa_top:
     weight: 0.10
diff --git a/src/lerobot/policies/smolvla2/inference/runtime.py b/src/lerobot/policies/smolvla2/inference/runtime.py
index 24b32a940..afc7cca18 100644
--- a/src/lerobot/policies/smolvla2/inference/runtime.py
+++ b/src/lerobot/policies/smolvla2/inference/runtime.py
@@ -30,6 +30,7 @@ from .steps import (
     AskVQAFwd,
     DispatchAction,
     DispatchToolCalls,
+    HighLevelSubtaskFwd,
     InferenceStep,
     LowLevelForward,
 )
@@ -66,24 +67,29 @@ class SmolVLA2Runtime:
     _stop: bool = field(default=False, init=False)
 
     def __post_init__(self) -> None:
-        # VQA-only configuration (current scope). The training recipe
-        # supervises only subtasks + VQA — plan and memory are out for
-        # now — so the runtime drops the high-level subtask /
-        # memory-update / interjection steps. The remaining loop is:
+        # Subtask + VQA configuration (current scope — plan and memory
+        # are not trained yet). Pipeline:
         #
-        #   AskVQAFwd      → answer camera-grounded questions on stdin
-        #   LowLevelForward → action chunk (conditioned on the task
-        #                     string directly, since no subtask is
-        #                     being generated — see LowLevelForward's
-        #                     ``current_subtask or task`` fallback)
-        #   DispatchAction  → drain the chunk to the robot
-        #   DispatchToolCalls → fire any pending tool calls
+        #   HighLevelSubtaskFwd → generate the next subtask via the LM
+        #                         head at ~``high_level_hz``; writes
+        #                         ``current_subtask``
+        #   AskVQAFwd           → answer camera-grounded stdin questions
+        #   LowLevelForward     → action chunk conditioned on the
+        #                         generated ``current_subtask``
+        #   DispatchAction      → drain the chunk to the robot
+        #   DispatchToolCalls   → fire any pending tool calls
         #
-        # ``HighLevelSubtaskFwd`` / ``MemoryUpdateFwd`` /
-        # ``UserInterjectionFwd`` are still importable from
-        # ``inference.steps`` — re-add them here once plan / memory /
-        # subtask generation is back in scope.
+        # Order matters: ``HighLevelSubtaskFwd`` and ``LowLevelForward``
+        # are both gated on "action queue empty", so the subtask must
+        # refresh *before* the chunk that consumes it. ``MemoryUpdateFwd``
+        # / ``UserInterjectionFwd`` are still importable from
+        # ``inference.steps`` — re-add once plan / memory are in scope.
         self.pipeline = [
+            HighLevelSubtaskFwd(
+                trigger=HzTrigger(self.high_level_hz),
+                policy=self.policy,
+                observation_provider=self.observation_provider,
+            ),
             AskVQAFwd(
                 policy=self.policy,
                 observation_provider=self.observation_provider,
diff --git a/src/lerobot/policies/smolvla2/inference/steps.py b/src/lerobot/policies/smolvla2/inference/steps.py
index b255e7417..c9b84b167 100644
--- a/src/lerobot/policies/smolvla2/inference/steps.py
+++ b/src/lerobot/policies/smolvla2/inference/steps.py
@@ -111,12 +111,14 @@ class LowLevelForward(InferenceStep):
         if observation is None:
             return None
 
-        # The action expert is conditioned on the TASK string — the
-        # ``low_level_execution`` recipe renders ``user(${task})``.
-        # The task is stable for the whole episode and always present,
-        # so there is no train/inference mismatch and no dependency on
-        # a (currently unreliable) high-level subtask generator.
-        ctx = [{"role": "user", "content": state.get("task") or ""}]
+        # The action expert is conditioned on the SUBTASK generated by
+        # the high-level loop (``HighLevelSubtaskFwd`` runs earlier in
+        # the pipeline and writes ``current_subtask``). Matches the
+        # training-time ``low_level_execution`` recipe — ``user(${subtask})``.
+        # Falls back to the task string whenever ``current_subtask`` is
+        # unset or empty (e.g. before the high-level loop has produced one).
+        subtask = state.get("current_subtask") or state.get("task") or ""
+        ctx = [{"role": "user", "content": subtask}]
         # ``add_generation_prompt=False`` to match the training-time
         # prefix shape: at training the action expert sees the rendered
         # user turn ending at ``<|im_end|>`` (no trailing
@@ -744,11 +746,12 @@ def _hirobot_user_head(state: dict[str, Any]) -> str:
 
 
 def _msgs_for_subtask(state: dict[str, Any]) -> list[dict[str, Any]]:
-    """``high_level_subtask`` recipe layout — predict the current subtask
-    from (task + plan + memory). Even when plan / memory aren't set yet
-    the labels render as bare ``Plan: `` / ``Memory: `` to match training.
+    """``high_level_subtask`` recipe layout — predict the subtask from the
+    task. The current recipe's user turn is just ``${task}`` (plan and
+    memory are not trained), so the inference prompt is the bare task —
+    no ``Plan: `` / ``Memory: `` lines.
     """
-    return [{"role": "user", "content": _hirobot_user_head(state)}]
+    return [{"role": "user", "content": state.get("task") or ""}]
 
 
 def _msgs_for_memory(state: dict[str, Any]) -> list[dict[str, Any]]: