diff --git a/src/lerobot/configs/recipes/hirobot.yaml b/src/lerobot/configs/recipes/hirobot.yaml
index 3cf5bc573..3ee88690a 100644
--- a/src/lerobot/configs/recipes/hirobot.yaml
+++ b/src/lerobot/configs/recipes/hirobot.yaml
@@ -3,11 +3,11 @@
 #
 #   Trains two things only: subtasks and VQA. Plan and memory are
 #   intentionally left out for now — keeps the prompt short and the
-#   training surface small while the core subtask + action loop is
-#   validated.
+#   training surface small while the core action loop is validated.
 #
-#     high_level_subtask  — predict the subtask from the task.
-#     low_level_execution — flow loss with [images, subtask, state].
+#     high_level_subtask  — predict the subtask from the task (text
+#                           head only; not on the inference path yet).
+#     low_level_execution — flow loss with [images, task, state].
 #     ask_vqa_{top,wrist} — camera-grounded VQA.
 #
 # Each backbone's text tokenizer renders these messages differently
@@ -25,12 +25,15 @@ blend:
   low_level_execution:
     weight: 0.40
     messages:
-      # π0.5-style action conditioning. The action expert sees only
-      # [images, this user turn (= bare subtask), state]. No text-CE
-      # target — subtask prediction is owned by ``high_level_subtask``.
+      # The action expert is conditioned on the TASK (not the subtask).
+      # The task is always available at inference, without requiring a
+      # high-level generation loop, so this removes the train/inference
+      # mismatch a subtask-conditioned action head would have while there
+      # is no reliable runtime subtask source. ``high_level_subtask``
+      # still trains the text head to predict subtasks for later use.
       # ``stream: low_level`` flips ``predict_actions=True`` so the
-      # flow loss fires.
-      - {role: user, content: "${subtask}", stream: low_level, if_present: subtask}
+      # flow loss fires; no text-CE target here.
+      - {role: user, content: "${task}", stream: low_level}
 
   ask_vqa_top:
     weight: 0.10
diff --git a/src/lerobot/policies/smolvla2/inference/steps.py b/src/lerobot/policies/smolvla2/inference/steps.py
index 3bd34074d..b255e7417 100644
--- a/src/lerobot/policies/smolvla2/inference/steps.py
+++ b/src/lerobot/policies/smolvla2/inference/steps.py
@@ -111,15 +111,12 @@ class LowLevelForward(InferenceStep):
         if observation is None:
             return None
 
-        # π0.5-style: the action expert is conditioned on just the
-        # subtask (+ images + state). No task / plan / memory in the
-        # low-level prompt — those are only used by the high-level
-        # loop to *generate* the subtask. Matches the training-time
-        # ``low_level_execution`` recipe shape (single user turn,
-        # no assistant target since text-CE is owned by the
-        # high-level recipe).
-        subtask = state.get("current_subtask") or state.get("task") or ""
-        ctx = [{"role": "user", "content": subtask}]
+        # The action expert is conditioned on the TASK string — the
+        # ``low_level_execution`` recipe renders ``user(${task})``.
+        # The task is stable for the whole episode and always present,
+        # so there is no train/inference mismatch and no dependency on
+        # a (currently unreliable) high-level subtask generator.
+        ctx = [{"role": "user", "content": state.get("task") or ""}]
         # ``add_generation_prompt=False`` to match the training-time
         # prefix shape: at training the action expert sees the rendered
         # user turn ending at ``<|im_end|>`` (no trailing