feat(smolvla2-runtime): 'rephrase:' prefix to swap task string in place

Adds a third stdin channel alongside 'task:' and bare interjections: 'rephrase: <text>'. It swaps state['task'] with the new string while preserving plan/memory/subtask. This lets the operator probe how robust the model is to wording variations of the same task — the training augmentation provided n_task_rephrasings≈30 task wordings per dataset task, and this is the direct way to exercise that distribution at inference without generating a fresh plan via user_interjection_response.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-25 10:46:01 +00:00 · 2026-05-12 17:26:59 +02:00
parent d528078aca
commit c98c695127
1 changed file with 25 additions and 3 deletions
@@ -785,17 +785,32 @@ def _run_autonomous(
            # Typing a rephrasing of the current task as an
            # interjection is the trained way to redirect without
            # resetting the high-level plan from scratch.
            # ``task: <text>``     — full task switch, clears plan/memory/subtask
            # ``rephrase: <text>`` — swap the task string in place,
            #                       keep plan/memory/subtask. Tests
            #                       prompt robustness from the
            #                       n_task_rephrasings training
            #                       augmentation: the model should
            #                       behave the same on equivalent
            #                       phrasings of the same task.
            # bare line ending in ``?``  — VQA
            # bare line                  — interjection
            if lower.startswith("task:"):
                new_task = line[5:].strip()
                if new_task:
                    runtime.set_task(new_task)
                    # Clear stale plan/memory/subtask so the next
                    # high-level pass regenerates from the new task
                    # rather than carrying over context from the old.
                    runtime.state["current_plan"] = None
                    runtime.state["current_memory"] = None
                    runtime.state["current_subtask"] = None
                continue
            if lower.startswith("rephrase:"):
                rephrased = line[len("rephrase:"):].strip()
                if rephrased:
                    runtime.state["task"] = rephrased
                    runtime.state.setdefault("log_lines", []).append(
                        f"Task rephrased: {rephrased}  (plan/memory preserved)"
                    )
                continue
            if not runtime.state.get("task"):
                runtime.set_task(line)
                continue
@@ -1110,6 +1125,13 @@ def _run_repl(runtime: Any, *, initial_task: str | None, max_ticks: int | None)
                    runtime.state["current_plan"] = None
                    runtime.state["current_memory"] = None
                    runtime.state["current_subtask"] = None
            elif lower.startswith("rephrase:"):
                rephrased = line[len("rephrase:"):].strip()
                if rephrased:
                    runtime.state["task"] = rephrased
                    runtime.state.setdefault("log_lines", []).append(
                        f"Task rephrased: {rephrased}  (plan/memory preserved)"
                    )
            elif not runtime.state.get("task"):
                runtime.set_task(line)
            elif lower.endswith("?"):