smolvla2(runtime): interactive task picker + drop action diagnostic

Task picker:
The dataset bootstrap used to silently overwrite args.task with the
canonical training task. Replace that with an interactive picker
(_select_task_interactively) that shows every unique task in
ds_meta.tasks as a numbered menu (canonical task first as default) plus
a 'type a custom task' option. --task on the CLI still skips the
picker, and non-TTY runs fall back to the bootstrap task so scripted
invocations are unchanged.

Action diagnostic removal:
Drop the [act] log block in LowLevelForward.run (|a|_mean / spread /
normalized + unnormalized first/last + state) that was added while
debugging the 'barely moving' issue. Robot motion is now healthy, the
output is noise in steady-state, and it depended on stashing the
postprocessor on runtime.state — also removed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pepijn
2026-05-25 12:59:08 +02:00
parent 6d2b8c80ab
commit e84f97a8c1
2 changed files with 105 additions and 53 deletions
@@ -168,46 +168,6 @@ class LowLevelForward(InferenceStep):
else:
chunk_iter = chunk.unsqueeze(0)
# Diagnostic: show what the action expert actually emitted for
# this chunk. The values here are *normalized* (pre-postprocessor),
# so we expect them roughly in [-1, 1] under QUANTILES; a chunk
# that stays near zero across all 50 steps is the canonical
# "barely moving" signature (model defaults to median pose).
# Also surface a few unnormalized samples by running the
# postprocessor on a copy so we can see the actual joint targets
# the robot will receive.
try:
import torch as _t # noqa: PLC0415
sample = chunk_iter.detach().float().cpu() # (T, D) normalized
mag = sample.abs().mean().item()
spread = (sample.amax(0) - sample.amin(0)).abs().mean().item()
first_norm = [round(float(x), 3) for x in sample[0].tolist()]
last_norm = [round(float(x), 3) for x in sample[-1].tolist()]
postprocessor = state.get("_postprocessor")
first_unnorm = last_unnorm = None
if postprocessor is not None:
try:
first_unnorm_t = postprocessor(sample[:1].clone())
last_unnorm_t = postprocessor(sample[-1:].clone())
if isinstance(first_unnorm_t, _t.Tensor):
first_unnorm = [round(float(x), 2) for x in first_unnorm_t.flatten().tolist()]
if isinstance(last_unnorm_t, _t.Tensor):
last_unnorm = [round(float(x), 2) for x in last_unnorm_t.flatten().tolist()]
except Exception: # noqa: BLE001
pass
state_now = observation.get("observation.state")
state_first = None
if isinstance(state_now, _t.Tensor):
s = state_now.detach().float().cpu().flatten().tolist()
state_first = [round(float(x), 2) for x in s[:6]]
push_log(state, f" [act] T={sample.shape[0]} |a|_mean={mag:.3f} spread={spread:.3f}")
push_log(state, f" [act] norm first={first_norm} last={last_norm}")
if first_unnorm is not None:
push_log(state, f" [act] joint first={first_unnorm} last={last_unnorm} state={state_first}")
except Exception as exc: # noqa: BLE001
logger.debug("act-diag failed: %s", exc)
for step in chunk_iter:
queue.append(step.unsqueeze(0))
state["last_chunk_size"] = int(chunk_iter.shape[0])
+105 -13
View File
@@ -574,6 +574,93 @@ def _bootstrap_state_from_dataset(
return out
def _select_task_interactively(
*,
ds_meta: Any,
bootstrap_task: str | None,
) -> str | None:
"""Ask the operator which task to run at startup.
Behaviour:
* If a dataset is loaded, build a numbered menu of every unique task
string in ``ds_meta.tasks`` (canonical bootstrap task listed first
as the default). Add a ``[c] type a custom task`` option.
* If no dataset is loaded, show a plain ``Enter task:`` prompt.
* Non-TTY runs (scripts, pipes) skip the prompt and return the
bootstrap task so the existing "first stdin line becomes task"
flow in ``_run_repl`` / ``_run_autonomous`` still works.
Returns the chosen task string, or ``None`` when the operator declines
to pick one (Ctrl-D / empty + no default).
"""
options: list[str] = []
seen: set[str] = set()
if bootstrap_task:
options.append(bootstrap_task)
seen.add(bootstrap_task)
if ds_meta is not None and getattr(ds_meta, "tasks", None) is not None:
try:
for t in list(ds_meta.tasks.index):
if isinstance(t, str) and t and t not in seen:
options.append(t)
seen.add(t)
except Exception: # noqa: BLE001 — defensive: tasks shape varies
pass
if not sys.stdin.isatty():
# Scripted / piped run: no interactive prompt; fall back to the
# bootstrap default (may be None — REPL handles that).
return bootstrap_task
print("\n[smolvla2] Select startup task:", flush=True)
if options:
for i, opt in enumerate(options, 1):
marker = " (dataset default)" if opt == bootstrap_task else ""
print(f" [{i}] {opt}{marker}", flush=True)
print(" [c] type a custom task", flush=True)
prompt = "Choice [1]: " if bootstrap_task else "Choice: "
else:
print(" (no tasks available from dataset)", flush=True)
prompt = "Enter task: "
while True:
try:
choice = input(prompt).strip()
except EOFError:
print(flush=True)
return bootstrap_task
# No dataset options at all: the entered line *is* the task.
if not options:
return choice or None
# Empty input: take the default (item 1) when there is one.
if not choice:
return options[0] if bootstrap_task else None
if choice.lower() in ("c", "custom"):
try:
free = input("Enter task: ").strip()
except EOFError:
print(flush=True)
return bootstrap_task
if free:
return free
# Empty free-form input → loop back to the menu.
continue
if choice.isdigit():
idx = int(choice)
if 1 <= idx <= len(options):
return options[idx - 1]
print(
f" invalid choice {choice!r}; pick 1{len(options)} or 'c'.",
flush=True,
)
def _build_robot(
*,
robot_type: str,
@@ -1425,8 +1512,8 @@ def main(argv: list[str] | None = None) -> int:
)
# Bootstrap the canonical task from the dataset whenever one is
# provided, so ``/action`` (no argument) has a sensible task to
# resume. The model is memorised on the exact training wording, so
# provided, so the interactive picker below can offer it as the
# default. The model is memorised on the exact training wording, so
# matching it is what gets recall to fire.
bootstrap_state: dict[str, str] = {}
if args.dataset_repo_id is not None:
@@ -1435,12 +1522,22 @@ def main(argv: list[str] | None = None) -> int:
episode=args.dataset_episode,
start_frame=args.dataset_start_frame,
)
if bootstrap_state.get("task") and not args.task:
args.task = bootstrap_state["task"]
print(
f"[smolvla2] canonical task from dataset: {args.task!r}",
flush=True,
)
# Interactive task picker. Skipped when ``--task`` is already set on
# the CLI (scripted runs and explicit overrides win). When no task
# was passed, prompt the operator: pick from the dataset's tasks or
# type a custom one. Non-TTY runs fall back to the bootstrap task
# silently — the existing "first stdin line becomes task" flow in
# ``_run_repl`` / ``_run_autonomous`` still handles the no-default
# case.
if not args.task:
chosen = _select_task_interactively(
ds_meta=ds_meta,
bootstrap_task=bootstrap_state.get("task"),
)
if chosen:
args.task = chosen
print(f"[smolvla2] task: {args.task!r}", flush=True)
# No startup prompts — the runtime is command-driven. It comes up at
# the command line in ``paused`` mode (robot idle) unless ``--mode``
@@ -1520,11 +1617,6 @@ def main(argv: list[str] | None = None) -> int:
# under-trained checkpoint without recompiling.
runtime.state["text_gen_min_new_tokens"] = int(getattr(args, "text_min_new_tokens", 0) or 0)
runtime.state["text_gen_temperature"] = float(getattr(args, "text_temperature", 0.0) or 0.0)
# Stash the postprocessor so LowLevelForward's action diagnostic
# can show both normalized chunk values AND unnormalized joint
# targets — answers "what is the model emitting + what does the
# robot actually receive" in one log line.
runtime.state["_postprocessor"] = postprocessor
runtime.state["text_gen_top_p"] = float(getattr(args, "text_top_p", 1.0) or 1.0)
# Subtask throttle: HighLevelSubtaskFwd fires only once every N
# action-chunk boundaries. Lets you run N action chunks per LM-head