mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-26 05:59:52 +00:00
smolvla2(runtime): interactive task picker + drop action diagnostic
Task picker: The dataset bootstrap used to silently overwrite args.task with the canonical training task. Replace that with an interactive picker (_select_task_interactively) that shows every unique task in ds_meta.tasks as a numbered menu (canonical task first as default) plus a 'type a custom task' option. --task on the CLI still skips the picker, and non-TTY runs fall back to the bootstrap task so scripted invocations are unchanged. Action diagnostic removal: Drop the [act] log block in LowLevelForward.run (|a|_mean / spread / normalized + unnormalized first/last + state) that was added while debugging the 'barely moving' issue. Robot motion is now healthy, the output is noise in steady-state, and it depended on stashing the postprocessor on runtime.state — also removed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -168,46 +168,6 @@ class LowLevelForward(InferenceStep):
|
||||
else:
|
||||
chunk_iter = chunk.unsqueeze(0)
|
||||
|
||||
# Diagnostic: show what the action expert actually emitted for
|
||||
# this chunk. The values here are *normalized* (pre-postprocessor),
|
||||
# so we expect them roughly in [-1, 1] under QUANTILES; a chunk
|
||||
# that stays near zero across all 50 steps is the canonical
|
||||
# "barely moving" signature (model defaults to median pose).
|
||||
# Also surface a few unnormalized samples by running the
|
||||
# postprocessor on a copy so we can see the actual joint targets
|
||||
# the robot will receive.
|
||||
try:
|
||||
import torch as _t # noqa: PLC0415
|
||||
|
||||
sample = chunk_iter.detach().float().cpu() # (T, D) normalized
|
||||
mag = sample.abs().mean().item()
|
||||
spread = (sample.amax(0) - sample.amin(0)).abs().mean().item()
|
||||
first_norm = [round(float(x), 3) for x in sample[0].tolist()]
|
||||
last_norm = [round(float(x), 3) for x in sample[-1].tolist()]
|
||||
postprocessor = state.get("_postprocessor")
|
||||
first_unnorm = last_unnorm = None
|
||||
if postprocessor is not None:
|
||||
try:
|
||||
first_unnorm_t = postprocessor(sample[:1].clone())
|
||||
last_unnorm_t = postprocessor(sample[-1:].clone())
|
||||
if isinstance(first_unnorm_t, _t.Tensor):
|
||||
first_unnorm = [round(float(x), 2) for x in first_unnorm_t.flatten().tolist()]
|
||||
if isinstance(last_unnorm_t, _t.Tensor):
|
||||
last_unnorm = [round(float(x), 2) for x in last_unnorm_t.flatten().tolist()]
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
state_now = observation.get("observation.state")
|
||||
state_first = None
|
||||
if isinstance(state_now, _t.Tensor):
|
||||
s = state_now.detach().float().cpu().flatten().tolist()
|
||||
state_first = [round(float(x), 2) for x in s[:6]]
|
||||
push_log(state, f" [act] T={sample.shape[0]} |a|_mean={mag:.3f} spread={spread:.3f}")
|
||||
push_log(state, f" [act] norm first={first_norm} last={last_norm}")
|
||||
if first_unnorm is not None:
|
||||
push_log(state, f" [act] joint first={first_unnorm} last={last_unnorm} state={state_first}")
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug("act-diag failed: %s", exc)
|
||||
|
||||
for step in chunk_iter:
|
||||
queue.append(step.unsqueeze(0))
|
||||
state["last_chunk_size"] = int(chunk_iter.shape[0])
|
||||
|
||||
@@ -574,6 +574,93 @@ def _bootstrap_state_from_dataset(
|
||||
return out
|
||||
|
||||
|
||||
def _select_task_interactively(
|
||||
*,
|
||||
ds_meta: Any,
|
||||
bootstrap_task: str | None,
|
||||
) -> str | None:
|
||||
"""Ask the operator which task to run at startup.
|
||||
|
||||
Behaviour:
|
||||
|
||||
* If a dataset is loaded, build a numbered menu of every unique task
|
||||
string in ``ds_meta.tasks`` (canonical bootstrap task listed first
|
||||
as the default). Add a ``[c] type a custom task`` option.
|
||||
* If no dataset is loaded, show a plain ``Enter task:`` prompt.
|
||||
* Non-TTY runs (scripts, pipes) skip the prompt and return the
|
||||
bootstrap task so the existing "first stdin line becomes task"
|
||||
flow in ``_run_repl`` / ``_run_autonomous`` still works.
|
||||
|
||||
Returns the chosen task string, or ``None`` when the operator declines
|
||||
to pick one (Ctrl-D / empty + no default).
|
||||
"""
|
||||
options: list[str] = []
|
||||
seen: set[str] = set()
|
||||
if bootstrap_task:
|
||||
options.append(bootstrap_task)
|
||||
seen.add(bootstrap_task)
|
||||
if ds_meta is not None and getattr(ds_meta, "tasks", None) is not None:
|
||||
try:
|
||||
for t in list(ds_meta.tasks.index):
|
||||
if isinstance(t, str) and t and t not in seen:
|
||||
options.append(t)
|
||||
seen.add(t)
|
||||
except Exception: # noqa: BLE001 — defensive: tasks shape varies
|
||||
pass
|
||||
|
||||
if not sys.stdin.isatty():
|
||||
# Scripted / piped run: no interactive prompt; fall back to the
|
||||
# bootstrap default (may be None — REPL handles that).
|
||||
return bootstrap_task
|
||||
|
||||
print("\n[smolvla2] Select startup task:", flush=True)
|
||||
if options:
|
||||
for i, opt in enumerate(options, 1):
|
||||
marker = " (dataset default)" if opt == bootstrap_task else ""
|
||||
print(f" [{i}] {opt}{marker}", flush=True)
|
||||
print(" [c] type a custom task", flush=True)
|
||||
prompt = "Choice [1]: " if bootstrap_task else "Choice: "
|
||||
else:
|
||||
print(" (no tasks available from dataset)", flush=True)
|
||||
prompt = "Enter task: "
|
||||
|
||||
while True:
|
||||
try:
|
||||
choice = input(prompt).strip()
|
||||
except EOFError:
|
||||
print(flush=True)
|
||||
return bootstrap_task
|
||||
|
||||
# No dataset options at all: the entered line *is* the task.
|
||||
if not options:
|
||||
return choice or None
|
||||
|
||||
# Empty input: take the default (item 1) when there is one.
|
||||
if not choice:
|
||||
return options[0] if bootstrap_task else None
|
||||
|
||||
if choice.lower() in ("c", "custom"):
|
||||
try:
|
||||
free = input("Enter task: ").strip()
|
||||
except EOFError:
|
||||
print(flush=True)
|
||||
return bootstrap_task
|
||||
if free:
|
||||
return free
|
||||
# Empty free-form input → loop back to the menu.
|
||||
continue
|
||||
|
||||
if choice.isdigit():
|
||||
idx = int(choice)
|
||||
if 1 <= idx <= len(options):
|
||||
return options[idx - 1]
|
||||
|
||||
print(
|
||||
f" invalid choice {choice!r}; pick 1–{len(options)} or 'c'.",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
def _build_robot(
|
||||
*,
|
||||
robot_type: str,
|
||||
@@ -1425,8 +1512,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||
)
|
||||
|
||||
# Bootstrap the canonical task from the dataset whenever one is
|
||||
# provided, so ``/action`` (no argument) has a sensible task to
|
||||
# resume. The model is memorised on the exact training wording, so
|
||||
# provided, so the interactive picker below can offer it as the
|
||||
# default. The model is memorised on the exact training wording, so
|
||||
# matching it is what gets recall to fire.
|
||||
bootstrap_state: dict[str, str] = {}
|
||||
if args.dataset_repo_id is not None:
|
||||
@@ -1435,12 +1522,22 @@ def main(argv: list[str] | None = None) -> int:
|
||||
episode=args.dataset_episode,
|
||||
start_frame=args.dataset_start_frame,
|
||||
)
|
||||
if bootstrap_state.get("task") and not args.task:
|
||||
args.task = bootstrap_state["task"]
|
||||
print(
|
||||
f"[smolvla2] canonical task from dataset: {args.task!r}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
# Interactive task picker. Skipped when ``--task`` is already set on
|
||||
# the CLI (scripted runs and explicit overrides win). When no task
|
||||
# was passed, prompt the operator: pick from the dataset's tasks or
|
||||
# type a custom one. Non-TTY runs fall back to the bootstrap task
|
||||
# silently — the existing "first stdin line becomes task" flow in
|
||||
# ``_run_repl`` / ``_run_autonomous`` still handles the no-default
|
||||
# case.
|
||||
if not args.task:
|
||||
chosen = _select_task_interactively(
|
||||
ds_meta=ds_meta,
|
||||
bootstrap_task=bootstrap_state.get("task"),
|
||||
)
|
||||
if chosen:
|
||||
args.task = chosen
|
||||
print(f"[smolvla2] task: {args.task!r}", flush=True)
|
||||
|
||||
# No startup prompts — the runtime is command-driven. It comes up at
|
||||
# the command line in ``paused`` mode (robot idle) unless ``--mode``
|
||||
@@ -1520,11 +1617,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||
# under-trained checkpoint without recompiling.
|
||||
runtime.state["text_gen_min_new_tokens"] = int(getattr(args, "text_min_new_tokens", 0) or 0)
|
||||
runtime.state["text_gen_temperature"] = float(getattr(args, "text_temperature", 0.0) or 0.0)
|
||||
# Stash the postprocessor so LowLevelForward's action diagnostic
|
||||
# can show both normalized chunk values AND unnormalized joint
|
||||
# targets — answers "what is the model emitting + what does the
|
||||
# robot actually receive" in one log line.
|
||||
runtime.state["_postprocessor"] = postprocessor
|
||||
runtime.state["text_gen_top_p"] = float(getattr(args, "text_top_p", 1.0) or 1.0)
|
||||
# Subtask throttle: HighLevelSubtaskFwd fires only once every N
|
||||
# action-chunk boundaries. Lets you run N action chunks per LM-head
|
||||
|
||||
Reference in New Issue
Block a user