From dc530e10fe76bb1d96dbb18ca1beeba939264b0f Mon Sep 17 00:00:00 2001 From: Pepijn Date: Mon, 18 May 2026 14:42:32 +0200 Subject: [PATCH] feat(smolvla2): VQA example prompts in the panel; drop quotes from hints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Command arguments never needed quotes (`_strip_quotes` only strips a matching pair if present) — `/question point to the yellow cube` works. The hints wrongly implied `""` were required; all hints/help now show `/action ` / `/question `. Also adds a reference line to the state panel showing the two overlay-producing VQA prompt shapes: /question point to the yellow cube -> point overlay /question detect the blue cube -> bounding-box overlay plus the same examples in /help. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../scripts/lerobot_smolvla2_runtime.py | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/lerobot/scripts/lerobot_smolvla2_runtime.py b/src/lerobot/scripts/lerobot_smolvla2_runtime.py index ef1f77bab..645985296 100644 --- a/src/lerobot/scripts/lerobot_smolvla2_runtime.py +++ b/src/lerobot/scripts/lerobot_smolvla2_runtime.py @@ -906,14 +906,18 @@ def _build_robot_action_executor( def _print_runtime_help() -> None: """Print the slash-command reference.""" print( - "[smolvla2] commands:\n" - ' /action "task" run the robot; an argument switches to that task\n' + "[smolvla2] commands (arguments need no quotes):\n" + " /action run the robot; an argument switches to that task\n" " /action resume the robot on the current task\n" " /action run the robot for N seconds, then auto-pause\n" " /pause pause the action loop — robot holds position\n" - ' /question "..." pause and answer one VQA question\n' + " /question pause and answer one VQA question\n" " /help show this help\n" - " stop | quit | exit end the session", + " stop | quit | exit end the session\n" + "\n" + " VQA examples:\n" + " /question point to the yellow cube -> point overlay\n" + " /question detect the blue cube -> bounding-box overlay", flush=True, ) @@ -989,7 +993,7 @@ def _handle_slash_command(runtime: Any, line: str) -> bool: else: runtime.state["mode"] = "paused" print( - '[smolvla2] no task set — use /action "your task"', + "[smolvla2] no task set — use /action ", flush=True, ) return True @@ -1008,7 +1012,11 @@ def _handle_slash_command(runtime: Any, line: str) -> bool: runtime.state["action_deadline"] = None _clear_action_queue(runtime) if not rest: - print('[smolvla2] usage: /question "your question"', flush=True) + print( + "[smolvla2] usage: /question " + "(e.g. /question point to the yellow cube)", + flush=True, + ) return True _run_vqa_query(runtime, rest) return True @@ -1101,8 +1109,8 @@ def _run_autonomous( redraw = _make_state_panel_renderer(runtime, mode_label="autonomous", scrollback=_scrollback) redraw() print( - ' [autonomous] /action "task" to run · /pause to stop · ' - '/question "..." to ask · /help · stop', + " [autonomous] /action to run · /pause to stop · " + "/question to ask · /help · stop", flush=True, ) @@ -1177,7 +1185,7 @@ def _run_autonomous( runtime.state.setdefault("events_this_tick", []).append("user_interjection") else: print( - '[smolvla2] no task yet — use /action "your task" to start', + "[smolvla2] no task yet — use /action to start", flush=True, ) except KeyboardInterrupt: @@ -1227,15 +1235,19 @@ def _make_state_panel_renderer( if run_mode == "action": console.print( " [dim]commands:[/] [bold]/pause[/] stop · " - '[bold]/question "..."[/bold] ask · [bold]/help[/] · ' - "[bold]stop[/]" + "[bold]/question[/] ask · [bold]/help[/] · [bold]stop[/]" ) else: console.print( - ' [dim]commands:[/] [bold]/action "task"[/bold] run · ' - '[bold]/question "..."[/bold] ask · [bold]/help[/] · ' - "[bold]stop[/]" + " [dim]commands:[/] [bold]/action[/] run · " + "[bold]/question[/] ask · [bold]/help[/] · [bold]stop[/]" ) + # Reference VQA prompts — the two answer shapes that draw an + # overlay (point + bounding box). No quotes needed. + console.print( + " [dim]vqa examples:[/] /question point to the yellow cube · " + "/question detect the blue cube" + ) for key, label in ( ("task", "task"), ("current_subtask", "subtask"), @@ -1304,8 +1316,8 @@ def _make_state_panel_renderer( console.print() if not st.get("task"): console.print( - ' [dim]Type [bold]/action "your task"[/bold] to begin. ' - '[bold]/question "..."[/bold] to ask, /help for commands, ' + " [dim]Type [bold]/action [/bold] to begin, " + "[bold]/question [/bold] to ask, /help for commands, " "stop to exit.[/]" ) @@ -1583,7 +1595,7 @@ def _run_repl(runtime: Any, *, initial_task: str | None, max_ticks: int | None) # task to be meaningful. if not runtime.state.get("task"): print( - '[smolvla2] no task yet — use /action "your task"', + "[smolvla2] no task yet — use /action ", flush=True, ) _redraw(last_logs)