From 1ca38d974826c177349233a7fa06c7210908d6f4 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Thu, 30 Apr 2026 10:54:12 +0200 Subject: [PATCH] fix(language): drop motion from VIEW_DEPENDENT_STYLES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motion primitives are described in robot-frame (joint / Cartesian) terms, not pixel space, so they are camera-agnostic. Only `vqa` (event) and `trace` (event, pixel-trajectory) are view-dependent. The `camera` field stays on PERSISTENT_ROW_FIELDS for schema symmetry — the validator, resolver, and HF feature mapping behave identically across the two columns regardless of which styles populate `camera` today — but persistent rows now always have `camera=None` in practice. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lerobot/datasets/language.py | 11 +++++++++-- tests/datasets/test_language.py | 13 +++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/lerobot/datasets/language.py b/src/lerobot/datasets/language.py index 69866e6bd..cc52835cb 100644 --- a/src/lerobot/datasets/language.py +++ b/src/lerobot/datasets/language.py @@ -36,8 +36,15 @@ EVENT_ONLY_STYLES = {"interjection", "vqa", "trace"} # Styles whose ``content`` is grounded in a specific camera view. Rows of these # styles MUST carry a non-null ``camera`` referencing an ``observation.images.*`` -# feature key. Rows of every other style MUST have ``camera=None``. -VIEW_DEPENDENT_STYLES = {"vqa", "motion", "trace"} +# feature key. Rows of every other style MUST have ``camera=None``. ``motion`` +# is intentionally NOT in this set: motion primitives are described in +# robot-frame (joint / Cartesian) terms, not pixel space, so they are +# camera-agnostic. ``trace`` is the pixel-trajectory event style and IS +# view-dependent. The ``camera`` field nevertheless lives on +# ``PERSISTENT_ROW_FIELDS`` too so the schema, validator, and resolver +# behave symmetrically across the two columns; persistent rows simply +# always have ``camera=None`` in practice today. +VIEW_DEPENDENT_STYLES = {"vqa", "trace"} LanguageColumn = Literal["language_persistent", "language_events"] diff --git a/tests/datasets/test_language.py b/tests/datasets/test_language.py index f01075d2c..5c354a5e4 100644 --- a/tests/datasets/test_language.py +++ b/tests/datasets/test_language.py @@ -58,10 +58,13 @@ def test_style_registry_routes_columns(): def test_view_dependent_styles(): - assert {"vqa", "motion", "trace"} == VIEW_DEPENDENT_STYLES + # motion lives in PERSISTENT_STYLES and is described in robot-frame + # (joint / Cartesian) terms, so it is NOT view-dependent. Only vqa + # (event) and trace (event, pixel-trajectory) carry a camera tag. + assert {"vqa", "trace"} == VIEW_DEPENDENT_STYLES assert is_view_dependent_style("vqa") - assert is_view_dependent_style("motion") assert is_view_dependent_style("trace") + assert not is_view_dependent_style("motion") assert not is_view_dependent_style("subtask") assert not is_view_dependent_style("plan") assert not is_view_dependent_style("interjection") @@ -70,22 +73,24 @@ def test_view_dependent_styles(): def test_validate_camera_field_requires_camera_for_view_dependent_styles(): validate_camera_field("vqa", "observation.images.top") - validate_camera_field("motion", "observation.images.wrist") validate_camera_field("trace", "observation.images.front") with pytest.raises(ValueError, match="view-dependent"): validate_camera_field("vqa", None) with pytest.raises(ValueError, match="view-dependent"): - validate_camera_field("motion", "") + validate_camera_field("trace", "") def test_validate_camera_field_rejects_camera_on_non_view_dependent_styles(): validate_camera_field("subtask", None) validate_camera_field("plan", None) validate_camera_field("memory", None) + validate_camera_field("motion", None) validate_camera_field("interjection", None) validate_camera_field(None, None) with pytest.raises(ValueError, match="must have camera=None"): validate_camera_field("subtask", "observation.images.top") + with pytest.raises(ValueError, match="must have camera=None"): + validate_camera_field("motion", "observation.images.top") with pytest.raises(ValueError, match="must have camera=None"): validate_camera_field("interjection", "observation.images.top") with pytest.raises(ValueError, match="must have camera=None"):