diff --git a/examples/annotations/run_hf_job.py b/examples/annotations/run_hf_job.py
index 85ac8f17c..3fb730d4a 100644
--- a/examples/annotations/run_hf_job.py
+++ b/examples/annotations/run_hf_job.py
@@ -87,8 +87,6 @@ CMD = (
     # rephrasings are unused at best and harmful when they drift.
     "--plan.n_task_rephrasings=0 "
     # Keep subtask decomposition tight for atomic tasks.
-    # (action_records left off: the {verb,object,arm,grasp,dest} schema is for
-    # long manipulation tasks, not RoboCasa atomic/navigation.)
     "--plan.plan_max_steps=10 "
     # Only subtasks + memory — skip the numbered "plan" rows. true re-enables.
     "--plan.emit_plan=false "
diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 1b16a927b..439201993 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -75,11 +75,6 @@ class PlanConfig:
     use_video_url: bool = False
     use_video_url_fps: float = 1.0
 
-    # Optional structured per-subtask action records (EgoMimic-style). When
-    # enabled, the VLM extracts a typed record per subtask span; see
-    # ``ActionRecordsConfig``. Purely additive — off by default.
-    action_records: ActionRecordsConfig = field(default_factory=lambda: ActionRecordsConfig())
-
     # Optional 5-axis task-augmentation taxonomy for the t=0 variants
     # (EgoMimic-style: synonym / omit_arm / omit_orientation /
     # omit_grasp_method / combined). Replaces the free-form
@@ -87,73 +82,6 @@ class PlanConfig:
     task_aug_axes: TaskAugAxesConfig = field(default_factory=lambda: TaskAugAxesConfig())
 
 
-@dataclass
-class ActionRecordsConfig:
-    """Structured per-subtask action record extraction.
-
-    When ``enabled=True``, after subtask-span generation the module makes
-    one extra VLM call per subtask to extract a typed record::
-
-        {
-            "verb": "pick" | "place" | "press" | ...,  # closed vocabulary
-            "object": "<canonical_object_name>",
-            "arm": "left" | "right" | "both" | null,
-            "grasp_type": "pinch" | "wrap" | "hook" | ... | null,
-            "destination": "<canonical_destination>" | null,
-            "mistake": "<short text>" | null,
-        }
-
-    Emitted as a separate ``style="action_record"`` row at the subtask's
-    start timestamp. PURELY ADDITIVE — it never touches the subtask text,
-    so downstream training can use the typed schema (e.g. auxiliary
-    verb/arm/grasp heads) while the conditioning string stays unchanged.
-
-    Cost: one extra VLM call per subtask (~8x plan-module calls on an
-    8-subtask episode).
-    """
-
-    enabled: bool = False
-
-    # Emit the ``style="action_record"`` row (JSON content) at the subtask
-    # start — the only output of the feature. ``enabled=False`` skips it.
-    emit_record_row: bool = True
-
-    # Frames sampled from the subtask span for the per-subtask VLM call.
-    frames_per_subtask: int = 4
-
-    # Closed verb vocabulary; the prompt picks exactly one. Override
-    # per-dataset (e.g. door-only manipulation) for a tighter constraint.
-    verb_vocabulary: tuple[str, ...] = (
-        "pick",
-        "place",
-        "push",
-        "pull",
-        "open",
-        "close",
-        "turn",
-        "press",
-        "lift",
-        "insert",
-        "pour",
-        "move",
-        "reach",
-        "grasp",
-        "release",
-        "wipe",
-        "dump",
-    )
-
-    # Closed grasp-type vocabulary (``null`` always allowed). Adjust
-    # per-hardware (e.g. drop ``hook`` / ``key`` for parallel-jaw grippers).
-    grasp_vocabulary: tuple[str, ...] = (
-        "pinch",
-        "wrap",
-        "hook",
-        "key",
-        "lateral",
-    )
-
-
 @dataclass
 class TaskAugAxesConfig:
     """Structured 5-axis augmentation taxonomy for t=0 task variants.
diff --git a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
index c76a6acad..8f25fcfba 100644
--- a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
+++ b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
@@ -17,7 +17,6 @@
 
 from __future__ import annotations
 
-import json
 import logging
 from collections.abc import Sequence
 from dataclasses import dataclass, field
@@ -29,7 +28,6 @@ from ..frames import (
     FrameProvider,
     VideoFrameProvider,
     null_provider,
-    to_image_blocks,
     to_video_block,
     to_video_url_block,
 )
@@ -84,20 +82,8 @@ class PlanSubtasksMemoryModule:
 
         subtask_spans = self._generate_subtasks(record, task=effective_task)
 
-        # Phase 1a: optional per-subtask action records. When enabled, emit a
-        # typed ActionRecord (verb/object/arm/grasp_type/destination/mistake)
-        # per span as a separate style="action_record" row. Purely additive —
-        # never touches the subtask text.
-        records_cfg = self.config.action_records
-        action_records: list[dict[str, Any] | None] = [None] * len(subtask_spans)
-        if records_cfg.enabled and subtask_spans:
-            for i, span in enumerate(subtask_spans):
-                rec = self._extract_action_record(record, span, effective_task)
-                if rec is not None:
-                    action_records[i] = rec
-
         # subtask rows
-        for i, span in enumerate(subtask_spans):
+        for span in subtask_spans:
             rows.append(
                 {
                     "role": "assistant",
@@ -107,16 +93,6 @@ class PlanSubtasksMemoryModule:
                     "tool_calls": None,
                 }
             )
-            if records_cfg.enabled and records_cfg.emit_record_row and action_records[i] is not None:
-                rows.append(
-                    {
-                        "role": "assistant",
-                        "content": json.dumps(action_records[i], sort_keys=True),
-                        "style": "action_record",
-                        "timestamp": snap_to_frame(span["start"], record.frame_timestamps),
-                        "tool_calls": None,
-                    }
-                )
         # Plan rows at every subtask boundary (incl. t=0). The plan is a
         # numbered list of still-todo subtasks, so re-emitting at each
         # boundary makes it shrink as work progresses — ${plan} at frame t is
@@ -264,107 +240,6 @@ class PlanSubtasksMemoryModule:
         out = [item.strip().strip('"').strip("'") for item in raw if isinstance(item, str)]
         return [s for s in out if s][:n]
 
-    # ------------------------------------------------------------------
-    # Phase 1a + 1b: structured per-subtask action records
-    # ------------------------------------------------------------------
-
-    def _extract_action_record(
-        self,
-        record: EpisodeRecord,
-        span: dict[str, Any],
-        episode_task: str,
-    ) -> dict[str, Any] | None:
-        """Ask the VLM to extract a typed ``ActionRecord`` from a subtask span.
-
-        Sends ``frames_per_subtask`` frames uniformly sampled from
-        ``[span.start, span.end]`` plus the canonical subtask text. The
-        VLM is constrained to verb + grasp vocabularies from the config
-        — invalid values are silently dropped at this layer (the
-        validator catches structural problems pre-write).
-
-        Returns ``None`` when the call fails or the VLM returns something
-        unrecognizable; callers fall back to the free-form subtask text.
-        """
-        cfg = self.config.action_records
-        start_t = float(span.get("start", 0.0))
-        end_t = float(span.get("end", start_t))
-        duration = max(0.0, end_t - start_t)
-
-        # Uniform timestamps within the span; fall back to a single
-        # center frame for very short spans.
-        n = max(1, int(cfg.frames_per_subtask))
-        if n == 1 or duration <= 0.0:
-            timestamps = [0.5 * (start_t + end_t)]
-        else:
-            step = duration / (n - 1)
-            timestamps = [start_t + i * step for i in range(n)]
-        frames = self.frame_provider.frames_at(record, timestamps)
-        if not frames:
-            logger.debug(
-                "action_record: no frames at span %.2f-%.2f for ep %s; skipping",
-                start_t,
-                end_t,
-                record.episode_index,
-            )
-            return None
-
-        prompt = load_prompt("plan_action_record").format(
-            episode_task=episode_task,
-            subtask_text=span.get("text", ""),
-            start_time=start_t,
-            end_time=end_t,
-            duration=duration,
-            n_frames=len(frames),
-            verb_vocabulary=", ".join(cfg.verb_vocabulary),
-            grasp_vocabulary=" | ".join(f'"{g}"' for g in cfg.grasp_vocabulary),
-        )
-        message = [
-            {
-                "role": "user",
-                "content": [*to_image_blocks(frames), {"type": "text", "text": prompt}],
-            }
-        ]
-        result = self.vlm.generate_json([message])[0]
-        if not isinstance(result, dict):
-            return None
-
-        # Light validation + normalisation. Verb is required; everything
-        # else may be null. Verb / grasp_type are clamped to the
-        # vocabularies (out-of-vocab → reject or null).
-        verb = (result.get("verb") or "").strip().lower()
-        if not verb or verb not in {v.lower() for v in cfg.verb_vocabulary}:
-            return None
-        obj = (result.get("object") or "").strip()
-        if not obj:
-            return None
-        grasp = result.get("grasp_type")
-        if isinstance(grasp, str):
-            grasp = grasp.strip().lower()
-            if grasp not in {g.lower() for g in cfg.grasp_vocabulary}:
-                grasp = None
-        else:
-            grasp = None
-        arm = result.get("arm")
-        if isinstance(arm, str):
-            arm = arm.strip().lower()
-            if arm not in {"left", "right", "both"}:
-                arm = None
-        else:
-            arm = None
-        destination = result.get("destination")
-        destination = destination.strip() if isinstance(destination, str) and destination.strip() else None
-        mistake = result.get("mistake")
-        mistake = mistake.strip() if isinstance(mistake, str) and mistake.strip() else None
-
-        return {
-            "verb": verb,
-            "object": obj,
-            "arm": arm,
-            "grasp_type": grasp,
-            "destination": destination,
-            "mistake": mistake,
-        }
-
     # ------------------------------------------------------------------
     # Structured 5-axis task augmentation (EgoMimic-style taxonomy)
     # ------------------------------------------------------------------
diff --git a/src/lerobot/annotations/steerable_pipeline/prompts/plan_action_record.txt b/src/lerobot/annotations/steerable_pipeline/prompts/plan_action_record.txt
deleted file mode 100644
index 1bd127048..000000000
--- a/src/lerobot/annotations/steerable_pipeline/prompts/plan_action_record.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-You are extracting a structured action record from a subtask span of a
-teleoperated robot demonstration. This is Phase 1a of a two-step
-process: you extract a typed record; a deterministic template then
-renders it back to canonical subtask text. Your job is the PERCEPTION
-step — not the language step.
-
-The user originally asked: "{episode_task}"
-The subtask span is:        "{subtask_text}"
-Span time window:           [{start_time:.2f}s, {end_time:.2f}s]
-                            ({duration:.2f}s of robot activity)
-
-You are shown {n_frames} frames sampled uniformly from the subtask
-window. Fill in a structured record describing the action that takes
-place between the first and last frame.
-
-Hard rules:
-- Use ONLY information visible in the frames. Do not infer details from
-  outside the span. Do not extrapolate from the original task wording.
-- Use canonical object names from the original task VERBATIM. Never
-  introduce synonyms: if the task says "cube", the record says "cube",
-  never "block" / "object" / "item".
-- For non-applicable fields, use ``null`` (not "n/a", not "none", not
-  an empty string).
-- For ``verb`` and ``grasp_type``, pick EXACTLY one value from the
-  vocabulary below. Never invent a new one.
-
-Field schema:
-
-  verb (required) — the imperative verb of the action. Vocabulary:
-    {verb_vocabulary}
-
-  object (required) — the manipulated object. Use the canonical noun
-    from the original task above.
-
-  arm — which arm performs the action. One of:
-    "left" | "right" | "both" | null
-    Use ``null`` when the source robot is single-arm or when the arm
-    is genuinely not visible in the frames.
-
-  grasp_type — which grip the gripper uses on contact. One of:
-    {grasp_vocabulary} | null
-    Use ``null`` when there is no contact in this span (e.g. a pure
-    ``move`` / ``reach`` subtask) or the grip is genuinely unclear.
-
-  destination — the target location for actions like ``place``,
-    ``move``, ``insert``, ``pour``. Use canonical names from the
-    original task. Use ``null`` for in-place actions (``press``,
-    ``turn``, ``grasp``, ``release``).
-
-  mistake — a brief one-clause description of any visible failure or
-    recovery during the span (e.g. "dropped the cube and re-grasped",
-    "missed the target on first attempt"). Use ``null`` when the span
-    completes cleanly with no visible recovery.
-
-Output strictly valid JSON of shape:
-
-  {{
-    "verb": "<one of vocabulary>",
-    "object": "<canonical noun>",
-    "arm": "left" | "right" | "both" | null,
-    "grasp_type": "<one of vocabulary>" | null,
-    "destination": "<canonical noun>" | null,
-    "mistake": "<short description>" | null
-  }}
diff --git a/src/lerobot/datasets/language.py b/src/lerobot/datasets/language.py
index f3d371545..124c25221 100644
--- a/src/lerobot/datasets/language.py
+++ b/src/lerobot/datasets/language.py
@@ -36,7 +36,6 @@ CORE_STYLES = {
     "vqa",
     "trace",
     "task_aug",
-    "action_record",
 }
 # Project-local styles can be registered at import time by appending to
 # ``EXTENDED_STYLES`` before ``column_for_style`` is called. Anything added
@@ -47,7 +46,7 @@ CORE_STYLES = {
 EXTENDED_STYLES: set[str] = set()
 STYLE_REGISTRY = CORE_STYLES | EXTENDED_STYLES
 
-PERSISTENT_STYLES = {"subtask", "plan", "memory", "motion", "task_aug", "action_record"}
+PERSISTENT_STYLES = {"subtask", "plan", "memory", "motion", "task_aug"}
 EVENT_ONLY_STYLES = {"interjection", "vqa", "trace"}
 
 # Styles whose ``content`` is grounded in a specific camera view. Rows of these
diff --git a/tests/datasets/test_language.py b/tests/datasets/test_language.py
index 2846dab1d..52c7b3708 100644
--- a/tests/datasets/test_language.py
+++ b/tests/datasets/test_language.py
@@ -64,7 +64,7 @@ def test_validate_feature_language_warns_only_on_non_empty_value(caplog):
 
 
 def test_style_registry_routes_columns():
-    assert {"subtask", "plan", "memory", "motion", "task_aug", "action_record"} == PERSISTENT_STYLES
+    assert {"subtask", "plan", "memory", "motion", "task_aug"} == PERSISTENT_STYLES
     assert {"interjection", "vqa", "trace"} == EVENT_ONLY_STYLES
     assert PERSISTENT_STYLES | EVENT_ONLY_STYLES <= STYLE_REGISTRY