test: add dataset guard + fix imports

update docs + docstrings + examples + add minimal test
add context guards
2026-06-18 16:57:12 +00:00 · 2026-04-20 00:36:02 +02:00 · 2026-04-19 23:53:53 +02:00 · 2026-04-19 23:21:14 +02:00 · 2026-04-19 22:48:08 +02:00 · 2026-04-19 16:50:19 +02:00
128 changed files with 1371 additions and 8908 deletions
@@ -83,13 +83,10 @@ jobs:
          cache-binary: false

      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}

      # Build the benchmark-specific image. The Dockerfile separates dep-install
      # from source-copy, so code-only changes skip the slow uv-sync layer
@@ -118,7 +115,7 @@ jobs:
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
-                --policy.path=lerobot/smolvla_libero \
+                --policy.path=pepijn223/smolvla_libero \
                --env.type=libero \
                --env.task=libero_spatial \
                --eval.batch_size=1 \
@@ -147,7 +144,7 @@ jobs:
            --artifacts-dir /tmp/libero-artifacts \
            --env libero \
            --task libero_spatial \
-            --policy lerobot/smolvla_libero
+            --policy pepijn223/smolvla_libero

      - name: Upload Libero rollout video
        if: always()
@@ -241,13 +238,10 @@ jobs:
          cache-binary: false

      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
        with:
          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}

      - name: Build MetaWorld benchmark image
        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
@@ -270,7 +264,7 @@ jobs:
            bash -c "
              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
              lerobot-eval \
-                --policy.path=lerobot/smolvla_metaworld \
+                --policy.path=pepijn223/smolvla_metaworld \
                --env.type=metaworld \
                --env.task=metaworld-push-v3 \
                --eval.batch_size=1 \
@@ -299,7 +293,7 @@ jobs:
            --artifacts-dir /tmp/metaworld-artifacts \
            --env metaworld \
            --task metaworld-push-v3 \
-            --policy lerobot/smolvla_metaworld
+            --policy pepijn223/smolvla_metaworld

      - name: Upload MetaWorld rollout video
        if: always()
@@ -316,630 +310,3 @@ jobs:
          name: metaworld-metrics
          path: /tmp/metaworld-artifacts/metrics.json
          if-no-files-found: warn
-
-  # ── ROBOTWIN 2.0 ──────────────────────────────────────────────────────────
-  # Isolated image: full RoboTwin 2.0 stack — SAPIEN, mplib, CuRobo,
-  # pytorch3d, + simulation assets (~4 GB).
-  # Build takes ~20 min on first run; subsequent runs hit the layer cache.
-  # Requires an NVIDIA GPU runner with CUDA 12.1 drivers.
-  robotwin-integration-test:
-    name: RoboTwin 2.0 — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-      ROBOTWIN_POLICY: lerobot/smolvla_robotwin
-      ROBOTWIN_TASKS: beat_block_hammer,click_bell,handover_block,stack_blocks_two,click_alarmclock,open_microwave,adjust_bottle,lift_pot,stamp_seal,turn_switch
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-
-      # Build the full-install image: SAPIEN, mplib, CuRobo, pytorch3d +
-      # simulation assets (~4 GB). Layer cache lives in the runner's local
-      # Docker daemon — reused across re-runs on the same machine.
-      - name: Build RoboTwin 2.0 benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.robotwin
-          push: false
-          load: true
-          tags: lerobot-benchmark-robotwin:ci
-          cache-from: type=local,src=/tmp/.buildx-cache-robotwin
-          cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max
-
-      - name: Run RoboTwin 2.0 smoke eval (10 tasks, 1 episode each)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          # Named container (no --rm) so we can docker cp artifacts out.
-          docker run --name robotwin-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e ROBOTWIN_POLICY="${ROBOTWIN_POLICY}" \
-            -e ROBOTWIN_TASKS="${ROBOTWIN_TASKS}" \
-            lerobot-benchmark-robotwin:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              cd /opt/robotwin && lerobot-eval \
-                --policy.path=\"\$ROBOTWIN_POLICY\" \
-                --env.type=robotwin \
-                --env.task=\"\$ROBOTWIN_TASKS\" \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--rename_map={\"observation.images.head_camera\": \"observation.images.camera1\", \"observation.images.left_camera\": \"observation.images.camera2\", \"observation.images.right_camera\": \"observation.images.camera3\"}' \
-                --output_dir=/tmp/eval-artifacts
-              python /lerobot/scripts/ci/extract_task_descriptions.py \
-                --env robotwin \
-                --task \"\$ROBOTWIN_TASKS\" \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy RoboTwin artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/robotwin-artifacts
-          docker cp robotwin-eval:/tmp/eval-artifacts/. /tmp/robotwin-artifacts/ 2>/dev/null || true
-          docker rm -f robotwin-eval || true
-
-      - name: Parse RoboTwin eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/robotwin-artifacts \
-            --env robotwin \
-            --task "${ROBOTWIN_TASKS}" \
-            --policy "${ROBOTWIN_POLICY}"
-
-      - name: Upload RoboTwin rollout video
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: robotwin-rollout-video
-          path: /tmp/robotwin-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload RoboTwin eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: robotwin-metrics
-          path: /tmp/robotwin-artifacts/metrics.json
-          if-no-files-found: warn
-
-  # ── ROBOCASA365 ──────────────────────────────────────────────────────────
-  # Isolated image: robocasa + robosuite installed manually as editable
-  # clones (no `lerobot[robocasa]` extra — robocasa's setup.py pins
-  # `lerobot==0.3.3`, which would shadow this repo's lerobot).
-  robocasa-integration-test:
-    name: RoboCasa365 — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-
-      - name: Build RoboCasa365 benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.robocasa
-          push: false
-          load: true
-          tags: lerobot-benchmark-robocasa:ci
-
-      - name: Run RoboCasa365 smoke eval (10 atomic tasks, 1 episode each)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          docker run --name robocasa-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            -e MUJOCO_GL=egl \
-            lerobot-benchmark-robocasa:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=lerobot/smolvla_robocasa \
-                --env.type=robocasa \
-                --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--rename_map={\"observation.images.robot0_agentview_left\": \"observation.images.camera1\", \"observation.images.robot0_eye_in_hand\": \"observation.images.camera2\", \"observation.images.robot0_agentview_right\": \"observation.images.camera3\"}' \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env robocasa \
-                --task CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy RoboCasa365 artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/robocasa-artifacts
-          docker cp robocasa-eval:/tmp/eval-artifacts/. /tmp/robocasa-artifacts/ 2>/dev/null || true
-          docker rm -f robocasa-eval || true
-
-      - name: Parse RoboCasa365 eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/robocasa-artifacts \
-            --env robocasa \
-            --task atomic_smoke_10 \
-            --policy lerobot/smolvla_robocasa
-
-      - name: Upload RoboCasa365 rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: robocasa-rollout-video
-          path: /tmp/robocasa-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload RoboCasa365 eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: robocasa-metrics
-          path: /tmp/robocasa-artifacts/metrics.json
-          if-no-files-found: warn
-
-  # ── ROBOCEREBRA ───────────────────────────────────────────────────────────
-  # Reuses the LIBERO simulator (libero_10 suite) with RoboCerebra camera
-  # defaults (image/wrist_image). The image is layered on
-  # huggingface/lerobot-gpu, which already ships [libero] as part of [all].
-  robocerebra-integration-test:
-    name: RoboCerebra — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-
-      - name: Build RoboCerebra benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.robocerebra
-          push: false
-          load: true
-          tags: lerobot-benchmark-robocerebra:ci
-          cache-from: type=local,src=/tmp/.buildx-cache-robocerebra
-          cache-to: type=local,dest=/tmp/.buildx-cache-robocerebra,mode=max
-
-      - name: Run RoboCerebra smoke eval (1 episode)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          docker run --name robocerebra-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            -e LIBERO_DATA_FOLDER=/tmp/libero_data \
-            lerobot-benchmark-robocerebra:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=lerobot/smolvla_robocerebra \
-                --env.type=libero \
-                --env.task=libero_10 \
-                --env.fps=20 \
-                --env.obs_type=pixels_agent_pos \
-                --env.observation_height=256 \
-                --env.observation_width=256 \
-                '--env.camera_name_mapping={\"agentview_image\": \"image\", \"robot0_eye_in_hand_image\": \"wrist_image\"}' \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.wrist_image\": \"observation.images.camera2\"}' \
-                --policy.empty_cameras=1 \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env libero --task libero_10 \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy RoboCerebra artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/robocerebra-artifacts
-          docker cp robocerebra-eval:/tmp/eval-artifacts/. /tmp/robocerebra-artifacts/ 2>/dev/null || true
-          docker rm -f robocerebra-eval || true
-
-      - name: Parse RoboCerebra eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/robocerebra-artifacts \
-            --env robocerebra \
-            --task libero_10 \
-            --policy lerobot/smolvla_robocerebra
-
-      - name: Upload RoboCerebra rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: robocerebra-rollout-video
-          path: /tmp/robocerebra-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload RoboCerebra eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: robocerebra-metrics
-          path: /tmp/robocerebra-artifacts/metrics.json
-          if-no-files-found: warn
-
-  # ── ROBOMME ───────────────────────────────────────────────────────────────
-  # Isolated image: mani-skill/SAPIEN/Vulkan chain with gymnasium and numpy
-  # overrides (robomme can't be a pyproject extra due to numpy<2 pin).
-  robomme-integration-test:
-    name: RoboMME — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-      ROBOMME_POLICY: lerobot/smolvla_robomme
-      ROBOMME_TASKS: PickXtimes,BinFill,StopCube,MoveCube,InsertPeg,SwingXtimes,VideoUnmask,ButtonUnmask,PickHighlight,PatternLock
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-
-      - name: Build RoboMME benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.robomme
-          push: false
-          load: true
-          tags: lerobot-benchmark-robomme:ci
-
-      - name: Run RoboMME smoke eval (10 tasks, 1 episode each)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          docker run --name robomme-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            -e ROBOMME_POLICY="${ROBOMME_POLICY}" \
-            -e ROBOMME_TASKS="${ROBOMME_TASKS}" \
-            lerobot-benchmark-robomme:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=\"\$ROBOMME_POLICY\" \
-                --env.type=robomme \
-                --env.task=\"\$ROBOMME_TASKS\" \
-                --env.dataset_split=test \
-                --env.task_ids=[0] \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.wrist_image\": \"observation.images.camera2\"}' \
-                --policy.empty_cameras=3 \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env robomme --task \"\$ROBOMME_TASKS\" \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy RoboMME artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/robomme-artifacts
-          docker cp robomme-eval:/tmp/eval-artifacts/. /tmp/robomme-artifacts/ 2>/dev/null || true
-          docker rm -f robomme-eval || true
-
-      - name: Parse RoboMME eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/robomme-artifacts \
-            --env robomme \
-            --task "${ROBOMME_TASKS}" \
-            --policy "${ROBOMME_POLICY}"
-
-      - name: Upload RoboMME rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: robomme-rollout-video
-          path: /tmp/robomme-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload RoboMME eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: robomme-metrics
-          path: /tmp/robomme-artifacts/metrics.json
-          if-no-files-found: warn
-
-  # ── LIBERO-plus ───────────────────────────────────────────────────────────
-  # Isolated image: LIBERO-plus fork cloned into /home/user_lerobot on top of
-  # huggingface/lerobot-gpu (see docker/Dockerfile.benchmark.libero_plus).
-  libero-plus-integration-test:
-    name: LIBERO-plus — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-      LIBERO_PLUS_SUITE: libero_spatial
-      LIBERO_PLUS_POLICY: lerobot/smolvla_libero_plus
-      LIBERO_PLUS_TASK_IDS: "[0,100,260,500,1000,1500,2000,2400]"
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-
-      - name: Build LIBERO-plus benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.libero_plus
-          push: false
-          load: true
-          tags: lerobot-benchmark-libero-plus:ci
-          cache-from: type=local,src=/tmp/.buildx-cache-libero-plus
-          cache-to: type=local,dest=/tmp/.buildx-cache-libero-plus,mode=max
-
-      - name: Run LIBERO-plus smoke eval (1 episode)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          docker run --name libero-plus-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            -e LIBERO_PLUS_SUITE="${LIBERO_PLUS_SUITE}" \
-            -e LIBERO_PLUS_POLICY="${LIBERO_PLUS_POLICY}" \
-            -e LIBERO_PLUS_TASK_IDS="${LIBERO_PLUS_TASK_IDS}" \
-            lerobot-benchmark-libero-plus:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=\"\$LIBERO_PLUS_POLICY\" \
-                --env.type=libero_plus \
-                --env.task=\"\$LIBERO_PLUS_SUITE\" \
-                --env.task_ids=\"\$LIBERO_PLUS_TASK_IDS\" \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--env.camera_name_mapping={\"agentview_image\": \"camera1\", \"robot0_eye_in_hand_image\": \"camera2\"}' \
-                --policy.empty_cameras=1 \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env libero_plus --task \"\$LIBERO_PLUS_SUITE\" \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy LIBERO-plus artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/libero-plus-artifacts
-          docker cp libero-plus-eval:/tmp/eval-artifacts/. /tmp/libero-plus-artifacts/ 2>/dev/null || true
-          docker rm -f libero-plus-eval || true
-
-      - name: Parse LIBERO-plus eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/libero-plus-artifacts \
-            --env libero_plus \
-            --task "${LIBERO_PLUS_SUITE}" \
-            --policy "${LIBERO_PLUS_POLICY}"
-
-      - name: Upload LIBERO-plus rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: libero-plus-rollout-video
-          path: /tmp/libero-plus-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload LIBERO-plus eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: libero-plus-metrics
-          path: /tmp/libero-plus-artifacts/metrics.json
-          if-no-files-found: warn
-
-  # ── VLABENCH ─────────────────────────────────────────────────────────────
-  # Isolated image: lerobot[vlabench] only (VLABench, mujoco==3.2.2, dm-control chain)
-  vlabench-integration-test:
-    name: VLABench — build image + 1-episode eval
-    runs-on:
-      group: aws-g6-4xlarge-plus
-    env:
-      HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }}
-
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-          lfs: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          cache-binary: false
-
-      - name: Login to Docker Hub
-        if: ${{ env.DOCKERHUB_USERNAME != '' }}
-        uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses]
-        with:
-          username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }}
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }}
-
-      - name: Build VLABench benchmark image
-        uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses]
-        with:
-          context: .
-          file: docker/Dockerfile.benchmark.vlabench
-          push: false
-          load: true
-          tags: lerobot-benchmark-vlabench:ci
-          build-args: |
-            VLABENCH_ASSETS_REPO=lerobot/vlabench-assets
-
-      - name: Run VLABench smoke eval (10 tasks, 1 episode each)
-        if: env.HF_USER_TOKEN != ''
-        run: |
-          docker run --name vlabench-eval --gpus all \
-            --shm-size=4g \
-            -e HF_HOME=/tmp/hf \
-            -e HF_USER_TOKEN="${HF_USER_TOKEN}" \
-            -e HF_HUB_DOWNLOAD_TIMEOUT=300 \
-            -e MUJOCO_GL=egl \
-            lerobot-benchmark-vlabench:ci \
-            bash -c "
-              hf auth login --token \"\$HF_USER_TOKEN\" --add-to-git-credential 2>/dev/null || true
-              lerobot-eval \
-                --policy.path=lerobot/smolvla_vlabench \
-                --env.type=vlabench \
-                --env.task=select_fruit,select_toy,select_book,select_painting,select_drink,select_ingredient,select_billiards,select_poker,add_condiment,insert_flower \
-                --eval.batch_size=1 \
-                --eval.n_episodes=1 \
-                --eval.use_async_envs=false \
-                --policy.device=cuda \
-                '--rename_map={\"observation.images.image\": \"observation.images.camera1\", \"observation.images.second_image\": \"observation.images.camera2\", \"observation.images.wrist_image\": \"observation.images.camera3\"}' \
-                --output_dir=/tmp/eval-artifacts
-              python scripts/ci/extract_task_descriptions.py \
-                --env vlabench \
-                --task select_fruit,select_toy,select_book,select_painting,select_drink,select_ingredient,select_billiards,select_poker,add_condiment,insert_flower \
-                --output /tmp/eval-artifacts/task_descriptions.json
-            "
-
-      - name: Copy VLABench artifacts from container
-        if: always()
-        run: |
-          mkdir -p /tmp/vlabench-artifacts
-          docker cp vlabench-eval:/tmp/eval-artifacts/. /tmp/vlabench-artifacts/ 2>/dev/null || true
-          docker rm -f vlabench-eval || true
-
-      - name: Parse VLABench eval metrics
-        if: always()
-        run: |
-          python3 scripts/ci/parse_eval_metrics.py \
-            --artifacts-dir /tmp/vlabench-artifacts \
-            --env vlabench \
-            --task select_fruit,select_toy,select_book,select_painting,select_drink,select_ingredient,select_billiards,select_poker,add_condiment,insert_flower \
-            --policy lerobot/smolvla_vlabench
-
-      - name: Upload VLABench rollout video
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: vlabench-rollout-video
-          path: /tmp/vlabench-artifacts/videos/
-          if-no-files-found: warn
-
-      - name: Upload VLABench eval metrics
-        if: always()
-        uses: actions/upload-artifact@v4 # zizmor: ignore[unpinned-uses]
-        with:
-          name: vlabench-metrics
-          path: /tmp/vlabench-artifacts/metrics.json
-          if-no-files-found: warn
@@ -33,7 +33,7 @@ jobs:
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success' &&
      github.repository == 'huggingface/lerobot'
-    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@2430c1ec91d04667414e2fa31ecfc36c153ea391  # main
+    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@9ad2de8582b56c017cb530c1165116d40433f1c6  # main
    with:
      package_name: lerobot
    secrets:
@@ -55,7 +55,7 @@ jobs:
      github.repository == 'huggingface/lerobot'
    permissions:
      contents: read
-    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@2430c1ec91d04667414e2fa31ecfc36c153ea391  # main
+    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3  # main
    with:
      commit_sha: ${{ github.sha }}
      package: lerobot
@@ -78,7 +78,7 @@ jobs:
    permissions:
      contents: read
      pull-requests: write
-    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@2430c1ec91d04667414e2fa31ecfc36c153ea391  # main
+    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@90b4ee2c10b81b5c1a6367c4e6fc9e2fb510a7e3  # main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
@@ -1,7 +1,5 @@
 This file provides guidance to AI agents when working with code in this repository.

-> **User-facing help → [`AGENT_GUIDE.md`](./AGENT_GUIDE.md)** (SO-101 setup, recording, picking a policy, training duration, eval — with copy-pasteable commands).
-
 ## Project Overview

 LeRobot is a PyTorch-based library for real-world robotics, providing datasets, pretrained policies, and tools for training, evaluation, data collection, and robot control. It integrates with Hugging Face Hub for model/dataset sharing.
@@ -1,410 +0,0 @@
-# AGENT_GUIDE.md — LeRobot Helper for AI Agents & Users
-
-This file is a practical, copy-paste-friendly companion for any AI agent (Cursor, Claude, ChatGPT, Codex, etc.) helping a user work with LeRobot. It complements [`AGENTS.md`](./AGENTS.md) (dev/contributor context) with **user-facing guidance**: how to start, what to train, how long, how to record, and how to calibrate an SO-101.
-
---
-
-## 1. Start here — ask the user first (MANDATORY)
-
-Before suggesting any command, an agent MUST ask the user at least these questions and wait for answers:
-
-1. **What's your goal?** (e.g. "teach my SO-101 to fold a cloth", "train a policy on an existing HF dataset", "contribute a PR", "understand the codebase")
-2. **What hardware do you have?**
-   - Robot: none / SO-100 / SO-101 / Koch / LeKiwi / Reachy / other
-   - Teleop: leader arm / phone / keyboard / gamepad / none
-   - Cameras: how many, resolution, fixed or moving?
-3. **What machine will you train on?**
-   - GPU model + VRAM (e.g. "laptop 3060 6 GB", "RTX 4090 24 GB", "A100 80 GB", "CPU only")
-   - OS: macOS / Linux / Windows
-4. **Skill level & time budget?** First time, some ML, experienced? Hours, days, a weekend?
-5. **Do you already have a dataset?** Yes (HF repo id?) / no / want to record one
-6. **How can I help right now?** (pick one concrete next step)
-
-Only after you have answers, propose a concrete path. If something is ambiguous, ask again rather than guessing. Bias toward **the simplest thing that works** for the user's hardware and goal.
-
---
-
-## 2. LeRobot in 60 seconds
-
-LeRobot = **datasets + policies + envs + robot control**, unified by a small set of strong abstractions.
-
- **`LeRobotDataset`** — episode-aware dataset (video or images + actions + state), loadable from the Hub or disk.
- **Policies** (`ACT`, `Diffusion`, `SmolVLA`, `π0`, `π0.5`, `Wall-X`, `X-VLA`, `VQ-BeT`, `TD-MPC`, …) — all inherit `PreTrainedPolicy` and can be pushed/pulled from the Hub.
- **Processors** — small composable transforms between dataset → policy → robot.
- **Envs** (sim) and **Robots** (real) — same action/observation contract so code swaps cleanly.
- **CLI** — `lerobot-record`, `lerobot-train`, `lerobot-eval`, `lerobot-teleoperate`, `lerobot-calibrate`, `lerobot-find-port`, `lerobot-setup-motors`, `lerobot-replay`.
-
-See [`AGENTS.md`](./AGENTS.md) for repo architecture.
-
---
-
-## 3. Quickstart paths (pick one)
-
-### Path A — "I have an SO-101 and want my first trained policy"
-
-Go to §4 (SO-101 end-to-end), then §5 (data tips), then §6 (pick a policy — likely **ACT**), then §7 (how long), then §8 (eval).
-
-### Path B — "No hardware, I want to train on an existing dataset"
-
-Skip §4. Pick a policy in §6, pick a duration in §7, then run `lerobot-train` per §4.9 with a Hub `--dataset.repo_id` and an `--env.type` for eval. Finish with §8.
-
-### Path C — "I just want to understand the codebase"
-
-Read §2 above, then `AGENTS.md` "Architecture", then open `src/lerobot/policies/act/` and `src/lerobot/datasets/lerobot_dataset.py` as canonical examples.
-
---
-
-## 4. SO-101 end-to-end cheat-sheet
-
-Full details in [`docs/source/so101.mdx`](./docs/source/so101.mdx) and [`docs/source/il_robots.mdx`](./docs/source/il_robots.mdx). The minimum commands are listed below, in order. Confirm the arms are assembled and powered before running them.
-
-**4.1 Install**
-
-```bash
-pip install 'lerobot[feetech]'              # SO-100/SO-101 motor stack
-# pip install 'lerobot[all]'                # everything
-# pip install 'lerobot[aloha,pusht]'        # specific features
-# pip install 'lerobot[smolvla]'            # add SmolVLA deps
-git lfs install && git lfs pull
-hf auth login                               # required to push datasets/policies
-```
-
-Contributors can alternatively use `uv sync --locked --extra feetech` (see `AGENTS.md`).
-
-**4.2 Find USB ports** — run once per arm, unplug when prompted.
-
-```bash
-lerobot-find-port
-```
-
-macOS: `/dev/tty.usbmodem...`; Linux: `/dev/ttyACM0` (may need `sudo chmod 666 /dev/ttyACM0`).
-
-**4.3 Setup motor IDs & baudrate** (one-time, per arm)
-
-```bash
-lerobot-setup-motors --robot.type=so101_follower --robot.port=<FOLLOWER_PORT>
-lerobot-setup-motors --teleop.type=so101_leader  --teleop.port=<LEADER_PORT>
-```
-
-**4.4 Calibrate** — center all joints, press Enter, sweep each joint through its full range. The `id` is the calibration key — reuse it everywhere.
-
-```bash
-lerobot-calibrate --robot.type=so101_follower --robot.port=<FOLLOWER_PORT> --robot.id=my_follower
-lerobot-calibrate --teleop.type=so101_leader  --teleop.port=<LEADER_PORT>   --teleop.id=my_leader
-```
-
-**4.5 Teleoperate** (sanity check, no recording)
-
-```bash
-lerobot-teleoperate \
-  --robot.type=so101_follower --robot.port=<FOLLOWER_PORT> --robot.id=my_follower \
-  --teleop.type=so101_leader  --teleop.port=<LEADER_PORT>  --teleop.id=my_leader \
-  --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
-  --display_data=true
-```
-
-> **Feetech timeout / comms error on SO-100 / SO-101?** Before touching software, check the **red motor LEDs** on the daisy chain.
->
-> - **All steady red, gripper → base chain** → wiring OK.
-> - **One or more motors dark / chain stops mid-way** → wiring issue: reseat the 3-pin cables, check the controller-board power supply, and make sure each motor is fully clicked in.
-> - **LEDs blinking** → the motor is in an **error state**: usually overload (forcing a joint past its limit) **or wrong power supply voltage**. SO-100 / SO-101 ship in two variants — a **5 V / 7.4 V** build and a **12 V** build — they are NOT interchangeable. Using a 12 V PSU on a 5 V / 7.4 V arm (or vice-versa) will trip this error; confirm your motor variant before powering up.
->
-> Most "timeout" errors are physical, not code.
-
-**4.6 Record a dataset** — keys: **→** next, **←** redo, **ESC** finish & upload.
-
-```bash
-HF_USER=$(NO_COLOR=1 hf auth whoami | awk -F': *' 'NR==1 {print $2}')
-
-lerobot-record \
-  --robot.type=so101_follower --robot.port=<FOLLOWER_PORT> --robot.id=my_follower \
-  --teleop.type=so101_leader  --teleop.port=<LEADER_PORT>  --teleop.id=my_leader \
-  --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
-  --dataset.repo_id=${HF_USER}/my_task \
-  --dataset.single_task="<describe the task in one sentence>" \
-  --dataset.num_episodes=50 \
-  --dataset.episode_time_s=30 \
-  --dataset.reset_time_s=10 \
-  --display_data=true
-```
-
-**4.7 Visualize** — **always** do this before training. Look for missing frames, camera blur, unreachable targets, inconsistent object positions.
-After upload: https://huggingface.co/spaces/lerobot/visualize_dataset → paste `${HF_USER}/my_task`. Works for **any LeRobot-formatted Hub dataset** — use it to scout other datasets, inspect episode quality, or debug your own data before retraining.
-
-**4.8 Replay an episode** (sanity check)
-
-```bash
-lerobot-replay --robot.type=so101_follower --robot.port=<FOLLOWER_PORT> --robot.id=my_follower \
-  --dataset.repo_id=${HF_USER}/my_task --dataset.episode=0
-```
-
-**4.9 Train** (default: ACT — fastest, lowest memory). Apple silicon: `--policy.device=mps`. See §6/§7 for policy and duration.
-
-```bash
-lerobot-train \
-  --dataset.repo_id=${HF_USER}/my_task \
-  --policy.type=act \
-  --policy.device=cuda \
-  --output_dir=outputs/train/act_my_task \
-  --job_name=act_my_task \
-  --batch_size=8 \
-  --wandb.enable=true \
-  --policy.repo_id=${HF_USER}/act_my_task
-```
-
-**4.10 Evaluate on the real robot** — compare success rate to a teleoperated baseline.
-
-```bash
-lerobot-record \
-  --robot.type=so101_follower --robot.port=<FOLLOWER_PORT> --robot.id=my_follower \
-  --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
-  --dataset.repo_id=${HF_USER}/eval_my_task \
-  --dataset.single_task="<same task description as training>" \
-  --dataset.num_episodes=10 \
-  --policy.path=${HF_USER}/act_my_task
-```
-
---
-
-## 5. Data collection tips (beginner → reliable policy)
-
-Good data beats clever models. Adopt these defaults and deviate only with evidence.
-
-### 5.1 Setup & ergonomics
-
- **Fix the rig and cameras** before touching the software. If the rig vibrates or the operator gets frustrated, fix that first — more bad data won't help.
- **Lighting matters more than resolution.** Diffuse, consistent light. Avoid moving shadows.
- **"Can you do the task from the camera view alone?"** If no, your cameras are wrong. Fix before recording.
- Enable **action interpolation** for rollouts when available for smoother trajectories.
-
-### 5.2 Practice before you record
-
- Do 5–10 demos without recording. Build a deliberate, repeatable strategy.
- Hesitant or inconsistent demos teach the model hesitation.
-
-### 5.3 Quality over speed
-
-Deliberate, high-quality execution beats fast sloppy runs. Optimize for speed only **after** strategy is dialed in — never trade quality for it.
-
-### 5.4 Consistency within and across episodes
-
-Same grasp, approach vector, and timing. Coherent strategies are much easier to learn than wildly varying movements.
-
-### 5.5 Start small, then extend (the golden rule)
-
- **First 50 episodes = constrained version** of the task: one object, fixed position, fixed camera setup, one operator.
- Train a quick ACT model. See what fails.
- **Then add diversity** along one axis at a time: more positions → more lighting → more objects → more operators.
- Don't try to collect the "perfect dataset" on day one. Iterate.
-
-### 5.6 Policy choice for beginners
-
- **Laptop / first time / want results fast → ACT.** Works surprisingly well, trains fast even on a laptop GPU.
- **Bigger GPU / language-conditioned / multi-task → SmolVLA.** Unfreezing the vision encoder (see §7) is a big win here.
- Defer π0 / π0.5 / Wall-X / X-VLA until you have a proven ACT baseline and a 20+ GB GPU.
-
-### 5.7 Recommended defaults for your first task
-
-| Setting          | Value                                                                                                                                                 |
-| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
-| Episodes         | **50** to start, scale to 100–300 after first training                                                                                                |
-| Episode length   | 20–45 s (shorter is fine for grasp/place)                                                                                                             |
-| Reset time       | 10 s                                                                                                                                                  |
-| FPS              | 30                                                                                                                                                    |
-| Cameras          | **2 cameras recommended**: 1 fixed front + 1 wrist. Multi-view often outperforms single-view. A single fixed camera also works to keep things simple. |
-| Task description | Short, specific, action-phrased sentence                                                                                                              |
-
-### 5.8 Troubleshooting signal
-
- Policy fails at one specific stage → record 10–20 more episodes **targeting that stage**.
- Policy flaps / oscillates → likely inconsistent demos, or need more training; re-record worst episodes (use **←** to redo).
- Policy ignores the object → camera framing or lighting issue, not a model issue.
-
-See also: [What makes a good dataset](https://huggingface.co/blog/lerobot-datasets#what-makes-a-good-dataset).
-
---
-
-## 6. Which policy should I train?
-
-Match the policy to the user's **GPU memory** and **time budget**. Numbers below come from an internal profiling run (one training update per policy). They are **indicative only** — see caveats.
-
-### 6.1 Profiling snapshot (indicative)
-
-All policies typically train for **5–10 epochs** (see §7).
-
-| Policy      | Batch | Update (ms) | Peak GPU mem (GB) | Best for                                                                                         |
-| ----------- | ----: | ----------: | ----------------: | ------------------------------------------------------------------------------------------------ |
-| `act`       |     4 |    **83.9** |          **0.94** | First-time users, laptops, single-task. Fast and reliable.                                       |
-| `diffusion` |     4 |       168.6 |              4.94 | Multi-modal action distributions; needs mid-range GPU.                                           |
-| `smolvla`   |     1 |       357.8 |              3.93 | Language-conditioned, multi-task, small VLA. **Unfreeze vision encoder for big gains** (see §7). |
-| `xvla`      |     1 |       731.6 |             15.52 | Large VLA, multi-task.                                                                           |
-| `wall_x`    |     1 |       716.5 |             15.95 | Large VLA with world-model objective.                                                            |
-| `pi0`       |     1 |       940.3 |             15.50 | Strong large VLA baseline (Physical Intelligence).                                               |
-| `pi05`      |     1 |      1055.8 |             16.35 | Newer π policy; similar footprint to `pi0`.                                                      |
-
-**Critical caveats:**
-
- **Optimizer:** measured with **SGD**. LeRobot's default is **AdamW**, which keeps extra optimizer state → **peak memory will be noticeably higher** with the default, especially for `pi0`, `pi05`, `wall_x`, `xvla`.
- **Batch size:** the large policies were profiled at batch 1. In practice use a **larger batch** for stable training (see §7.4). Memory scales roughly linearly with batch.
-
-### 6.2 Decision rules
-
- **< 8 GB VRAM (laptop, 3060, M-series Mac):** → `act`. Maybe `diffusion` if you have ~6–8 GB free.
- **12–16 GB VRAM (4070/4080, A4000):** → `smolvla` with defaults, or `act`/`diffusion` with larger batch. `pi0`/`pi05`/`wall_x`/`xvla` feasible only with small batch + gradient accumulation.
- **24+ GB VRAM (3090/4090/A5000):** → any policy. Prefer `smolvla` (unfrozen) for multi-task; `act` for single-task grasp-and-place (still often the best ROI). You could also experiment with `pi0`, `pi05`, or `xvla`.
- **80 GB (A100/H100):** → any, with healthy batch. `pi05`, `xvla`, `wall_x` become comfortable.
- **CPU only:** → don't train here. Use Google Colab (see [`docs/source/notebooks.mdx`](./docs/source/notebooks.mdx)) or a rented GPU.
-
---
-
-## 7. How long should I train?
-
-Robotics imitation learning usually converges in a **few epochs over the dataset**, not hundreds of thousands of raw steps. Think **epochs first**, then translate to steps.
-
-### 7.1 Rule of thumb
-
- **Typical total: 5–10 epochs.** Start at 5, eval, then decide if more helps.
- Very small datasets (< 30 episodes) may want slightly more epochs — but first, **collect more data**.
- VLAs with a pretrained vision backbone typically need **fewer** epochs than training from scratch.
-
-### 7.2 Steps ↔ epochs conversion
-
-```
-total_frames     = sum of frames over all episodes      # e.g. 50 eps × 30 fps × 30 s ≈ 45,000
-steps_per_epoch  = ceil(total_frames / batch_size)
-total_steps      = epochs × steps_per_epoch
-```
-
-Examples for `--batch_size=8`:
-
-| Dataset size            |  Frames | Steps / epoch | 5 epochs | 10 epochs |
-| ----------------------- | ------: | ------------: | -------: | --------: |
-| 50 eps × 30 s @ 30 fps  |  45,000 |        ~5,625 |      28k |       56k |
-| 100 eps × 30 s @ 30 fps |  90,000 |       ~11,250 |      56k |      113k |
-| 300 eps × 30 s @ 30 fps | 270,000 |       ~33,750 |     169k |      338k |
-
-Pass the resulting total with `--steps=<N>`; eval at intermediate checkpoints (`outputs/train/.../checkpoints/`).
-
-### 7.3 Per-policy starting points (single-task, ~50 episodes)
-
-| Policy         | Batch | Steps (first run) | Notes                                                             |
-| -------------- | ----: | ----------------: | ----------------------------------------------------------------- |
-| `act`          |  8–16 |           30k–80k | Usually converges under 50k for single-task.                      |
-| `diffusion`    |  8–16 |          80k–150k | Benefits from longer training than ACT.                           |
-| `smolvla`      |   4–8 |           30k–80k | Pretrained VLM → converges fast.                                  |
-| `pi0` / `pi05` |   1–4 |           30k–80k | Memory-bound; use gradient accumulation for effective batch ≥ 16! |
-
-### 7.4 Batch size guidance
-
- **Bigger batch is preferable** for stable gradients on teleop data.
- If GPU memory is the bottleneck, use **gradient accumulation** to raise _effective_ batch without raising peak memory.
- Scale **learning rate** gently with batch; most LeRobot defaults work fine for a 2–4× batch change.
-
-### 7.5 Scale LR schedule & checkpoints with `--steps`
-
-LeRobot's default schedulers (e.g. SmolVLA's cosine decay) use `scheduler_decay_steps=30_000`, which is sized for long training runs. When you shorten training (e.g. 5k–10k steps on a small dataset), **scale the scheduler down to match** — otherwise the LR stays near the peak and never decays. Same for checkpoint frequency.
-
-```bash
-lerobot-train ... \
-  --steps=5000 \
-  --policy.scheduler_decay_steps=5000 \
-  --save_freq=5000
-```
-
-Rule of thumb: set `scheduler_decay_steps ≈ steps`, and `save_freq` to whatever granularity you want for eval (e.g. every 1k–5k steps). Match `scheduler_warmup_steps` proportionally if your run is very short.
-
-### 7.6 SmolVLA: unfreeze the vision encoder for real gains
-
-SmolVLA ships with `freeze_vision_encoder=True`. Unfreezing usually **improves performance substantially** on specialized tasks, at the cost of more VRAM and slower steps. Enable with:
-
-```bash
-lerobot-train ... --policy.type=smolvla \
-  --policy.freeze_vision_encoder=false \
-  --policy.train_expert_only=false
-```
-
-### 7.7 Signals to stop / keep going
-
- Train loss plateaus → stop, save a Hub checkpoint.
- Train loss still dropping and you're under 10 epochs → keep going.
-
---
-
-## 8. Evaluation & benchmarks
-
-Two flavors of evaluation:
-
-### 8.1 Real-robot eval (SO-101, etc.)
-
-Reuse `lerobot-record` with `--policy.path` to run the trained policy on-robot and save the run as an eval dataset. Convention: prefix the dataset with `eval_`.
-
-```bash
-lerobot-record \
-  --robot.type=so101_follower --robot.port=<FOLLOWER_PORT> --robot.id=my_follower \
-  --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
-  --dataset.repo_id=${HF_USER}/eval_my_task \
-  --dataset.single_task="<same task description used during training>" \
-  --dataset.num_episodes=10 \
-  --policy.path=${HF_USER}/act_my_task
-```
-
-Report success rate across episodes. Compare to a teleoperated baseline and to an earlier checkpoint to catch regressions.
-
-### 8.2 Sim-benchmark eval
-
-For policies trained on sim datasets (PushT, Aloha, LIBERO, MetaWorld, RoboCasa, …) use `lerobot-eval` against the matching `env.type`:
-
-```bash
-lerobot-eval \
-  --policy.path=${HF_USER}/diffusion_pusht \
-  --env.type=pusht \
-  --eval.n_episodes=50 \
-  --eval.batch_size=10 \
-  --policy.device=cuda
-```
-
- Use `--policy.path=outputs/train/.../checkpoints/<step>/pretrained_model` for local checkpoints.
- `--eval.n_episodes` should be ≥ 50 for a stable success-rate estimate.
- Available envs live in `src/lerobot/envs/`. See [`docs/source/libero.mdx`](./docs/source/libero.mdx), [`metaworld.mdx`](./docs/source/metaworld.mdx), [`robocasa.mdx`](./docs/source/robocasa.mdx), [`vlabench.mdx`](./docs/source/vlabench.mdx) for specific benchmarks.
- To add a new benchmark, see [`docs/source/adding_benchmarks.mdx`](./docs/source/adding_benchmarks.mdx) and [`envhub.mdx`](./docs/source/envhub.mdx).
-
-### 8.2b Dockerfiles for benchmark eval
-
-Benchmark envs have native dependencies that are painful to install locally. The repo ships **pre-baked Dockerfiles** for each supported benchmark — use these to run `lerobot-eval` in a reproducible environment:
-
-| Benchmark   | Dockerfile                                                                             |
-| ----------- | -------------------------------------------------------------------------------------- |
-| LIBERO      | [`docker/Dockerfile.benchmark.libero`](./docker/Dockerfile.benchmark.libero)           |
-| LIBERO+     | [`docker/Dockerfile.benchmark.libero_plus`](./docker/Dockerfile.benchmark.libero_plus) |
-| MetaWorld   | [`docker/Dockerfile.benchmark.metaworld`](./docker/Dockerfile.benchmark.metaworld)     |
-| RoboCasa    | [`docker/Dockerfile.benchmark.robocasa`](./docker/Dockerfile.benchmark.robocasa)       |
-| RoboCerebra | [`docker/Dockerfile.benchmark.robocerebra`](./docker/Dockerfile.benchmark.robocerebra) |
-| RoboMME     | [`docker/Dockerfile.benchmark.robomme`](./docker/Dockerfile.benchmark.robomme)         |
-| RoboTwin    | [`docker/Dockerfile.benchmark.robotwin`](./docker/Dockerfile.benchmark.robotwin)       |
-| VLABench    | [`docker/Dockerfile.benchmark.vlabench`](./docker/Dockerfile.benchmark.vlabench)       |
-
-Build and run (adapt to your benchmark):
-
-```bash
-docker build -f docker/Dockerfile.benchmark.robomme -t lerobot-bench-robomme .
-docker run --gpus all --rm -it \
-  -v $HOME/.cache/huggingface:/root/.cache/huggingface \
-  lerobot-bench-robomme \
-  lerobot-eval --policy.path=<your_policy> --env.type=<env> --eval.n_episodes=50
-```
-
-See [`docker/README.md`](./docker/README.md) for base-image details.
-
-### 8.3 Target success rates
-
-Single-task grasp-and-place with 50 clean episodes: ACT should reach **> 70% success** on the training configuration. Less → data problem (see §5), not model problem. Expect a drop when generalizing to new positions — scale episodes or diversity to recover.
-
---
-
-## 9. Further reading & resources
-
- **Getting started:** [`installation.mdx`](./docs/source/installation.mdx) · [`il_robots.mdx`](./docs/source/il_robots.mdx) · [What makes a good dataset](https://huggingface.co/blog/lerobot-datasets)
- **Per-policy docs:** browse [`docs/source/*.mdx`](./docs/source/) (policies, hardware, benchmarks, advanced training).
- **Community:** [Discord](https://discord.com/invite/s3KuuzsPFb) · [Hub `LeRobot` tag](https://huggingface.co/datasets?other=LeRobot) · [Dataset visualizer](https://huggingface.co/spaces/lerobot/visualize_dataset)
-
-> Keep this file current. If you learn a rule that would prevent a class of user mistakes, add it here and in [`AGENTS.md`](./AGENTS.md).
@@ -1,84 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Benchmark image for LIBERO-plus integration tests.
-# Extends the nightly GPU image (which has lerobot[all]) with the LIBERO-plus
-# fork source + its 6.4 GB perturbation assets.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.libero_plus -t lerobot-benchmark-libero-plus .
-# Run:    docker run --gpus all --rm lerobot-benchmark-libero-plus lerobot-eval ...
-
-FROM huggingface/lerobot-gpu:latest
-ENV MUJOCO_GL=egl
-
-# unzip for the 6.4 GB assets.zip; the rest are LIBERO-plus build-time extras
-# (wand / ImageMagick / fontconfig) not in the nightly base.
-USER root
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-         unzip libexpat1 libfontconfig1-dev libmagickwand-dev \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-USER user_lerobot
-
-# robosuite==1.4.1 is mandatory (the fork uses `single_arm_env` removed in
-# v1.5+). The rest are LIBERO-plus runtime deps pulled from its setup.py.
-# We install these explicitly instead of via the [libero_plus] extra because
-# the extra's `libero @ git+...` dep installs as a namespace package; we
-# instead clone the fork and PYTHONPATH-override it below.
-RUN uv pip install --no-cache \
-        "robosuite==1.4.1" \
-        "bddl==1.0.1" \
-        "easydict==1.13" \
-        "mujoco==3.7.0" \
-        "matplotlib==3.10.8" \
-        "Wand==0.6.13" \
-        "scikit-image==0.25.2" \
-        "gym==0.26.2"
-
-# Clone LIBERO-plus and make it importable as `libero`. The nightly base has
-# hf-libero (10 tasks) preinstalled via lerobot[libero]; uninstall it so
-# Python resolves `import libero` to the 2402-task LIBERO-plus module instead.
-# Pinned to the current upstream main SHA so benchmark builds stay reproducible.
-ARG LIBERO_PLUS_SHA=4976dc3
-ENV LIBERO_PLUS_ROOT=/home/user_lerobot/libero-plus/libero/libero
-RUN git clone https://github.com/sylvestf/LIBERO-plus.git /home/user_lerobot/libero-plus \
-    && git -C /home/user_lerobot/libero-plus checkout ${LIBERO_PLUS_SHA} \
-    && cd /home/user_lerobot/libero-plus && uv pip install --no-cache --no-deps -e "." \
-    && (uv pip uninstall hf-libero 2>/dev/null || true)
-ENV PYTHONPATH="/home/user_lerobot/libero-plus:${PYTHONPATH}"
-
-# Perturbation textures/scenes: bddl_base_domain.py resolves XMLs via
-# DIR_PATH/../assets (package-relative, ignoring ~/.libero/config.yaml). All
-# 2402 tasks reference files that ship only in Sylvest/LIBERO-plus's
-# assets.zip (6.4 GB) under a deep author-internal prefix — extract and
-# flatten it under ${LIBERO_PLUS_ROOT}/assets.
-RUN python -c "\
-from huggingface_hub import hf_hub_download; \
-hf_hub_download(repo_id='Sylvest/LIBERO-plus', repo_type='dataset', \
-                filename='assets.zip', local_dir='/tmp/libero-plus-dl')" \
-    && unzip -q /tmp/libero-plus-dl/assets.zip -d /tmp/libero-plus-dl/extract \
-    && ASSETS_DIR=$(find /tmp/libero-plus-dl/extract -type d -name assets | head -1) \
-    && mv "${ASSETS_DIR}" ${LIBERO_PLUS_ROOT}/assets \
-    && rm -rf /tmp/libero-plus-dl
-
-# Point ~/.libero/config.yaml at the clone so LIBERO-plus's imports are
-# non-interactive (it calls input() when the config is missing).
-RUN mkdir -p /home/user_lerobot/.libero \
-    && printf "assets: ${LIBERO_PLUS_ROOT}/assets\nbddl_files: ${LIBERO_PLUS_ROOT}/bddl_files\ndatasets: ${LIBERO_PLUS_ROOT}/../datasets\ninit_states: ${LIBERO_PLUS_ROOT}/init_files\n" \
-       > /home/user_lerobot/.libero/config.yaml
-
-# Overlay the PR's source code on top of the nightly image.
-COPY --chown=user_lerobot:user_lerobot . .
-
-CMD ["/bin/bash"]
@@ -1,71 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Benchmark image for RoboCasa365 integration tests.
-# Extends the nightly GPU image (which already has all extras installed)
-# with the PR's source code and RoboCasa-specific asset setup.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.robocasa -t lerobot-benchmark-robocasa .
-# Run:    docker run --gpus all --rm lerobot-benchmark-robocasa lerobot-eval ...
-
-FROM huggingface/lerobot-gpu:latest
-
-# Install robocasa + robosuite as editable clones. pip-installing from git
-# omits data files like robocasa/models/assets/box_links/box_links_assets.json
-# (not declared in package_data), which download_kitchen_assets needs at import.
-#
-# `--no-deps` on robocasa is deliberate: its setup.py pins `lerobot==0.3.3`
-# in install_requires, which would shadow the editable lerobot baked into
-# this image. We install robocasa's actual runtime deps explicitly instead.
-# Pinned SHAs for reproducible benchmark runs. Bump when you need an
-# upstream fix; don't rely on `main`/`master` drift.
-ARG ROBOCASA_SHA=56e355ccc64389dfc1b8a61a33b9127b975ba681
-ARG ROBOSUITE_SHA=aaa8b9b214ce8e77e82926d677b4d61d55e577ab
-RUN git clone https://github.com/robocasa/robocasa.git ~/robocasa && \
-    git -C ~/robocasa checkout ${ROBOCASA_SHA} && \
-    git clone https://github.com/ARISE-Initiative/robosuite.git ~/robosuite && \
-    git -C ~/robosuite checkout ${ROBOSUITE_SHA} && \
-    uv pip install --no-cache -e ~/robocasa --no-deps && \
-    uv pip install --no-cache -e ~/robosuite && \
-    uv pip install --no-cache \
-      "numpy==2.2.5" "numba==0.61.2" "scipy==1.15.3" "mujoco==3.3.1" \
-      "pygame==2.6.1" "Pillow==12.2.0" "opencv-python==4.13.0.92" \
-      "pyyaml==6.0.3" "pynput==1.8.1" "tqdm==4.67.3" "termcolor==3.3.0" \
-      "imageio==2.37.3" "h5py==3.16.0" "lxml==6.0.4" "hidapi==0.14.0.post4" \
-      "tianshou==0.4.10" "gymnasium==1.2.3"
-
-# Set up robocasa macros and download kitchen assets. We need:
-#   - tex              : base environment textures
-#   - tex_generative   : AI-generated textures; kitchen fixture XMLs embed
-#                        refs to generative_textures/wall/tex*.png
-#                        unconditionally, so MjModel.from_xml_string fails
-#                        at reset time without them (even if the env is
-#                        constructed with generative_textures=None).
-#   - fixtures_lw      : lightwheel kitchen fixtures (fridge, counters...)
-#   - objs_lw          : lightwheel object meshes (stools, misc props)
-# We skip the objaverse/aigen object packs (~30GB combined) by pairing
-# this with --env.obj_registries=["lightwheel"] on the lerobot side.
-# The download script prompts interactively, so pipe 'y' to auto-accept.
-RUN python -m robocasa.scripts.setup_macros && \
-    yes y | python -m robocasa.scripts.download_kitchen_assets \
-      --type tex tex_generative fixtures_lw objs_lw
-
-# Overlay the PR's source code on top of the nightly image.
-COPY --chown=user_lerobot:user_lerobot . .
-
-# Re-install lerobot editably so the new source (with RoboCasaEnv registration)
-# replaces the stale package baked into the nightly image.
-RUN uv pip install --no-cache --no-deps -e .
-
-CMD ["/bin/bash"]
@@ -1,43 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Benchmark image for RoboCerebra integration tests.
-# RoboCerebra reuses LIBERO's simulator (libero_10 suite) with a different
-# rename_map, so this image is identical to the LIBERO benchmark image —
-# extends the nightly GPU base with LIBERO assets + the PR's source code.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.robocerebra -t lerobot-benchmark-robocerebra .
-# Run:    docker run --gpus all --rm lerobot-benchmark-robocerebra lerobot-eval ...
-
-FROM huggingface/lerobot-gpu:latest
-
-# Pre-download lerobot/libero-assets from HF Hub so nothing is fetched at
-# runtime (which times out on CI). Point the libero config at the cached path.
-# libero/libero/__init__.py calls input() when ~/.libero/config.yaml is missing,
-# so we write the config before any libero import can happen.
-RUN LIBERO_DIR=$(python -c \
-      "import importlib.util, os; s=importlib.util.find_spec('libero'); \
-       print(os.path.join(os.path.dirname(s.origin), 'libero'))") && \
-    mkdir -p /home/user_lerobot/.libero && \
-    python -c "\
-from huggingface_hub import snapshot_download; \
-snapshot_download(repo_id='lerobot/libero-assets', repo_type='dataset', \
-                  local_dir='/home/user_lerobot/.libero/assets')" && \
-    printf "assets: /home/user_lerobot/.libero/assets\nbddl_files: ${LIBERO_DIR}/bddl_files\ndatasets: ${LIBERO_DIR}/../datasets\ninit_states: ${LIBERO_DIR}/init_files\n" \
-    > /home/user_lerobot/.libero/config.yaml
-
-# Overlay the PR's source code on top of the nightly image.
-COPY --chown=user_lerobot:user_lerobot . .
-
-CMD ["/bin/bash"]
@@ -1,56 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Benchmark image for RoboMME integration tests.
-# Extends the nightly GPU image (which has lerobot[all]) with Vulkan system
-# libs for ManiSkill/SAPIEN and the robomme extra. robomme isn't in [all]
-# because mani-skill hard-pins gymnasium==0.29.1 and numpy<2.0.0 which
-# conflict with lerobot's defaults; both are safe at runtime:
-#   - gymnasium 0.29.x has the same 5-tuple step() API as 1.x (since 0.26)
-#   - numpy 1.26.4 is API-compatible with lerobot's actual usage.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.robomme -t lerobot-benchmark-robomme .
-# Run:    docker run --gpus all --rm lerobot-benchmark-robomme lerobot-eval ...
-
-FROM huggingface/lerobot-gpu:latest
-
-# NVIDIA Container Toolkit: expose Vulkan driver capability for headless rendering.
-ENV NVIDIA_DRIVER_CAPABILITIES=all \
-    VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json
-
-# ManiSkill/SAPIEN's renderer needs Vulkan, which isn't in the base image.
-USER root
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-         libvulkan1 libvulkan-dev mesa-vulkan-drivers \
-    && mkdir -p /usr/share/vulkan/icd.d \
-    && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
-       > /usr/share/vulkan/icd.d/nvidia_icd.json \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-USER user_lerobot
-
-# Install smolvla + av-dep via the PR's pyproject, then layer robomme on top
-# with gymnasium/numpy overrides. robomme isn't a pyproject extra because its
-# mani-skill pin conflicts with lerobot's base numpy>=2 (see pyproject.toml).
-COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml uv.lock README.md MANIFEST.in ./
-RUN printf 'gymnasium==0.29.1\nnumpy==1.26.4\n' > /tmp/robomme_override.txt \
-    && uv pip install --no-cache --override /tmp/robomme_override.txt \
-         -e ".[smolvla,av-dep]" \
-         "robomme @ git+https://github.com/RoboMME/robomme_benchmark.git@main" \
-    && python -c "import robomme; print('robomme import OK')"
-
-# Overlay the PR's source code on top of the nightly image.
-COPY --chown=user_lerobot:user_lerobot . .
-
-CMD ["/bin/bash"]
@@ -1,138 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Benchmark image for RoboTwin 2.0 integration tests.
-# Extends the nightly GPU image with the RoboTwin simulator stack:
-#   sapien/mplib/pytorch3d + NVlabs CuRobo + embodiments.zip + objects.zip
-# (~3.96 GB of assets; background_texture.zip ~11 GB skipped for smoke eval).
-#
-# Build: docker build -f docker/Dockerfile.benchmark.robotwin -t lerobot-benchmark-robotwin .
-# Run:   docker run --gpus all --rm lerobot-benchmark-robotwin \
-#            lerobot-eval --env.type=robotwin --env.task=beat_block_hammer ...
-
-FROM huggingface/lerobot-gpu:latest
-
-ENV NVIDIA_DRIVER_CAPABILITIES=all \
-    VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json \
-    ROBOTWIN_ROOT=/opt/robotwin
-
-# The nightly base is CUDA -base (no compiler, no Vulkan loader). CuRobo's
-# `pip install -e .` runs nvcc, and SAPIEN renders via Vulkan — add both.
-USER root
-# Pinned upstream SHA for reproducible benchmark runs. Bump when we need
-# an upstream fix; don't rely on `main` drift.
-ARG ROBOTWIN_SHA=0aeea2d669c0f8516f4d5785f0aa33ba812c14b4
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-         cuda-nvcc-12-4 cuda-cudart-dev-12-4 \
-         libvulkan1 vulkan-tools \
-    && mkdir -p /usr/share/vulkan/icd.d \
-    && echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libGLX_nvidia.so.0","api_version":"1.3.0"}}' \
-       > /usr/share/vulkan/icd.d/nvidia_icd.json \
-    && git clone https://github.com/RoboTwin-Platform/RoboTwin.git ${ROBOTWIN_ROOT} \
-    && git -C ${ROBOTWIN_ROOT} checkout ${ROBOTWIN_SHA} \
-    && chown -R user_lerobot:user_lerobot ${ROBOTWIN_ROOT} \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
-USER user_lerobot
-
-# RoboTwin runtime deps (av is already in the base via [av-dep]).
-RUN uv pip install --no-cache \
-        "sapien==3.0.0b1" "mplib==0.2.1" "transforms3d==0.4.2" "trimesh==4.4.3" \
-        "open3d==0.19.0" "imageio==2.34.2" termcolor zarr pydantic h5py
-
-# pytorch3d has no universal wheel; must be built from source (~10 min, cached).
-RUN uv pip install --no-cache --no-build-isolation \
-        "git+https://github.com/facebookresearch/pytorch3d.git@stable"
-
-# CuRobo — NVlabs motion generator; TORCH_CUDA_ARCH_LIST must be set or the
-# build aborts on an empty arch list. RoboTwin's own installer pins v0.7.8,
-# which still exposes the v1 API (`curobo.types.math`) that RoboTwin imports.
-ARG CUROBO_REF=v0.7.8
-RUN cd ${ROBOTWIN_ROOT}/envs \
-    && git clone --branch ${CUROBO_REF} --depth 1 https://github.com/NVlabs/curobo.git \
-    && cd curobo \
-    && TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" \
-       uv pip install -e . --no-build-isolation --no-cache
-
-# Upstream patches (mirror RoboTwin's script/_install.sh).
-# These patches target the exact versions pinned above; re-check when upgrading.
-# mplib==0.2.1: drop a broken `or collide` clause in planner.py.
-#   Safe to remove once mplib > 0.2.1 ships with the fix upstream.
-# sapien==3.0.0b1: fix URDF loader encoding + .srdf extension check.
-#   Safe to remove once sapien > 3.0.0b1 ships with the fix upstream.
-RUN python - <<'EOF'
-import pathlib, re, site
-for d in site.getsitepackages():
-    p = pathlib.Path(d) / "mplib" / "planner.py"
-    if p.exists():
-        p.write_text(re.sub(r"\bor collide\b", "", p.read_text(), count=1))
-        print(f"mplib patch applied: {p}")
-    p = pathlib.Path(d) / "sapien" / "wrapper" / "urdf_loader.py"
-    if p.exists():
-        src = p.read_text().replace(
-            "with open(srdf_path) as f:", 'with open(srdf_path, encoding="utf-8") as f:'
-        ).replace('"srdf"', '".srdf"')
-        p.write_text(src)
-        print(f"sapien patch applied: {p}")
-EOF
-
-# Simulation assets from TianxingChen/RoboTwin2.0: embodiments (~220 MB) +
-# objects (~3.74 GB). background_texture (~11 GB) is intentionally skipped.
-# The dataset is public — no auth token needed.
-RUN python - <<'EOF'
-import os, pathlib, zipfile
-from huggingface_hub import hf_hub_download
-
-assets_dir = pathlib.Path(os.environ["ROBOTWIN_ROOT"]) / "assets"
-assets_dir.mkdir(parents=True, exist_ok=True)
-for fname in ("embodiments.zip", "objects.zip"):
-    local = hf_hub_download(
-        repo_id="TianxingChen/RoboTwin2.0",
-        repo_type="dataset",
-        filename=fname,
-        local_dir=str(assets_dir),
-    )
-    with zipfile.ZipFile(local, "r") as z:
-        z.extractall(str(assets_dir))
-    pathlib.Path(local).unlink()
-EOF
-
-WORKDIR ${ROBOTWIN_ROOT}
-RUN python script/update_embodiment_config_path.py
-
-ENV PYTHONPATH="${ROBOTWIN_ROOT}"
-
-# Fail the image build early if the CuRobo package layout regresses. Importing
-# RoboTwin's planner here is too eager because CuRobo constructs CUDA-backed
-# defaults at import time, while Docker builds don't have access to an NVIDIA
-# driver.
-RUN python - <<'EOF'
-from pathlib import Path
-
-from curobo.types.math import Pose
-
-planner_src = (Path("/opt/robotwin/envs/robot/planner.py")).read_text()
-assert "from curobo.types.math import Pose as CuroboPose" in planner_src
-
-print("CuRobo import OK:", Pose.__name__)
-print("RoboTwin planner import references curobo.types.math")
-EOF
-
-# Return to the lerobot source directory (set by base image) before overlaying.
-WORKDIR /lerobot
-
-# Overlay the PR's source code on top of the nightly image.
-COPY --chown=user_lerobot:user_lerobot . .
-
-CMD ["/bin/bash"]
@@ -1,99 +0,0 @@
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Benchmark image for VLABench integration tests.
-# Extends the nightly GPU image with the PR's source code and VLABench setup.
-#
-# Build:  docker build -f docker/Dockerfile.benchmark.vlabench -t lerobot-benchmark-vlabench .
-# Run:    docker run --gpus all --rm lerobot-benchmark-vlabench lerobot-eval ...
-
-FROM huggingface/lerobot-gpu:latest
-
-# Install VLABench from GitHub (not on PyPI) and pin MuJoCo/dm-control.
-# Shallow-clone without submodule recursion (nested SSH-only submodules fail in CI).
-# Editable install (-e) because VLABench/utils/ has no __init__.py, so
-# find_packages() omits it from wheels; editable mode uses the source tree directly.
-# rrt-algorithms has the same packaging issue (rrt/ dir missing __init__.py).
-# Patch: constant.py calls os.listdir on ~100 asset/obj/meshes/* dirs at import
-# time. Guard the call so missing dirs return [] instead of crashing (in case
-# the asset download is partial).
-#
-# Pinned upstream SHAs for reproducible benchmark runs. Bump when you need
-# an upstream fix; don't rely on `main`/`develop` drift.
-ARG VLABENCH_SHA=cf588fe60c0c7282174fe979f5913170cfe69017
-ARG RRT_ALGORITHMS_SHA=e51d95ee489a225220d6ae2a764c4111f6ba7d85
-RUN git clone https://github.com/OpenMOSS/VLABench.git ~/VLABench && \
-    git -C ~/VLABench checkout ${VLABENCH_SHA} && \
-    git clone https://github.com/motion-planning/rrt-algorithms.git ~/rrt-algorithms && \
-    git -C ~/rrt-algorithms checkout ${RRT_ALGORITHMS_SHA} && \
-    python3 -c "\
-import pathlib; \
-p = pathlib.Path.home() / 'VLABench/VLABench/configs/constant.py'; \
-t = p.read_text(); \
-p.write_text(t.replace( \
-    'subdirs = os.listdir(xml_dir)', \
-    'if not os.path.isdir(xml_dir): return []\n    subdirs = os.listdir(xml_dir)'))" && \
-    uv pip install --no-cache -e ~/VLABench -e ~/rrt-algorithms \
-      mujoco==3.2.2 dm-control==1.0.22 \
-      open3d colorlog scikit-learn openai gdown
-
-# Download VLABench mesh assets. Task configs reference object meshes
-# (obj/meshes/fruit/, containers/basket/, tablewares/plates/, etc.); without
-# them the task builder picks from an empty mesh list and crashes with
-# IndexError at task-build time (random.choice([]) in config_manager.py).
-#
-# Preferred source: an HF Hub mirror. Set VLABENCH_ASSETS_REPO at build time
-# (e.g. --build-arg VLABENCH_ASSETS_REPO=lerobot/vlabench-assets) and we'll
-# snapshot_download the repo into VLABench's assets dir. This is the reliable
-# path for CI — Google Drive frequently returns HTTP 429 ("Too many users have
-# viewed or downloaded this file recently") on shared academic files.
-#
-# After download we *validate* that at least one XML exists under each
-# task-critical subtree and fail the build loudly if not. Silent-empty asset
-# dirs are the #1 cause of VLABench runtime crashes in CI, so we surface them
-# here rather than after a 10-minute eval build.
-#
-# Fallback: VLABench's own gdown-based script. Best-effort only.
-ARG VLABENCH_ASSETS_REPO=""
-RUN ASSETS_DIR="$HOME/VLABench/VLABench/assets" && \
-    if [ -n "${VLABENCH_ASSETS_REPO}" ]; then \
-        echo "Downloading VLABench assets from HF Hub: ${VLABENCH_ASSETS_REPO}" && \
-        uv pip install --no-cache "huggingface_hub[hf_xet]>=0.26" && \
-        python -c "from huggingface_hub import snapshot_download; \
-p = snapshot_download(repo_id='${VLABENCH_ASSETS_REPO}', repo_type='dataset', \
-    local_dir='${ASSETS_DIR}', allow_patterns=['obj/**', 'scenes/**']); \
-print('snapshot_download returned:', p)"; \
-    else \
-        echo "No VLABENCH_ASSETS_REPO set — falling back to gdown" && \
-        python ~/VLABench/scripts/download_assets.py --choice all; \
-    fi && \
-    python -c "\
-from pathlib import Path; \
-import sys; \
-root = Path('${ASSETS_DIR}'); \
-checks = ['obj/meshes/tablewares/plates', 'obj/meshes/containers/basket', 'obj/meshes/fruit', 'obj/meshes/containers/tray']; \
-failed = []; \
-print(f'Validating VLABench assets under {root}'); \
-[print(f'  {c}: {len(list((root/c).rglob(\"*.xml\")))} XMLs') for c in checks]; \
-[failed.append(c) for c in checks if not any((root/c).rglob('*.xml'))]; \
-sys.exit(f'Empty asset dirs (no *.xml): {failed}') if failed else print('All asset dirs populated.')"
-
-# Overlay the PR's source code on top of the nightly image.
-COPY --chown=user_lerobot:user_lerobot . .
-
-# Re-install lerobot editably so the new source (with VLABenchEnv registration
-# and updated obs handling) replaces the stale package baked into the nightly image.
-RUN uv pip install --no-cache --no-deps -e .
-
-CMD ["/bin/bash"]
@@ -79,22 +79,10 @@
    title: Adding a New Benchmark
  - local: libero
    title: LIBERO
-  - local: libero_plus
-    title: LIBERO-plus
  - local: metaworld
    title: Meta-World
-  - local: robotwin
-    title: RoboTwin 2.0
-  - local: robocasa
-    title: RoboCasa365
-  - local: robocerebra
-    title: RoboCerebra
-  - local: robomme
-    title: RoboMME
  - local: envhub_isaaclab_arena
    title: NVIDIA IsaacLab Arena Environments
-  - local: vlabench
-    title: VLABench
  title: "Benchmarks"
 - sections:
  - local: introduction_processors
@@ -108,7 +108,7 @@ lerobot-rollout --strategy.type=dagger \
    --teleop.port_left=/dev/ttyACM0 \
    --teleop.port_right=/dev/ttyACM1 \
    --policy.path=outputs/pretrain/checkpoints/last/pretrained_model \
-    --dataset.repo_id=your-username/rollout_hil_dataset \
+    --dataset.repo_id=your-username/hil-dataset \
    --dataset.single_task="Fold the T-shirt properly" \
    --dataset.fps=30 \
    --strategy.num_episodes=50 \
@@ -135,7 +135,7 @@ lerobot-rollout --strategy.type=dagger \
    --teleop.port_left=/dev/ttyACM0 \
    --teleop.port_right=/dev/ttyACM1 \
    --policy.path=outputs/pretrain/checkpoints/last/pretrained_model \
-    --dataset.repo_id=your-username/rollout_hil_rtc_dataset \
+    --dataset.repo_id=your-username/hil-rtc-dataset \
    --dataset.single_task="Fold the T-shirt properly" \
    --dataset.fps=30 \
    --strategy.num_episodes=50 \
@@ -59,7 +59,7 @@ lerobot-rollout \
    --robot.type=so100_follower \
    --robot.port=/dev/ttyACM0 \
    --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \
-    --dataset.repo_id=${HF_USER}/rollout_eval_data \
+    --dataset.repo_id=${HF_USER}/eval_data \
    --dataset.single_task="Put lego brick into the box" \
    --duration=3600
 ```
@@ -84,7 +84,7 @@ lerobot-rollout \
    --policy.path=${HF_USER}/my_policy \
    --robot.type=koch_follower \
    --robot.port=/dev/ttyACM0 \
-    --dataset.repo_id=${HF_USER}/rollout_highlight_data \
+    --dataset.repo_id=${HF_USER}/highlight_data \
    --dataset.single_task="Pick up the red cube"
 ```

@@ -118,7 +118,7 @@ lerobot-rollout \
    --policy.path=outputs/pretrain/checkpoints/last/pretrained_model \
    --robot.type=bi_openarm_follower \
    --teleop.type=openarm_mini \
-    --dataset.repo_id=${HF_USER}/rollout_hil_data \
+    --dataset.repo_id=${HF_USER}/hil_data \
    --dataset.single_task="Fold the T-shirt"
 ```

@@ -134,7 +134,7 @@ lerobot-rollout \
    --robot.port=/dev/ttyACM0 \
    --teleop.type=so101_leader \
    --teleop.port=/dev/ttyACM1 \
-    --dataset.repo_id=${HF_USER}/rollout_dagger_data \
+    --dataset.repo_id=${HF_USER}/dagger_data \
    --dataset.single_task="Grasp the block"
 ```

@@ -1,188 +0,0 @@
-# LIBERO-plus
-
-LIBERO-plus is a **robustness benchmark** for Vision-Language-Action (VLA) models built on top of [LIBERO](./libero). It systematically stress-tests policies by applying **seven independent perturbation dimensions** to the original LIBERO task set, exposing failure modes that standard benchmarks miss.
-
- Paper: [In-depth Robustness Analysis of Vision-Language-Action Models](https://arxiv.org/abs/2510.13626)
- GitHub: [sylvestf/LIBERO-plus](https://github.com/sylvestf/LIBERO-plus)
- Dataset: [lerobot/libero_plus](https://huggingface.co/datasets/lerobot/libero_plus)
-
-![An overview of the LIBERO-plus benchmark perturbation dimensions](https://github.com/sylvestf/LIBERO-plus/raw/main/static/images/libero-plus.jpg)
-
-## Perturbation dimensions
-
-LIBERO-plus creates ~10 000 task variants by perturbing each original LIBERO task along these axes:
-
-| Dimension             | What changes                                          |
-| --------------------- | ----------------------------------------------------- |
-| Objects layout        | Target position, presence of confounding objects      |
-| Camera viewpoints     | Camera position, orientation, field-of-view           |
-| Robot initial states  | Manipulator start pose                                |
-| Language instructions | LLM-rewritten task description (paraphrase / synonym) |
-| Light conditions      | Intensity, direction, color, shadow                   |
-| Background textures   | Scene surface and object appearance                   |
-| Sensor noise          | Photometric distortions and image degradation         |
-
-## Available task suites
-
-LIBERO-plus covers the same five suites as LIBERO:
-
-| Suite          | CLI name         | Tasks | Max steps | Description                                        |
-| -------------- | ---------------- | ----- | --------- | -------------------------------------------------- |
-| LIBERO-Spatial | `libero_spatial` | 10    | 280       | Tasks requiring reasoning about spatial relations  |
-| LIBERO-Object  | `libero_object`  | 10    | 280       | Tasks centered on manipulating different objects   |
-| LIBERO-Goal    | `libero_goal`    | 10    | 300       | Goal-conditioned tasks with changing targets       |
-| LIBERO-90      | `libero_90`      | 90    | 400       | Short-horizon tasks from the LIBERO-100 collection |
-| LIBERO-Long    | `libero_10`      | 10    | 520       | Long-horizon tasks from the LIBERO-100 collection  |
-
-<Tip warning={true}>
-  Installing LIBERO-plus **replaces** vanilla LIBERO — it uninstalls `hf-libero`
-  so that `import libero` resolves to the LIBERO-plus fork. You cannot have both
-  installed at the same time. To switch back to vanilla LIBERO, uninstall the
-  fork and reinstall with `pip install -e ".[libero]"`.
-</Tip>
-
-## Installation
-
-### System dependencies (Linux only)
-
-```bash
-sudo apt install libexpat1 libfontconfig1-dev libmagickwand-dev
-```
-
-### Python package
-
-```bash
-pip install -e ".[libero]" "robosuite==1.4.1" bddl easydict mujoco wand scikit-image gym
-git clone https://github.com/sylvestf/LIBERO-plus.git
-cd LIBERO-plus && pip install --no-deps -e .
-pip uninstall -y hf-libero  # so `import libero` resolves to the fork
-```
-
-LIBERO-plus is installed from its GitHub fork rather than a pyproject extra — the fork ships as a namespace package that pip can't handle, so it must be cloned and added to `PYTHONPATH`. See `docker/Dockerfile.benchmark.libero_plus` for the canonical install. MuJoCo is required, so only Linux is supported.
-
-<Tip>
-Set the MuJoCo rendering backend before running evaluation:
-
-```bash
-export MUJOCO_GL=egl   # headless / HPC / cloud
-```
-
-</Tip>
-
-### Download LIBERO-plus assets
-
-LIBERO-plus ships its extended asset pack separately. Download `assets.zip` from the [Hugging Face dataset](https://huggingface.co/datasets/Sylvest/LIBERO-plus/tree/main) and extract it into the LIBERO-plus package directory:
-
-```bash
-# After installing the package, find where it was installed:
-python -c "import libero; print(libero.__file__)"
-# Then extract assets.zip into <package_root>/libero/assets/
-```
-
-## Evaluation
-
-### Default evaluation (recommended)
-
-Evaluate across the four standard suites (10 episodes per task):
-
-```bash
-lerobot-eval \
-  --policy.path="your-policy-id" \
-  --env.type=libero_plus \
-  --env.task=libero_spatial,libero_object,libero_goal,libero_10 \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10 \
-  --env.max_parallel_tasks=1
-```
-
-### Single-suite evaluation
-
-Evaluate on one LIBERO-plus suite:
-
-```bash
-lerobot-eval \
-  --policy.path="your-policy-id" \
-  --env.type=libero_plus \
-  --env.task=libero_spatial \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10
-```
-
- `--env.task` picks the suite (`libero_spatial`, `libero_object`, etc.).
- `--env.task_ids` restricts to specific task indices (`[0]`, `[1,2,3]`, etc.). Omit to run all tasks in the suite.
- `--eval.batch_size` controls how many environments run in parallel.
- `--eval.n_episodes` sets how many episodes to run per task.
-
-### Multi-suite evaluation
-
-Benchmark a policy across multiple suites at once by passing a comma-separated list:
-
-```bash
-lerobot-eval \
-  --policy.path="your-policy-id" \
-  --env.type=libero_plus \
-  --env.task=libero_spatial,libero_object \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10
-```
-
-### Control mode
-
-LIBERO-plus supports two control modes — `relative` (default) and `absolute`. Different VLA checkpoints are trained with different action parameterizations, so make sure the mode matches your policy:
-
-```bash
--env.control_mode=relative   # or "absolute"
-```
-
-### Policy inputs and outputs
-
-**Observations:**
-
- `observation.state` — 8-dim proprioceptive features (eef position, axis-angle orientation, gripper qpos)
- `observation.images.image` — main camera view (`agentview_image`), HWC uint8
- `observation.images.image2` — wrist camera view (`robot0_eye_in_hand_image`), HWC uint8
-
-**Actions:**
-
- Continuous control in `Box(-1, 1, shape=(7,))` — 6D end-effector delta + 1D gripper
-
-### Recommended evaluation episodes
-
-For reproducible benchmarking, use **10 episodes per task** across all four standard suites (Spatial, Object, Goal, Long). This gives 400 total episodes and matches the protocol used for published results.
-
-## Training
-
-### Dataset
-
-A LeRobot-format training dataset for LIBERO-plus is available at:
-
- [lerobot/libero_plus](https://huggingface.co/datasets/lerobot/libero_plus)
-
-### Example training command
-
-```bash
-lerobot-train \
-    --policy.type=smolvla \
-    --policy.repo_id=${HF_USER}/smolvla_libero_plus \
-    --policy.load_vlm_weights=true \
-    --dataset.repo_id=lerobot/libero_plus \
-    --env.type=libero_plus \
-    --env.task=libero_spatial \
-    --output_dir=./outputs/ \
-    --steps=100000 \
-    --batch_size=4 \
-    --eval.batch_size=1 \
-    --eval.n_episodes=1 \
-    --eval_freq=1000
-```
-
-## Relationship to LIBERO
-
-LIBERO-plus is a drop-in extension of LIBERO:
-
- Same Python gym interface (`LiberoEnv`, `LiberoProcessorStep`)
- Same camera names and observation/action format
- Same task suite names
- Installs under the same `libero` Python package name (different GitHub repo)
-
-To use the original LIBERO benchmark, see [LIBERO](./libero) and use `--env.type=libero`.
@@ -61,6 +61,17 @@ lerobot-eval \
  --rename_map='{"observation.images.image": "observation.images.base_0_rgb", "observation.images.image2": "observation.images.left_wrist_0_rgb"}'
 ```

+### Recording
+
+`lerobot-record` also supports rename maps, nested under the dataset config:
+
+```bash
+# When running inference
+lerobot-record \
+  --policy.path="<user>/smolVLA_finetuned" \
+  ... \
+  --dataset.rename_map='{"observation.images.glove2": "observation.images.image"}'
+```
+
 ## Alternative: edit the policy config directly

 If you always use the same dataset or environment, you can **edit the policy's `config.json`** so its observation keys match your data source. Then no rename map is needed.
@@ -94,10 +105,10 @@ XVLA-base has three visual inputs and `empty_cameras=0` by default. Your dataset

 ## Quick reference

-| Goal                                    | What to do                                                                  |
-| --------------------------------------- | --------------------------------------------------------------------------- |
-| Dataset keys ≠ policy keys              | `--rename_map='{"dataset_key": "policy_key", ...}'`                         |
-| Env keys ≠ policy keys (eval)           | `--rename_map='{"env_key": "policy_key", ...}'`                             |
-| Rollout with different keys (inference) | `--rename_map='{"source_key": "policy_key", ...}'`.                         |
-| Fewer cameras than policy expects       | `--policy.empty_cameras=N` (supported by PI0, PI05, PI0Fast, SmolVLA, XVLA) |
-| Avoid passing a rename map              | Edit the policy's `config.json` so its keys match your data source          |
+| Goal                                      | What to do                                                                  |
+| ----------------------------------------- | --------------------------------------------------------------------------- |
+| Dataset keys ≠ policy keys                | `--rename_map='{"dataset_key": "policy_key", ...}'`                         |
+| Env keys ≠ policy keys (eval)             | `--rename_map='{"env_key": "policy_key", ...}'`                             |
+| Recording with different keys (inference) | `--dataset.rename_map='{"source_key": "policy_key", ...}'`                  |
+| Fewer cameras than policy expects         | `--policy.empty_cameras=N` (supported by PI0, PI05, PI0Fast, SmolVLA, XVLA) |
+| Avoid passing a rename map                | Edit the policy's `config.json` so its keys match your data source          |
@@ -1,188 +0,0 @@
-# RoboCasa365
-
-[RoboCasa365](https://robocasa.ai) is a large-scale simulation framework for training and benchmarking **generalist robots** in everyday kitchen tasks. It ships 365 diverse manipulation tasks across 2,500 kitchen environments, 3,200+ object assets and 600+ hours of human demonstration data, on a PandaOmron 12-DOF mobile manipulator (Franka arm on a holonomic base).
-
- Paper: [RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots](https://arxiv.org/abs/2406.02523)
- GitHub: [robocasa/robocasa](https://github.com/robocasa/robocasa)
- Project website: [robocasa.ai](https://robocasa.ai)
- Pretrained policy: [`lerobot/smolvla_robocasa`](https://huggingface.co/lerobot/smolvla_robocasa)
- Single-task dataset (CloseFridge): [`pepijn223/robocasa_CloseFridge`](https://huggingface.co/datasets/pepijn223/robocasa_CloseFridge)
-
-<img
-  src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/robocasa-banner.webp"
-  alt="RoboCasa365 benchmark overview"
-  width="85%"
-/>
-
-## Available tasks
-
-RoboCasa365 organizes its 365 tasks into two families and three upstream benchmark groups that LeRobot exposes as first-class `--env.task` shortcuts:
-
-| Family    | Tasks | Description                                                                     |
-| --------- | ----- | ------------------------------------------------------------------------------- |
-| Atomic    | ~65   | Single-skill tasks: pick-and-place, door/drawer manipulation, appliance control |
-| Composite | ~300  | Multi-step tasks across 60+ categories: cooking, cleaning, organizing, etc.     |
-
-**Atomic task examples:** `CloseFridge`, `OpenDrawer`, `OpenCabinet`, `TurnOnMicrowave`, `TurnOffStove`, `NavigateKitchen`, `PickPlaceCounterToStove`.
-
-**Composite task categories:** baking, boiling, brewing, chopping, clearing table, defrosting food, loading dishwasher, making tea, microwaving food, washing dishes, and more.
-
-`--env.task` accepts three forms:
-
- a single task name (`CloseFridge`)
- a comma-separated list (`CloseFridge,OpenBlenderLid,PickPlaceCoffee`)
- a benchmark-group shortcut — `atomic_seen`, `composite_seen`, `composite_unseen`, `pretrain50`, `pretrain100`, `pretrain200`, `pretrain300` — which auto-expands to the upstream task list and auto-sets the dataset `split` (`target` or `pretrain`).
-
-## Installation
-
-RoboCasa and its dependency `robosuite` are not published on PyPI, and RoboCasa's own `setup.py` hardcodes `lerobot==0.3.3`, which conflicts with this repo's `lerobot`. LeRobot therefore does **not** expose a `robocasa` extra — install the two packages manually as editable clones (using `--no-deps` on `robocasa` to skip its shadowed `lerobot` pin):
-
-```bash
-# After following the standard LeRobot installation instructions.
-
-git clone https://github.com/robocasa/robocasa.git ~/robocasa
-git clone https://github.com/ARISE-Initiative/robosuite.git ~/robosuite
-pip install -e ~/robocasa --no-deps
-pip install -e ~/robosuite
-
-# Robocasa's runtime deps (the ones its setup.py would have pulled, minus
-# the bad lerobot pin).
-pip install numpy numba scipy mujoco pygame Pillow opencv-python \
-            pyyaml pynput tqdm termcolor imageio h5py lxml hidapi \
-            tianshou gymnasium
-
-python -m robocasa.scripts.setup_macros
-# Lightweight assets (lightwheel object meshes + textures). Enough for
-# the default env out of the box.
-python -m robocasa.scripts.download_kitchen_assets \
-  --type tex tex_generative fixtures_lw objs_lw
-# Optional: full objaverse/aigen registries (~30GB) for richer object
-# variety. Enable at eval time via --env.obj_registries (see below).
-# python -m robocasa.scripts.download_kitchen_assets --type objs_objaverse
-```
-
-<Tip>
-RoboCasa requires MuJoCo. Set the rendering backend before training or evaluation:
-
-```bash
-export MUJOCO_GL=egl  # for headless servers (HPC, cloud)
-```
-
-</Tip>
-
-### Object registries
-
-By default the env samples objects only from the `lightwheel` registry (what `--type objs_lw` ships), which avoids a `Probabilities contain NaN` crash when the objaverse / aigen packs aren't on disk. If you've downloaded the full asset set, enable the full registry at runtime:
-
-```bash
--env.obj_registries='[objaverse,lightwheel]'
-```
-
-## Evaluation
-
-All eval snippets below mirror the CI command (see `.github/workflows/benchmark_tests.yml`). The `--rename_map` argument maps RoboCasa's native camera keys (`robot0_agentview_left` / `robot0_eye_in_hand` / `robot0_agentview_right`) onto the three-camera (`camera1` / `camera2` / `camera3`) input layout the released `smolvla_robocasa` policy was trained on.
-
-### Single-task evaluation (recommended for quick iteration)
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_robocasa \
-  --env.type=robocasa \
-  --env.task=CloseFridge \
-  --eval.batch_size=1 \
-  --eval.n_episodes=20 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  '--rename_map={"observation.images.robot0_agentview_left": "observation.images.camera1", "observation.images.robot0_eye_in_hand": "observation.images.camera2", "observation.images.robot0_agentview_right": "observation.images.camera3"}'
-```
-
-### Multi-task evaluation
-
-Pass a comma-separated list of tasks:
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_robocasa \
-  --env.type=robocasa \
-  --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove \
-  --eval.batch_size=1 \
-  --eval.n_episodes=20 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  '--rename_map={"observation.images.robot0_agentview_left": "observation.images.camera1", "observation.images.robot0_eye_in_hand": "observation.images.camera2", "observation.images.robot0_agentview_right": "observation.images.camera3"}'
-```
-
-### Benchmark-group evaluation
-
-Run an entire upstream group (e.g. all 18 `atomic_seen` tasks with `split=target`):
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_robocasa \
-  --env.type=robocasa \
-  --env.task=atomic_seen \
-  --eval.batch_size=1 \
-  --eval.n_episodes=20 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  '--rename_map={"observation.images.robot0_agentview_left": "observation.images.camera1", "observation.images.robot0_eye_in_hand": "observation.images.camera2", "observation.images.robot0_agentview_right": "observation.images.camera3"}'
-```
-
-### Recommended evaluation episodes
-
-**20 episodes per task** for reproducible benchmarking. Matches the protocol used in published results.
-
-## Policy inputs and outputs
-
-**Observations** (raw RoboCasa camera names are preserved verbatim):
-
- `observation.state` — 16-dim proprioceptive state (base position, base quaternion, relative end-effector position, relative end-effector quaternion, gripper qpos)
- `observation.images.robot0_agentview_left` — left agent view, 256×256 HWC uint8
- `observation.images.robot0_eye_in_hand` — wrist camera view, 256×256 HWC uint8
- `observation.images.robot0_agentview_right` — right agent view, 256×256 HWC uint8
-
-**Actions:**
-
- Continuous control in `Box(-1, 1, shape=(12,))` — base motion (4D) + control mode (1D) + end-effector position (3D) + end-effector rotation (3D) + gripper (1D).
-
-## Training
-
-### Single-task example
-
-A ready-to-use single-task dataset is on the Hub:
-[`pepijn223/robocasa_CloseFridge`](https://huggingface.co/datasets/pepijn223/robocasa_CloseFridge).
-
-Fine-tune a SmolVLA base on `CloseFridge`:
-
-```bash
-lerobot-train \
-  --policy.type=smolvla \
-  --policy.repo_id=${HF_USER}/smolvla_robocasa_CloseFridge \
-  --policy.load_vlm_weights=true \
-  --policy.push_to_hub=true \
-  --dataset.repo_id=pepijn223/robocasa_CloseFridge \
-  --env.type=robocasa \
-  --env.task=CloseFridge \
-  --output_dir=./outputs/smolvla_robocasa_CloseFridge \
-  --steps=100000 \
-  --batch_size=4 \
-  --eval_freq=5000 \
-  --eval.batch_size=1 \
-  --eval.n_episodes=5 \
-  --save_freq=10000
-```
-
-Evaluate the resulting checkpoint:
-
-```bash
-lerobot-eval \
-  --policy.path=${HF_USER}/smolvla_robocasa_CloseFridge \
-  --env.type=robocasa \
-  --env.task=CloseFridge \
-  --eval.batch_size=1 \
-  --eval.n_episodes=20
-```
-
-## Reproducing published results
-
-The released checkpoint [`lerobot/smolvla_robocasa`](https://huggingface.co/lerobot/smolvla_robocasa) is evaluated with the commands in the [Evaluation](#evaluation) section. CI runs a 10-atomic-task smoke eval (one episode each) on every PR touching the benchmark, picking fixture-centric tasks that don't require the objaverse asset pack.
@@ -1,99 +0,0 @@
-# RoboCerebra
-
-[RoboCerebra](https://robocerebra-project.github.io/) is a long-horizon manipulation benchmark that evaluates **high-level reasoning, planning, and memory** in VLAs. Episodes chain multiple sub-goals with language-grounded intermediate instructions, built on top of LIBERO's simulator stack (MuJoCo + robosuite, Franka Panda 7-DOF).
-
- Paper: [RoboCerebra: A Large-scale Benchmark for Long-horizon Robotic Manipulation Evaluation](https://arxiv.org/abs/2506.06677)
- Project website: [robocerebra-project.github.io](https://robocerebra-project.github.io/)
- Dataset: [`lerobot/robocerebra_unified`](https://huggingface.co/datasets/lerobot/robocerebra_unified) — LeRobot v3.0, 6,660 episodes / 571,116 frames at 20 fps, 1,728 language-grounded sub-tasks.
- Pretrained policy: [`lerobot/smolvla_robocerebra`](https://huggingface.co/lerobot/smolvla_robocerebra)
-
-## Available tasks
-
-RoboCerebra reuses LIBERO's simulator, so evaluation runs against the LIBERO `libero_10` long-horizon suite:
-
-| Suite     | CLI name    | Tasks | Description                                                   |
-| --------- | ----------- | ----- | ------------------------------------------------------------- |
-| LIBERO-10 | `libero_10` | 10    | Long-horizon kitchen/living room tasks chaining 3–6 sub-goals |
-
-Each RoboCerebra episode in the dataset is segmented into multiple sub-tasks with natural-language instructions, which the unified dataset exposes as independent supervision signals.
-
-## Installation
-
-RoboCerebra piggybacks on LIBERO, so the `libero` extra is all you need:
-
-```bash
-pip install -e ".[libero]"
-```
-
-<Tip>
-RoboCerebra requires Linux (MuJoCo / robosuite). Set the rendering backend before training or evaluation:
-
-```bash
-export MUJOCO_GL=egl  # for headless servers (HPC, cloud)
-```
-
-</Tip>
-
-## Evaluation
-
-RoboCerebra eval runs against LIBERO's `libero_10` suite with RoboCerebra's camera naming (`image` + `wrist_image`) and an extra empty-camera slot so a three-view-trained policy receives the expected input layout:
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_robocerebra \
-  --env.type=libero \
-  --env.task=libero_10 \
-  --env.fps=20 \
-  --env.obs_type=pixels_agent_pos \
-  --env.observation_height=256 \
-  --env.observation_width=256 \
-  '--env.camera_name_mapping={"agentview_image": "image", "robot0_eye_in_hand_image": "wrist_image"}' \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  '--rename_map={"observation.images.image": "observation.images.camera1", "observation.images.wrist_image": "observation.images.camera2"}' \
-  --policy.empty_cameras=1
-```
-
-### Recommended evaluation episodes
-
-**10 episodes per task** across the `libero_10` suite (100 total) for reproducible benchmarking. Matches the protocol used in the RoboCerebra paper.
-
-## Policy inputs and outputs
-
-**Observations:**
-
- `observation.state` — 8-dim proprioceptive state (7 joint positions + gripper)
- `observation.images.image` — third-person view, 256×256 HWC uint8
- `observation.images.wrist_image` — wrist-mounted camera view, 256×256 HWC uint8
-
-**Actions:**
-
- Continuous control in `Box(-1, 1, shape=(7,))` — end-effector delta (6D) + gripper (1D)
-
-## Training
-
-The unified dataset at [`lerobot/robocerebra_unified`](https://huggingface.co/datasets/lerobot/robocerebra_unified) exposes two RGB streams and language-grounded sub-task annotations:
-
-| Feature                          | Shape         | Description          |
-| -------------------------------- | ------------- | -------------------- |
-| `observation.images.image`       | (256, 256, 3) | Third-person view    |
-| `observation.images.wrist_image` | (256, 256, 3) | Wrist-mounted camera |
-| `observation.state`              | (8,)          | Joint pos + gripper  |
-| `action`                         | (7,)          | EEF delta + gripper  |
-
-Fine-tune a SmolVLA base on it:
-
-```bash
-lerobot-train \
-  --policy.path=lerobot/smolvla_base \
-  --dataset.repo_id=lerobot/robocerebra_unified \
-  --env.type=libero \
-  --env.task=libero_10 \
-  --output_dir=outputs/smolvla_robocerebra
-```
-
-## Reproducing published results
-
-The released checkpoint [`lerobot/smolvla_robocerebra`](https://huggingface.co/lerobot/smolvla_robocerebra) was trained on `lerobot/robocerebra_unified` and evaluated with the command in the [Evaluation](#evaluation) section. CI runs the same command with `--eval.n_episodes=1` as a smoke test on every PR touching the benchmark.
@@ -1,130 +0,0 @@
-# RoboMME
-
-[RoboMME](https://robomme.github.io) is a memory-augmented manipulation benchmark built on ManiSkill (SAPIEN). It evaluates a robot's ability to retain and use information across an episode — counting, object permanence, reference, and imitation.
-
- **16 tasks** across 4 memory-skill suites
- **1,600 training demos** (100 per task, 50 val, 50 test)
- **Dataset**: [`lerobot/robomme`](https://huggingface.co/datasets/lerobot/robomme) — LeRobot v3.0, 768K frames at 10 fps
- **Simulator**: ManiSkill / SAPIEN, Panda arm, Linux only
-
-![RoboMME benchmark tasks overview](https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2603.04639/gradient.png)
-
-## Tasks
-
-| Suite                             | Tasks                                                         |
-| --------------------------------- | ------------------------------------------------------------- |
-| **Counting** (temporal memory)    | BinFill, PickXtimes, SwingXtimes, StopCube                    |
-| **Permanence** (spatial memory)   | VideoUnmask, VideoUnmaskSwap, ButtonUnmask, ButtonUnmaskSwap  |
-| **Reference** (object memory)     | PickHighlight, VideoRepick, VideoPlaceButton, VideoPlaceOrder |
-| **Imitation** (procedural memory) | MoveCube, InsertPeg, PatternLock, RouteStick                  |
-
-## Installation
-
-> RoboMME requires **Linux** (ManiSkill/SAPIEN uses Vulkan rendering). Docker is recommended to isolate dependency conflicts.
-
-### Native (Linux)
-
-```bash
-pip install --override <(printf 'gymnasium==0.29.1\nnumpy==1.26.4\n') \
-  -e '.[smolvla,av-dep]' \
-  'robomme @ git+https://github.com/RoboMME/robomme_benchmark.git@main'
-```
-
-> **Dependency note**: `mani-skill` (pulled by `robomme`) pins `gymnasium==0.29.1` and `numpy<2.0.0`, which conflict with lerobot's base `numpy>=2.0.0`. That's why `robomme` is not a pyproject extra — use the override install above, or the Docker approach below to avoid conflicts entirely.
-
-### Docker (recommended)
-
-```bash
-# Build base image first (from repo root)
-docker build -f docker/Dockerfile.eval-base -t lerobot-eval-base .
-
-# Build RoboMME eval image (applies gymnasium + numpy pin overrides)
-docker build -f docker/Dockerfile.benchmark.robomme -t lerobot-robomme .
-```
-
-The `docker/Dockerfile.benchmark.robomme` image overrides `gymnasium==0.29.1` and `numpy==1.26.4` after lerobot's install. Both versions are runtime-safe for lerobot's actual API usage.
-
-## Running Evaluation
-
-### Default (single task, single episode)
-
-```bash
-lerobot-eval \
-    --policy.path=<your_policy_repo> \
-    --env.type=robomme \
-    --env.task=PickXtimes \
-    --env.dataset_split=test \
-    --env.task_ids=[0] \
-    --eval.batch_size=1 \
-    --eval.n_episodes=1
-```
-
-### Multi-task evaluation
-
-Evaluate multiple tasks in one run by comma-separating task names. Use `task_ids` to control which episodes are evaluated per task. Recommended: 50 episodes per task for the test split.
-
-```bash
-lerobot-eval \
-    --policy.path=<your_policy_repo> \
-    --env.type=robomme \
-    --env.task=PickXtimes,BinFill,StopCube,MoveCube,InsertPeg \
-    --env.dataset_split=test \
-    --env.task_ids=[0,1,2,3,4,5,6,7,8,9] \
-    --eval.batch_size=1 \
-    --eval.n_episodes=50
-```
-
-### Key CLI options for `env.type=robomme`
-
-| Option               | Default       | Description                                        |
-| -------------------- | ------------- | -------------------------------------------------- |
-| `env.task`           | `PickXtimes`  | Any of the 16 task names above (comma-separated)   |
-| `env.dataset_split`  | `test`        | `train`, `val`, or `test`                          |
-| `env.action_space`   | `joint_angle` | `joint_angle` (8-D) or `ee_pose` (7-D)             |
-| `env.episode_length` | `300`         | Max steps per episode                              |
-| `env.task_ids`       | `null`        | List of episode indices to evaluate (null = `[0]`) |
-
-## Dataset
-
-The dataset [`lerobot/robomme`](https://huggingface.co/datasets/lerobot/robomme) is in **LeRobot v3.0 format** and can be loaded directly:
-
-```python
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
-
-dataset = LeRobotDataset("lerobot/robomme")
-```
-
-### Dataset features
-
-| Feature            | Shape         | Description                     |
-| ------------------ | ------------- | ------------------------------- |
-| `image`            | (256, 256, 3) | Front camera RGB                |
-| `wrist_image`      | (256, 256, 3) | Wrist camera RGB                |
-| `actions`          | (8,)          | Joint angles + gripper          |
-| `state`            | (8,)          | Joint positions + gripper state |
-| `simple_subgoal`   | str           | High-level language annotation  |
-| `grounded_subgoal` | str           | Grounded language annotation    |
-| `episode_index`    | int           | Episode ID                      |
-| `frame_index`      | int           | Frame within episode            |
-
-### Feature key alignment (training)
-
-The env wrapper exposes `pixels/image` and `pixels/wrist_image` as observation keys. The `features_map` in `RoboMMEEnv` maps these to `observation.images.image` and `observation.images.wrist_image` for the policy. State is exposed as `agent_pos` and maps to `observation.state`.
-
-The dataset's `image` and `wrist_image` columns already align with the policy input keys, so no renaming is needed when fine-tuning.
-
-## Action Spaces
-
-| Type          | Dim | Description                                               |
-| ------------- | --- | --------------------------------------------------------- |
-| `joint_angle` | 8   | 7 joint angles + 1 gripper (−1 closed, +1 open, absolute) |
-| `ee_pose`     | 7   | xyz + roll/pitch/yaw + gripper                            |
-
-Set via `--env.action_space=joint_angle` (default) or `--env.action_space=ee_pose`.
-
-## Platform Notes
-
- **Linux only**: ManiSkill requires SAPIEN/Vulkan. macOS and Windows are not supported.
- **GPU recommended**: Rendering is CPU-capable but slow; CUDA + Vulkan gives full speed.
- **gymnasium / numpy conflict**: See installation note above. Docker image handles this automatically.
- **ManiSkill fork**: `robomme` depends on a specific ManiSkill fork (`YinpeiDai/ManiSkill`), pulled in automatically via the `robomme` package.
@@ -1,223 +0,0 @@
-# RoboTwin 2.0
-
-RoboTwin 2.0 is a **large-scale dual-arm manipulation benchmark** built on the SAPIEN physics engine. It provides a standardized evaluation protocol for bimanual robotic policies across 50 tasks (as of upstream `main`) with strong domain randomization (clutter, lighting, background, tabletop height, and language instructions).
-
- Paper: [RoboTwin 2.0: A Scalable Data Generator and Benchmark with Strong Domain Randomization for Robust Bimanual Robotic Manipulation](https://arxiv.org/abs/2506.18088)
- GitHub: [RoboTwin-Platform/RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin)
- Leaderboard: [robotwin-platform.github.io/leaderboard](https://robotwin-platform.github.io/leaderboard)
- Dataset: [lerobot/robotwin_unified](https://huggingface.co/datasets/lerobot/robotwin_unified)
-
-![RoboTwin 2.0 benchmark overview](https://www.aitntnews.com/pictures/2025/7/8/9a7f79cb-5ba9-11f0-8581-fa163e47d677.png)
-
-## Overview
-
-| Property      | Value                                                    |
-| ------------- | -------------------------------------------------------- |
-| Tasks         | 50 dual-arm manipulation tasks                           |
-| Robot         | Aloha-AgileX bimanual (14 DOF, 7 per arm)                |
-| Action space  | 14-dim joint-space, continuous in `[-1, 1]`              |
-| Cameras       | `head_camera`, `left_camera`, `right_camera`             |
-| Simulator     | SAPIEN (not MuJoCo)                                      |
-| Eval protocol | 100 episodes/task, 50 demo_clean demonstrations          |
-| Eval settings | **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) |
-
-## Available tasks
-
-RoboTwin 2.0 ships 50 dual-arm manipulation tasks in its upstream `envs/` directory. The canonical list is the `ROBOTWIN_TASKS` tuple in `src/lerobot/envs/robotwin.py`, mirrored verbatim from the upstream repo. Example tasks:
-
-| Task                     | CLI name                 | Category          |
-| ------------------------ | ------------------------ | ----------------- |
-| Beat block with hammer   | `beat_block_hammer`      | Tool use          |
-| Click bell / alarm clock | `click_bell`             | Precision press   |
-| Stack blocks (2 / 3)     | `stack_blocks_two/three` | Stacking          |
-| Stack bowls (2 / 3)      | `stack_bowls_two/three`  | Stacking          |
-| Handover block / mic     | `handover_block`         | Bimanual coord.   |
-| Lift pot                 | `lift_pot`               | Bimanual lift     |
-| Shake bottle             | `shake_bottle`           | Continuous motion |
-| Turn switch              | `turn_switch`            | Articulated obj   |
-| Stamp seal               | `stamp_seal`             | Precision place   |
-| Scan object              | `scan_object`            | Mobile manip.     |
-
-Pass a comma-separated list to `--env.task` to run multiple tasks in a single eval sweep.
-
-<Tip warning={true}>
-  `open_laptop` is currently broken upstream (its `check_success()` uses
-  `self.arm_tag`, which is only set inside the scripted-expert `play_once()`
-  path and therefore unavailable during normal policy eval). Avoid it until the
-  upstream bug is fixed, or patch the task to default `self.arm_tag = "left"` in
-  `load_actors()`.
-</Tip>
-
-## Dataset
-
-The RoboTwin 2.0 dataset is available in **LeRobot v3.0 format** on the Hugging Face Hub:
-
-```
-lerobot/robotwin_unified
-```
-
-It contains over 100,000 pre-collected trajectories across all 50 tasks (79.6 GB, Apache 2.0 license). No format conversion is needed — it is already in the correct LeRobot v3.0 schema with video observations and action labels.
-
-You can load it directly with the HF Datasets library:
-
-```python
-from datasets import load_dataset
-
-ds = load_dataset("lerobot/robotwin_unified", split="train")
-```
-
-## Installation
-
-RoboTwin 2.0 requires **Linux** with an NVIDIA GPU (CUDA 12.1 recommended). Installation takes approximately 20 minutes.
-
-### 1. Create a conda environment
-
-```bash
-conda create -n robotwin python=3.10 -y
-conda activate robotwin
-```
-
-### 2. Install LeRobot
-
-```bash
-git clone https://github.com/huggingface/lerobot.git
-cd lerobot
-pip install -e "."
-```
-
-### 3. Install RoboTwin 2.0
-
-```bash
-git clone https://github.com/RoboTwin-Platform/RoboTwin.git
-cd RoboTwin
-bash script/_install.sh
-bash script/_download_assets.sh
-```
-
-The install script handles all Python dependencies including SAPIEN, CuRobo, mplib, and pytorch3d.
-
-<Tip warning={true}>
-If the automated install fails, install manually:
-
-```bash
-pip install -r requirements.txt
-pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable"
-cd envs && git clone https://github.com/NVlabs/curobo.git && cd curobo
-pip install -e . --no-build-isolation
-```
-
-Then apply the required mplib fix: in `mplib/planner.py` line 807, remove `or collide` from the conditional.
-
-</Tip>
-
-### 4. Add RoboTwin to PYTHONPATH
-
-The RoboTwin task modules must be importable by LeRobot. From within the `RoboTwin/` directory:
-
-```bash
-export PYTHONPATH="${PYTHONPATH}:$(pwd)"
-```
-
-Add this to your shell profile to make it permanent.
-
-## Evaluation
-
-### Standard evaluation (recommended)
-
-Evaluate a policy on a single task with the official protocol (100 episodes):
-
-```bash
-lerobot-eval \
-  --policy.path="your-hf-policy-id" \
-  --env.type=robotwin \
-  --env.task=beat_block_hammer \
-  --eval.batch_size=1 \
-  --eval.n_episodes=100
-```
-
-### Single-task quick check
-
-```bash
-lerobot-eval \
-  --policy.path="your-hf-policy-id" \
-  --env.type=robotwin \
-  --env.task=beat_block_hammer \
-  --eval.batch_size=1 \
-  --eval.n_episodes=5
-```
-
-### Multi-task sweep
-
-Evaluate on several tasks in one run:
-
-```bash
-lerobot-eval \
-  --policy.path="your-hf-policy-id" \
-  --env.type=robotwin \
-  --env.task=beat_block_hammer,click_bell,handover_block,stack_blocks_two \
-  --eval.batch_size=1 \
-  --eval.n_episodes=100
-```
-
-### Full benchmark (49 of 50 tasks; `open_laptop` excluded)
-
-```bash
-lerobot-eval \
-  --policy.path="your-hf-policy-id" \
-  --env.type=robotwin \
-  --env.task=adjust_bottle,beat_block_hammer,blocks_ranking_rgb,blocks_ranking_size,click_alarmclock,click_bell,dump_bin_bigbin,grab_roller,handover_block,handover_mic,hanging_mug,lift_pot,move_can_pot,move_pillbottle_pad,move_playingcard_away,move_stapler_pad,open_microwave,pick_diverse_bottles,pick_dual_bottles,place_a2b_left,place_a2b_right,place_bread_basket,place_bread_skillet,place_burger_fries,place_can_basket,place_cans_plasticbox,place_container_plate,place_dual_shoes,place_empty_cup,place_fan,place_mouse_pad,place_object_basket,place_object_scale,place_object_stand,place_phone_stand,place_shoe,press_stapler,put_bottles_dustbin,put_object_cabinet,rotate_qrcode,scan_object,shake_bottle,shake_bottle_horizontally,stack_blocks_three,stack_blocks_two,stack_bowls_three,stack_bowls_two,stamp_seal,turn_switch \
-  --eval.batch_size=1 \
-  --eval.n_episodes=100
-```
-
-<Tip>
-  `open_laptop` is intentionally omitted above because of the upstream
-  `self.arm_tag` bug (see the **Available tasks** section). Re-add it once the
-  upstream fix lands.
-</Tip>
-
-## Camera configuration
-
-By default, all three cameras are included:
-
-| Camera key     | Description                    |
-| -------------- | ------------------------------ |
-| `head_camera`  | Torso-mounted overhead view    |
-| `left_camera`  | Left arm wrist-mounted camera  |
-| `right_camera` | Right arm wrist-mounted camera |
-
-To use a subset of cameras, override `--env.camera_names`:
-
-```bash
-lerobot-eval \
-  --policy.path="your-hf-policy-id" \
-  --env.type=robotwin \
-  --env.task=beat_block_hammer \
-  --env.camera_names="head_camera,left_camera" \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10
-```
-
-## Environment config reference
-
-Key parameters for `RoboTwinEnvConfig`:
-
-| Parameter            | Default                                  | Description                        |
-| -------------------- | ---------------------------------------- | ---------------------------------- |
-| `task`               | `"beat_block_hammer"`                    | Comma-separated task name(s)       |
-| `fps`                | `25`                                     | Simulation FPS                     |
-| `episode_length`     | `300`                                    | Max steps per episode              |
-| `obs_type`           | `"pixels_agent_pos"`                     | `"pixels"` or `"pixels_agent_pos"` |
-| `camera_names`       | `"head_camera,left_camera,right_camera"` | Comma-separated active cameras     |
-| `observation_height` | `240`                                    | Camera pixel height                |
-| `observation_width`  | `320`                                    | Camera pixel width                 |
-
-## Leaderboard submission
-
-Results can be submitted to the [RoboTwin 2.0 leaderboard](https://robotwin-platform.github.io/leaderboard). The official protocol requires:
-
- Training on 50 `demo_clean` demonstrations per task
- Evaluating 100 episodes per task
- Reporting success rate separately for **Easy** (`demo_clean`) and **Hard** (`demo_randomized`) settings
-
-For submission instructions, refer to the [RoboTwin 2.0 documentation](https://robotwin-platform.github.io/doc/).
@@ -46,7 +46,7 @@ This ensures identical task states map to consistent progress values, even acros

 ## Inputs and Targets (What the new code expects)

-SARM is trained through its processor (`src/lerobot/rewards/sarm/processor_sarm.py`), which:
+SARM is trained through its processor (`src/lerobot/policies/sarm/processor_sarm.py`), which:

 - **Encodes** images and task text with CLIP (ViT-B/32) into `video_features` and `text_features`
 - **Pads/truncates** robot state into `state_features` (up to `max_state_dim`)
@@ -347,7 +347,7 @@ Use `compute_rabc_weights.py` with `--visualize-only` to visualize model predict
 <hfoption id="single_stage">

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --visualize-only \
@@ -360,7 +360,7 @@ python -m lerobot.rewards.sarm.compute_rabc_weights \
 <hfoption id="dense_only">

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --visualize-only \
@@ -373,7 +373,7 @@ python -m lerobot.rewards.sarm.compute_rabc_weights \
 <hfoption id="dual">

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --visualize-only \
@@ -429,7 +429,7 @@ The weighting follows **Equations 8-9** from the paper:
 First, run the SARM model on all frames in your dataset to compute progress values:

 ```bash
-python -m lerobot.rewards.sarm.compute_rabc_weights \
+python src/lerobot/policies/sarm/compute_rabc_weights.py \
  --dataset-repo-id your-username/your-dataset \
  --reward-model-path your-username/sarm-model \
  --head-mode sparse \
@@ -465,15 +465,15 @@ This script:

 ### Step 5b: Train Policy with RA-BC

-Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`) if not explicitly provided. Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:
+Once you have the progress file, train your policy with RA-BC weighting. The progress file is auto-detected from the dataset path (`sarm_progress.parquet`). Currently PI0, PI0.5 and SmolVLA are supported with RA-BC:

 ```bash
 lerobot-train \
  --dataset.repo_id=your-username/your-dataset \
  --policy.type=pi0 \
-  --sample_weighting.type=rabc \
-  --sample_weighting.head_mode=sparse \
-  --sample_weighting.kappa=0.01 \
+  --use_rabc=true \
+  --rabc_head_mode=sparse \
+  --rabc_kappa=0.01 \
  --output_dir=outputs/train/policy_rabc \
  --batch_size=32 \
  --steps=40000
@@ -488,13 +488,12 @@ The training script automatically:

 **RA-BC Arguments:**

-| Argument                           | Description                                            | Default                 |
-| ---------------------------------- | ------------------------------------------------------ | ----------------------- |
-| `--sample_weighting.type`          | Weighting strategy type (`rabc` or `uniform`)          | `rabc`                  |
-| `--sample_weighting.progress_path` | Path to progress parquet file                          | `sarm_progress.parquet` |
-| `--sample_weighting.head_mode`     | Which SARM head's progress to use: `sparse` or `dense` | `sparse`                |
-| `--sample_weighting.kappa`         | Threshold κ for high-quality samples                   | `0.01`                  |
-| `--sample_weighting.epsilon`       | Small constant for numerical stability                 | `1e-6`                  |
+| Argument               | Description                                                | Default                            |
+| ---------------------- | ---------------------------------------------------------- | ---------------------------------- |
+| `--use_rabc`           | Enable RA-BC sample weighting                              | `false`                            |
+| `--rabc_progress_path` | Path to progress parquet file (auto-detected from dataset) | `sarm_progress.parquet` in dataset |
+| `--rabc_head_mode`     | Which SARM head's progress to use: `sparse` or `dense`     | `sparse`                           |
+| `--rabc_kappa`         | Threshold κ for high-quality samples                       | `0.01`                             |

 ### Tuning RA-BC Kappa

@@ -512,30 +511,30 @@ The `kappa` parameter is the threshold that determines which samples get full we

 Monitor these WandB metrics during training:

-| Metric                        | Healthy Range | Problem Indicator         |
-| ----------------------------- | ------------- | ------------------------- |
-| `sample_weight_mean_weight`   | 0.3 - 0.8     | ≈ 1.0 means kappa too low |
-| `sample_weighting/delta_mean` | > 0           | Should be positive        |
-| `sample_weighting/delta_std`  | > 0           | Variance in data quality  |
+| Metric             | Healthy Range | Problem Indicator         |
+| ------------------ | ------------- | ------------------------- |
+| `rabc_mean_weight` | 0.3 - 0.8     | ≈ 1.0 means kappa too low |
+| `rabc_delta_mean`  | > 0           | Should be positive        |
+| `rabc_delta_std`   | > 0           | Variance in data quality  |

-**If `sample_weight_mean_weight ≈ 1.0`:** Your kappa is too low. Most samples have `delta > kappa` and bypass the soft-weighting entirely. RA-BC becomes equivalent to vanilla BC.
+**If `rabc_mean_weight ≈ 1.0`:** Your kappa is too low. Most samples have `delta > kappa` and bypass the soft-weighting entirely. RA-BC becomes equivalent to vanilla BC.

 **Setting kappa based on your data:**

-The default `kappa=0.01` was tuned for the paper's T-shirt folding task (~90s episodes at 30fps). For your dataset, check the logged `sample_weighting/delta_mean` and `sample_weighting/delta_std`:
+The default `kappa=0.01` was tuned for the paper's T-shirt folding task (~90s episodes at 30fps). For your dataset, check the logged `rabc_delta_mean` and `rabc_delta_std`:

 ```
 # If delta_mean ≈ 0.03 and delta_std ≈ 0.02:
 # Most deltas fall in range [0.01, 0.05]

 # Option 1: Set kappa = delta_mean (medium selectivity)
--sample_weighting.kappa=0.03
+--rabc_kappa=0.03

 # Option 2: Set kappa = delta_mean + delta_std (high selectivity)
--sample_weighting.kappa=0.05
+--rabc_kappa=0.05

 # Option 3: Set kappa = delta_mean + 2*delta_std (very selective)
--sample_weighting.kappa=0.07
+--rabc_kappa=0.07
 ```

 **When RA-BC may not help:**
@@ -551,8 +550,8 @@ accelerate launch \
  src/lerobot/scripts/lerobot_train.py \
  --dataset.repo_id=your-username/your-dataset \
  --policy.type=pi0 \
-  --sample_weighting.type=rabc \
-  --sample_weighting.kappa=0.01 \
+  --use_rabc=true \
+  --rabc_kappa=0.01 \
  --output_dir=outputs/train/policy_rabc \
  --batch_size=32 \
  --steps=40000
@@ -577,7 +576,7 @@ accelerate launch \
 ### RA-BC

 1. **Train SARM first**: RA-BC quality depends entirely on SARM quality
-2. **Monitor `sample_weight_mean_weight`**: If it's ≈ 1.0, increase kappa (see [Tuning RA-BC Kappa](#tuning-ra-bc-kappa))
+2. **Monitor `rabc_mean_weight`**: If it's ≈ 1.0, increase kappa (see [Tuning RA-BC Kappa](#tuning-ra-bc-kappa))

 ---

@@ -274,8 +274,7 @@ python src/lerobot/scripts/lerobot_train.py \
 Once trained, we recommend deploying policies using inference-time RTC:

 ```bash
-lerobot-rollout \
-  --strategy.type=base \
+python examples/rtc/eval_with_real_robot.py \
  --policy.path=your-username/your-repo-id \
  --policy.device=cuda \
  --robot.type=unitree_g1 \
@@ -1,176 +0,0 @@
-# VLABench
-
-[VLABench](https://github.com/OpenMOSS/VLABench) is a large-scale benchmark for **language-conditioned robotic manipulation with long-horizon reasoning**. The upstream suite covers 100 task categories across 2,000+ objects and evaluates six dimensions of robot intelligence: mesh & texture understanding, spatial reasoning, world-knowledge transfer, semantic instruction comprehension, physical-law understanding, and long-horizon planning. Built on MuJoCo / dm_control with a Franka Panda 7-DOF arm. LeRobot exposes **43 of these tasks** through `--env.task` (21 primitives + 22 composites, see [Available tasks](#available-tasks) below).
-
- Paper: [VLABench: A Large-Scale Benchmark for Language-Conditioned Robotics Manipulation with Long-Horizon Reasoning](https://arxiv.org/abs/2412.18194)
- GitHub: [OpenMOSS/VLABench](https://github.com/OpenMOSS/VLABench)
- Project website: [vlabench.github.io](https://vlabench.github.io)
- Pretrained policy: [`lerobot/smolvla_vlabench`](https://huggingface.co/lerobot/smolvla_vlabench)
-
-<img
-  src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/vlabench.png"
-  alt="VLABench benchmark overview"
-  width="85%"
-/>
-
-## Available tasks
-
-VLABench ships two task suites covering **43 task categories** in LeRobot's `--env.task` surface:
-
-| Suite     | CLI name    | Tasks | Description                                                      |
-| --------- | ----------- | ----- | ---------------------------------------------------------------- |
-| Primitive | `primitive` | 21    | Single / few-skill combinations (select, insert, physics QA)     |
-| Composite | `composite` | 22    | Multi-step reasoning and long-horizon planning (cook, rearrange) |
-
-**Primitive tasks:** `select_fruit`, `select_toy`, `select_chemistry_tube`, `add_condiment`, `select_book`, `select_painting`, `select_drink`, `insert_flower`, `select_billiards`, `select_ingredient`, `select_mahjong`, `select_poker`, and physical-reasoning tasks (`density_qa`, `friction_qa`, `magnetism_qa`, `reflection_qa`, `simple_cuestick_usage`, `simple_seesaw_usage`, `sound_speed_qa`, `thermal_expansion_qa`, `weight_qa`).
-
-**Composite tasks:** `cluster_billiards`, `cluster_book`, `cluster_drink`, `cluster_toy`, `cook_dishes`, `cool_drink`, `find_unseen_object`, `get_coffee`, `hammer_nail`, `heat_food`, `make_juice`, `play_mahjong`, `play_math_game`, `play_poker`, `play_snooker`, `rearrange_book`, `rearrange_chemistry_tube`, `set_dining_table`, `set_study_table`, `store_food`, `take_chemistry_experiment`, `use_seesaw_complex`.
-
-`--env.task` accepts three forms:
-
- a single task name (`select_fruit`)
- a comma-separated list (`select_fruit,heat_food`)
- a suite shortcut (`primitive`, `composite`, or `primitive,composite`)
-
-## Installation
-
-VLABench is **not on PyPI** — its only distribution is the [OpenMOSS/VLABench](https://github.com/OpenMOSS/VLABench) GitHub repo — so LeRobot does not expose a `vlabench` extra. Install it manually as an editable clone, alongside the MuJoCo / dm_control pins VLABench needs, then fetch the mesh assets:
-
-```bash
-# After following the standard LeRobot installation instructions.
-
-git clone https://github.com/OpenMOSS/VLABench.git ~/VLABench
-git clone https://github.com/motion-planning/rrt-algorithms.git ~/rrt-algorithms
-pip install -e ~/VLABench -e ~/rrt-algorithms
-pip install "mujoco==3.2.2" "dm-control==1.0.22" \
-            open3d colorlog scikit-learn openai gdown
-
-python ~/VLABench/scripts/download_assets.py
-```
-
-<Tip>
-VLABench requires Linux (`sys_platform == 'linux'`) and Python 3.10+. Set the MuJoCo rendering backend before running:
-
-```bash
-export MUJOCO_GL=egl  # for headless servers (HPC, cloud)
-```
-
-</Tip>
-
-## Evaluation
-
-All eval snippets below mirror the command CI runs (see `.github/workflows/benchmark_tests.yml`). The `--rename_map` argument maps VLABench's `image` / `second_image` / `wrist_image` camera keys onto the three-camera (`camera1` / `camera2` / `camera3`) input layout the released `smolvla_vlabench` policy was trained on.
-
-### Single-task evaluation (recommended for quick iteration)
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_vlabench \
-  --env.type=vlabench \
-  --env.task=select_fruit \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  '--rename_map={"observation.images.image": "observation.images.camera1", "observation.images.second_image": "observation.images.camera2", "observation.images.wrist_image": "observation.images.camera3"}'
-```
-
-### Multi-task evaluation
-
-Pass a comma-separated list of tasks:
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_vlabench \
-  --env.type=vlabench \
-  --env.task=select_fruit,select_toy,add_condiment,heat_food \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  '--rename_map={"observation.images.image": "observation.images.camera1", "observation.images.second_image": "observation.images.camera2", "observation.images.wrist_image": "observation.images.camera3"}'
-```
-
-### Suite-wide evaluation
-
-Run an entire suite (all 21 primitives or all 22 composites):
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_vlabench \
-  --env.type=vlabench \
-  --env.task=primitive \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  --env.max_parallel_tasks=1 \
-  '--rename_map={"observation.images.image": "observation.images.camera1", "observation.images.second_image": "observation.images.camera2", "observation.images.wrist_image": "observation.images.camera3"}'
-```
-
-Or both suites:
-
-```bash
-lerobot-eval \
-  --policy.path=lerobot/smolvla_vlabench \
-  --env.type=vlabench \
-  --env.task=primitive,composite \
-  --eval.batch_size=1 \
-  --eval.n_episodes=10 \
-  --eval.use_async_envs=false \
-  --policy.device=cuda \
-  --env.max_parallel_tasks=1 \
-  '--rename_map={"observation.images.image": "observation.images.camera1", "observation.images.second_image": "observation.images.camera2", "observation.images.wrist_image": "observation.images.camera3"}'
-```
-
-### Recommended evaluation episodes
-
-**10 episodes per task** for reproducible benchmarking (210 total for the full primitive suite, 220 for composite). Matches the protocol in the VLABench paper.
-
-## Policy inputs and outputs
-
-**Observations:**
-
- `observation.state` — 7-dim end-effector state (position xyz + Euler xyz + gripper)
- `observation.images.image` — front camera, 480×480 HWC uint8
- `observation.images.second_image` — second camera, 480×480 HWC uint8
- `observation.images.wrist_image` — wrist camera, 480×480 HWC uint8
-
-**Actions:**
-
- Continuous control in `Box(-1, 1, shape=(7,))` — 3D position + 3D Euler orientation + 1D gripper.
-
-## Training
-
-### Datasets
-
-Pre-collected VLABench datasets in LeRobot format on the Hub:
-
- [`VLABench/vlabench_primitive_ft_lerobot_video`](https://huggingface.co/datasets/VLABench/vlabench_primitive_ft_lerobot_video) — 5,000 episodes, 128 tasks, 480×480 images.
- [`VLABench/vlabench_composite_ft_lerobot_video`](https://huggingface.co/datasets/VLABench/vlabench_composite_ft_lerobot_video) — 5,977 episodes, 167 tasks, 224×224 images.
-
-### Example training command
-
-Fine-tune a SmolVLA base on the primitive suite:
-
-```bash
-lerobot-train \
-  --policy.type=smolvla \
-  --policy.repo_id=${HF_USER}/smolvla_vlabench_primitive \
-  --policy.load_vlm_weights=true \
-  --policy.push_to_hub=true \
-  --dataset.repo_id=VLABench/vlabench_primitive_ft_lerobot_video \
-  --env.type=vlabench \
-  --env.task=select_fruit \
-  --output_dir=./outputs/smolvla_vlabench_primitive \
-  --steps=100000 \
-  --batch_size=4 \
-  --eval_freq=5000 \
-  --eval.batch_size=1 \
-  --eval.n_episodes=1 \
-  --save_freq=10000
-```
-
-## Reproducing published results
-
-The released checkpoint [`lerobot/smolvla_vlabench`](https://huggingface.co/lerobot/smolvla_vlabench) was trained on the primitive-suite dataset above and is evaluated with the [Single-task](#single-task-evaluation-recommended-for-quick-iteration) / [Suite-wide](#suite-wide-evaluation) commands. CI runs a 10-primitive-task smoke eval (one episode each) on every PR touching the benchmark.
@@ -220,7 +220,7 @@ REAL_DIM = 12
 # Postprocessing: Trim 20D predictions to 12D for deployment
 ```

-See the [action_hub.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/action_hub.py) implementation for details.
+See the [action_hub.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/action_hub.py) implementation for details.

 #### Auto Action Mode (Recommended)

@@ -519,9 +519,9 @@ If you use X-VLA in your research, please cite:

 - [X-VLA Paper](https://arxiv.org/pdf/2510.10274)
 - [LeRobot Documentation](https://github.com/huggingface/lerobot)
- [Action Registry Implementation](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/action_hub.py)
- [Processor Implementation](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/processor_xvla.py)
- [Model Configuration](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/configuration_xvla.py)
+- [Action Registry Implementation](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/action_hub.py)
+- [Processor Implementation](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/processor_xvla.py)
+- [Model Configuration](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/xvla/configuration_xvla.py)

 ## Contributing

@@ -69,7 +69,7 @@ class ComputeProgressShards(PipelineStep):
        import torch
        from tqdm import tqdm

-        from lerobot.rewards.sarm.compute_rabc_weights import (
+        from lerobot.policies.sarm.compute_rabc_weights import (
            generate_all_frame_indices,
            interpolate_progress,
            load_sarm_resources,
@@ -10,7 +10,7 @@ from lerobot.datasets import LeRobotDataset
 from lerobot.envs.configs import HILSerlProcessorConfig, HILSerlRobotEnvConfig
 from lerobot.policies import SACConfig
 from lerobot.policies.sac.modeling_sac import SACPolicy
-from lerobot.rewards.classifier.modeling_classifier import Classifier
+from lerobot.policies.sac.reward_model.modeling_classifier import Classifier
 from lerobot.rl.buffer import ReplayBuffer
 from lerobot.rl.gym_manipulator import make_robot_env
 from lerobot.robots.so_follower import SO100FollowerConfig
@@ -1,7 +1,7 @@
 import torch

 from lerobot.datasets import LeRobotDataset
-from lerobot.rewards import RewardClassifierConfig, make_reward_model, make_reward_pre_post_processors
+from lerobot.policies import RewardClassifierConfig, make_policy, make_pre_post_processors


 def main():
@@ -22,10 +22,10 @@ def main():
        model_name="microsoft/resnet-18",
    )

-    # Make reward model, preprocessor, and optimizer
-    reward_model = make_reward_model(config, dataset_stats=dataset.meta.stats)
-    optimizer = config.get_optimizer_preset().build(reward_model.parameters())
-    preprocessor, _ = make_reward_pre_post_processors(config, dataset_stats=dataset.meta.stats)
+    # Make policy, preprocessor, and optimizer
+    policy = make_policy(config, ds_meta=dataset.meta)
+    optimizer = config.get_optimizer_preset().build(policy.parameters())
+    preprocessor, _ = make_pre_post_processors(policy_cfg=config, dataset_stats=dataset.meta.stats)

    classifier_id = "<user>/reward_classifier_hil_serl_example"

@@ -42,7 +42,7 @@ def main():
            batch = preprocessor(batch)

            # Forward pass
-            loss, output_dict = reward_model.forward(batch)
+            loss, output_dict = policy.forward(batch)

            # Backward pass and optimization
            optimizer.zero_grad()
@@ -58,8 +58,8 @@ def main():

    print("Training finished!")

-    # You can now save the trained reward model.
-    reward_model.push_to_hub(classifier_id)
+    # You can now save the trained policy.
+    policy.push_to_hub(classifier_id)


 if __name__ == "__main__":
@@ -212,20 +212,6 @@ aloha = ["lerobot[dataset]", "gym-aloha>=0.1.2,<0.2.0", "lerobot[scipy-dep]"]
 pusht = ["lerobot[dataset]", "gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
 libero = ["lerobot[dataset]", "lerobot[transformers-dep]", "hf-libero>=0.1.3,<0.2.0; sys_platform == 'linux'", "lerobot[scipy-dep]"]
 metaworld = ["lerobot[dataset]", "metaworld==3.0.0", "lerobot[scipy-dep]"]
-# NOTE: vlabench is NOT exposed as a `lerobot` extra. Its only distribution
-# is the OpenMOSS/VLABench GitHub repo (package name `VLABench`, no PyPI
-# release), so any `vlabench>=X` pip spec is unresolvable. Install it
-# manually alongside MuJoCo / dm-control — see docs/source/vlabench.mdx
-# for the recipe.
-# NOTE: robomme is NOT a pyproject extra — mani-skill hard-pins numpy<2
-# which conflicts with lerobot's numpy>=2 base pin, so the two trees can't
-# resolve into a single env. Install it only in the RoboMME Docker image
-# via `uv pip install --override` (see docker/Dockerfile.benchmark.robomme).
-# NOTE: robocasa is NOT exposed as a `lerobot` extra. Its setup.py pins
-# `lerobot==0.3.3` in install_requires, which cyclically shadows our own
-# workspace `lerobot` and makes the graph unsolvable under any resolver
-# (uv, pip). Install it manually alongside robosuite — see
-# docs/source/robocasa.mdx for the recipe.

 # All
 all = [
@@ -31,23 +31,9 @@ from __future__ import annotations

 import argparse
 import json
-import re
 import sys
 from pathlib import Path

-# LIBERO-plus derives task.language by space-joining the perturbation-variant
-# filename (grab_language_from_filename in libero/libero/benchmark/__init__.py),
-# so non-_language_ variants inherit a trailing metadata blob like
-# "view 0 0 100 0 0 initstate 0 noise 45" or "add 16". Strip those tokens so
-# the description matches the base instruction used in the training dataset.
-_LIBERO_PERTURBATION_TAIL_RE = re.compile(
-    r"(?:\s(?:view|initstate|noise|add|tb|table|light|level)(?:\s\d+)+)+$"
-)
-
-
-def _strip_libero_perturbation_tail(instruction: str) -> str:
-    return _LIBERO_PERTURBATION_TAIL_RE.sub("", instruction).strip()
-

 def _libero_descriptions(task_suite: str) -> dict[str, str]:
    from libero.libero import benchmark  # type: ignore[import-untyped]
@@ -61,10 +47,7 @@ def _libero_descriptions(task_suite: str) -> dict[str, str]:
        )
        return {}
    suite = suite_dict[task_suite]()
-    return {
-        f"{task_suite}_{i}": _strip_libero_perturbation_tail(suite.get_task(i).language)
-        for i in range(suite.n_tasks)
-    }
+    return {f"{task_suite}_{i}": suite.get_task(i).language for i in range(suite.n_tasks)}


 def _metaworld_descriptions(task_name: str) -> dict[str, str]:
@@ -74,120 +57,19 @@ def _metaworld_descriptions(task_name: str) -> dict[str, str]:
    return {f"{task_name}_0": label}


-def _robotwin_descriptions(task_names: str) -> dict[str, str]:
-    """Return descriptions for each requested RoboTwin task. Reads
-    `description/task_instruction/<task>.json` from the RoboTwin clone
-    (cwd is /opt/robotwin in CI). Falls back to the task name if missing."""
-    out: dict[str, str] = {}
-    root = Path("description/task_instruction")
-    for name in (t.strip() for t in task_names.split(",") if t.strip()):
-        desc_file = root / f"{name}.json"
-        desc = name.replace("_", " ")
-        if desc_file.is_file():
-            data = json.loads(desc_file.read_text())
-            full = data.get("full_description") or desc
-            # Strip the schema placeholders ({A}, {a}) — keep the sentence readable.
-            desc = full.replace("<", "").replace(">", "")
-        out[f"{name}_0"] = desc
-    return out
-
-
-def _robocasa_descriptions(task_spec: str) -> dict[str, str]:
-    """For each task in the comma-separated list, emit a cleaned-name label.
-
-    RoboCasa episodes carry their language instruction in the env's
-    `ep_meta['lang']`, populated per reset. Pulling it requires spinning
-    up the full kitchen env per task (~seconds each); we use the task
-    name as the key here and let the eval's episode info carry the
-    actual instruction.
-    """
-    out: dict[str, str] = {}
-    for task in (t.strip() for t in task_spec.split(",") if t.strip()):
-        # Split CamelCase into words: "CloseFridge" → "close fridge".
-        label = "".join(f" {c.lower()}" if c.isupper() else c for c in task).strip()
-        out[f"{task}_0"] = label or task
-    return out
-
-
-_ROBOMME_DESCRIPTIONS = {
-    "BinFill": "Fill the target bin with the correct number of cubes",
-    "PickXtimes": "Pick the indicated cube the specified number of times",
-    "SwingXtimes": "Swing the object the specified number of times",
-    "StopCube": "Grasp and stop the moving cube",
-    "VideoUnmask": "Pick the cube shown in the reference video",
-    "VideoUnmaskSwap": "Pick the cube matching the reference video after a swap",
-    "ButtonUnmask": "Press the button indicated by the reference",
-    "ButtonUnmaskSwap": "Press the correct button after objects are swapped",
-    "PickHighlight": "Pick the highlighted cube",
-    "VideoRepick": "Repick the cube shown in the reference video",
-    "VideoPlaceButton": "Place the cube on the button shown in the video",
-    "VideoPlaceOrder": "Place cubes in the order shown in the video",
-    "MoveCube": "Move the cube to the target location",
-    "InsertPeg": "Insert the peg into the target hole",
-    "PatternLock": "Unlock the pattern by pressing buttons in sequence",
-    "RouteStick": "Route the stick through the required waypoints",
-}
-
-
-def _robomme_descriptions(task_names: str, task_ids: list[int] | None = None) -> dict[str, str]:
-    """Return descriptions for each requested RoboMME task. Keys match the
-    video filename pattern `<task>_<task_id>` used by the eval script."""
-    if task_ids is None:
-        task_ids = [0]
-    out: dict[str, str] = {}
-    for name in (t.strip() for t in task_names.split(",") if t.strip()):
-        desc = _ROBOMME_DESCRIPTIONS.get(name, name)
-        for tid in task_ids:
-            out[f"{name}_{tid}"] = desc
-    return out
-
-
-def _vlabench_descriptions(task_spec: str) -> dict[str, str]:
-    """For each task in the comma-separated list, emit a cleaned-name label.
-
-    VLABench tasks carry language instructions on their dm_control task
-    object, but pulling them requires loading the full env per task
-    (~seconds each). The CI smoke-eval already captures the instruction
-    inside its episode info; this mapping is just enough to key
-    `metrics.json` by `<task>_0`.
-    """
-    out: dict[str, str] = {}
-    for task in (t.strip() for t in task_spec.split(",") if t.strip()):
-        out[f"{task}_0"] = task.replace("_", " ").strip()
-    return out
-
-
 def main() -> int:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--env", required=True, help="Environment family (libero, metaworld, ...)")
    parser.add_argument("--task", required=True, help="Task/suite name (e.g. libero_spatial)")
-    parser.add_argument(
-        "--task-ids",
-        type=str,
-        default=None,
-        help="Comma-separated task IDs (e.g. '0,1,2'). Default: [0]",
-    )
    parser.add_argument("--output", required=True, help="Path to write task_descriptions.json")
    args = parser.parse_args()

-    task_ids: list[int] | None = None
-    if args.task_ids:
-        task_ids = [int(x.strip()) for x in args.task_ids.split(",")]
-
    descriptions: dict[str, str] = {}
    try:
-        if args.env == ("libero", "libero_plus"):
+        if args.env == "libero":
            descriptions = _libero_descriptions(args.task)
        elif args.env == "metaworld":
            descriptions = _metaworld_descriptions(args.task)
-        elif args.env == "robotwin":
-            descriptions = _robotwin_descriptions(args.task)
-        elif args.env == "robocasa":
-            descriptions = _robocasa_descriptions(args.task)
-        elif args.env == "robomme":
-            descriptions = _robomme_descriptions(args.task, task_ids=task_ids)
-        elif args.env == "vlabench":
-            descriptions = _vlabench_descriptions(args.task)
        else:
            print(
                f"[extract_task_descriptions] No description extractor for env '{args.env}'.",
@@ -17,7 +17,6 @@ Provides the RealSenseCamera class for capturing frames from Intel RealSense cam
 """

 import logging
-import sys
 import time
 from threading import Event, Lock, Thread
 from typing import TYPE_CHECKING, Any
@@ -42,7 +41,6 @@ from ..utils import get_cv2_rotation
 from .configuration_realsense import RealSenseCameraConfig

 logger = logging.getLogger(__name__)
-pkg_name = "pyrealsense2-macosx" if sys.platform == "darwin" else "pyrealsense2"


 class RealSenseCamera(Camera):
@@ -116,7 +114,7 @@ class RealSenseCamera(Camera):
        Args:
            config: The configuration settings for the camera.
        """
-        require_package(pkg_name, extra="intelrealsense", import_name="pyrealsense2")
+        require_package("pyrealsense2", extra="intelrealsense")
        super().__init__(config)

        self.config = config
@@ -41,12 +41,8 @@ def cfg_to_group(
            return tag
        return tag[:max_tag_length]

-    if cfg.is_reward_model_training:
-        trainable_tag = f"reward_model:{cfg.reward_model.type}"
-    else:
-        trainable_tag = f"policy:{cfg.policy.type}"
    lst = [
-        trainable_tag,
+        f"policy:{cfg.policy.type}",
        f"seed:{cfg.seed}",
    ]
    if cfg.dataset is not None:
@@ -14,7 +14,7 @@

 """Shared dataset recording configuration used by both ``lerobot-record`` and ``lerobot-rollout``."""

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path

@@ -68,13 +68,10 @@ class DatasetRecordConfig:
    # Number of threads per encoder instance. None = auto (codec default).
    # Lower values reduce CPU usage, maps to 'lp' (via svtav1-params) for libsvtav1 and 'threads' for h264/hevc..
    encoder_threads: int | None = None
+    # Rename map for the observation to override the image and state keys
+    rename_map: dict[str, str] = field(default_factory=dict)

-    def stamp_repo_id(self) -> None:
-        """Append a date-time tag to ``repo_id`` so each recording session gets a unique name.
-
-        Must be called explicitly at dataset *creation* time — not on resume,
-        where the existing ``repo_id`` (already stamped) must be preserved.
-        """
+    def __post_init__(self) -> None:
        if self.repo_id:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.repo_id = f"{self.repo_id}_{timestamp}"
@@ -1,163 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import abc
-import builtins
-import json
-import logging
-import os
-import tempfile
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, TypeVar
-
-import draccus
-from huggingface_hub import hf_hub_download
-from huggingface_hub.constants import CONFIG_NAME
-from huggingface_hub.errors import HfHubHTTPError
-
-from lerobot.configs.types import PolicyFeature
-from lerobot.optim.optimizers import OptimizerConfig
-from lerobot.optim.schedulers import LRSchedulerConfig
-from lerobot.utils.device_utils import auto_select_torch_device, is_torch_device_available
-from lerobot.utils.hub import HubMixin
-
-T = TypeVar("T", bound="RewardModelConfig")
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class RewardModelConfig(draccus.ChoiceRegistry, HubMixin, abc.ABC):
-    """Base configuration for reward models.
-
-    Args:
-    input_features: A dictionary defining the PolicyFeature of the input data for the reward. The key represents
-        the input data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes.
-    output_features: A dictionary defining the PolicyFeature of the output data for the reward. The key represents
-        the output data name, and the value is PolicyFeature, which consists of FeatureType and shape attributes.
-    """
-
-    # Reuses PolicyFeature
-    input_features: dict[str, PolicyFeature] = field(default_factory=dict)
-    output_features: dict[str, PolicyFeature] = field(default_factory=dict)
-
-    device: str | None = None
-
-    pretrained_path: str | None = None
-
-    push_to_hub: bool = False
-    repo_id: str | None = None
-
-    # Hub metadata
-    license: str | None = None
-    tags: list[str] | None = None
-    private: bool | None = None
-
-    def __post_init__(self) -> None:
-        if not self.device or not is_torch_device_available(self.device):
-            auto_device = auto_select_torch_device()
-            logger.warning(f"Device '{self.device}' is not available. Switching to '{auto_device}'.")
-            self.device = auto_device.type
-
-    @property
-    def type(self) -> str:
-        choice_name = self.get_choice_name(self.__class__)
-        if not isinstance(choice_name, str):
-            raise TypeError(f"Expected string from get_choice_name, got {type(choice_name)}")
-        return choice_name
-
-    @property
-    def observation_delta_indices(self) -> list | None:  # type: ignore[type-arg]
-        return None
-
-    @property
-    def action_delta_indices(self) -> list | None:  # type: ignore[type-arg]
-        return None
-
-    @property
-    def reward_delta_indices(self) -> list | None:  # type: ignore[type-arg]
-        return None
-
-    @abc.abstractmethod
-    def get_optimizer_preset(self) -> OptimizerConfig:
-        raise NotImplementedError
-
-    def get_scheduler_preset(self) -> LRSchedulerConfig | None:
-        return None
-
-    def validate_features(self) -> None:
-        pass
-
-    def _save_pretrained(self, save_directory: Path) -> None:
-        with open(save_directory / CONFIG_NAME, "w") as f, draccus.config_type("json"):
-            draccus.dump(self, f, indent=4)
-
-    @classmethod
-    def from_pretrained(
-        cls: builtins.type[T],
-        pretrained_name_or_path: str | Path,
-        *,
-        force_download: bool = False,
-        resume_download: bool | None = None,
-        proxies: dict[Any, Any] | None = None,
-        token: str | bool | None = None,
-        cache_dir: str | Path | None = None,
-        local_files_only: bool = False,
-        revision: str | None = None,
-        **reward_kwargs: Any,
-    ) -> T:
-        model_id = str(pretrained_name_or_path)
-        config_file: str | None = None
-        if Path(model_id).is_dir():
-            if CONFIG_NAME in os.listdir(model_id):
-                config_file = os.path.join(model_id, CONFIG_NAME)
-            else:
-                logger.error(f"{CONFIG_NAME} not found in {Path(model_id).resolve()}")
-        else:
-            try:
-                config_file = hf_hub_download(
-                    repo_id=model_id,
-                    filename=CONFIG_NAME,
-                    revision=revision,
-                    cache_dir=cache_dir,
-                    force_download=force_download,
-                    proxies=proxies,
-                    resume_download=resume_download,
-                    token=token,
-                    local_files_only=local_files_only,
-                )
-            except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"{CONFIG_NAME} not found on the HuggingFace Hub in {model_id}"
-                ) from e
-
-        if config_file is None:
-            raise FileNotFoundError(f"{CONFIG_NAME} not found in {model_id}")
-
-        # HACK: Parse the original config to get the config subclass, so that we can
-        # apply cli overrides.
-        with draccus.config_type("json"):
-            orig_config = draccus.parse(cls, config_file, args=[])
-
-        with open(config_file) as f:
-            config = json.load(f)
-
-        config.pop("type", None)
-        with tempfile.NamedTemporaryFile("w+", delete=False, suffix=".json") as f:
-            json.dump(config, f)
-            config_file = f.name
-
-        cli_overrides = reward_kwargs.pop("cli_overrides", [])
-        with draccus.config_type("json"):
-            return draccus.parse(orig_config.__class__, config_file, args=cli_overrides)
@@ -26,11 +26,9 @@ from lerobot import envs
 from lerobot.configs import parser
 from lerobot.optim import LRSchedulerConfig, OptimizerConfig
 from lerobot.utils.hub import HubMixin
-from lerobot.utils.sample_weighting import SampleWeightingConfig

 from .default import DatasetConfig, EvalConfig, PeftConfig, WandBConfig
 from .policies import PreTrainedConfig
-from .rewards import RewardModelConfig

 TRAIN_CONFIG_NAME = "train_config.json"

@@ -40,7 +38,6 @@ class TrainPipelineConfig(HubMixin):
    dataset: DatasetConfig
    env: envs.EnvConfig | None = None
    policy: PreTrainedConfig | None = None
-    reward_model: RewardModelConfig | None = None
    # Set `dir` to where you would like to save all of the run outputs. If you run another training session
    # with the same value for `dir` its contents will be overwritten unless you set `resume` to true.
    output_dir: Path | None = None
@@ -75,41 +72,27 @@ class TrainPipelineConfig(HubMixin):
    wandb: WandBConfig = field(default_factory=WandBConfig)
    peft: PeftConfig | None = None

-    # Sample weighting configuration (e.g., for RA-BC training)
-    sample_weighting: SampleWeightingConfig | None = None
+    # RA-BC (Reward-Aligned Behavior Cloning) parameters
+    use_rabc: bool = False  # Enable reward-weighted training
+    rabc_progress_path: str | None = None  # Path to precomputed SARM progress parquet file
+    rabc_kappa: float = 0.01  # Hard threshold for high-quality samples
+    rabc_epsilon: float = 1e-6  # Small constant for numerical stability
+    rabc_head_mode: str | None = "sparse"  # For dual-head models: "sparse" or "dense"

    # Rename map for the observation to override the image and state keys
    rename_map: dict[str, str] = field(default_factory=dict)
    checkpoint_path: Path | None = field(init=False, default=None)

-    @property
-    def is_reward_model_training(self) -> bool:
-        """True when the config targets a reward model rather than a policy."""
-        return self.reward_model is not None
-
-    @property
-    def trainable_config(self) -> PreTrainedConfig | RewardModelConfig:
-        """Return whichever config (policy or reward_model) is active."""
-        if self.is_reward_model_training:
-            return self.reward_model  # type: ignore[return-value]
-        return self.policy  # type: ignore[return-value]
-
    def validate(self) -> None:
        # HACK: We parse again the cli args here to get the pretrained paths if there was some.
        policy_path = parser.get_path_arg("policy")
-        reward_model_path = parser.get_path_arg("reward_model")
-
-        if reward_model_path:
-            cli_overrides = parser.get_cli_overrides("reward_model")
-            self.reward_model = RewardModelConfig.from_pretrained(
-                reward_model_path, cli_overrides=cli_overrides
-            )
-            self.reward_model.pretrained_path = str(Path(reward_model_path))
-        elif policy_path:
+        if policy_path:
+            # Only load the policy config
            cli_overrides = parser.get_cli_overrides("policy")
            self.policy = PreTrainedConfig.from_pretrained(policy_path, cli_overrides=cli_overrides)
            self.policy.pretrained_path = Path(policy_path)
        elif self.resume:
+            # The entire train config is already loaded, we just need to get the checkpoint dir
            config_path = parser.parse_arg("config_path")
            if not config_path:
                raise ValueError(
@@ -125,22 +108,18 @@ class TrainPipelineConfig(HubMixin):
            policy_dir = Path(config_path).parent
            if self.policy is not None:
                self.policy.pretrained_path = policy_dir
-            if self.reward_model is not None:
-                self.reward_model.pretrained_path = str(policy_dir)
            self.checkpoint_path = policy_dir.parent

-        if self.policy is None and self.reward_model is None:
+        if self.policy is None:
            raise ValueError(
-                "Neither policy nor reward_model is configured. "
-                "Please specify one with `--policy.path` or `--reward_model.path`."
+                "Policy is not configured. Please specify a pretrained policy with `--policy.path`."
            )

-        active_cfg = self.trainable_config
        if not self.job_name:
            if self.env is None:
-                self.job_name = f"{active_cfg.type}"
+                self.job_name = f"{self.policy.type}"
            else:
-                self.job_name = f"{self.env.type}_{active_cfg.type}"
+                self.job_name = f"{self.env.type}_{self.policy.type}"

        if not self.resume and isinstance(self.output_dir, Path) and self.output_dir.is_dir():
            raise FileExistsError(
@@ -158,16 +137,26 @@ class TrainPipelineConfig(HubMixin):
        if not self.use_policy_training_preset and (self.optimizer is None or self.scheduler is None):
            raise ValueError("Optimizer and Scheduler must be set when the policy presets are not used.")
        elif self.use_policy_training_preset and not self.resume:
-            self.optimizer = active_cfg.get_optimizer_preset()
-            self.scheduler = active_cfg.get_scheduler_preset()
+            self.optimizer = self.policy.get_optimizer_preset()
+            self.scheduler = self.policy.get_scheduler_preset()

-        if hasattr(active_cfg, "push_to_hub") and active_cfg.push_to_hub and not active_cfg.repo_id:
-            raise ValueError("'repo_id' argument missing. Please specify it to push the model to the hub.")
+        if self.policy.push_to_hub and not self.policy.repo_id:
+            raise ValueError(
+                "'policy.repo_id' argument missing. Please specify it to push the model to the hub."
+            )
+
+        if self.use_rabc and not self.rabc_progress_path:
+            # Auto-detect from dataset path
+            repo_id = self.dataset.repo_id
+            if self.dataset.root:
+                self.rabc_progress_path = str(Path(self.dataset.root) / "sarm_progress.parquet")
+            else:
+                self.rabc_progress_path = f"hf://datasets/{repo_id}/sarm_progress.parquet"

    @classmethod
    def __get_path_fields__(cls) -> list[str]:
-        """Keys for draccus pretrained-path loading."""
-        return ["policy", "reward_model"]
+        """This enables the parser to load config from the policy using `--policy.path=local/dir`"""
+        return ["policy"]

    def to_dict(self) -> dict[str, Any]:
        return draccus.encode(self)  # type: ignore[no-any-return]  # because of the third-party library draccus uses Any as the return type
@@ -97,8 +97,8 @@ def update_data_df(df, src_meta, dst_meta):
        pd.DataFrame: Updated DataFrame with adjusted indices.
    """

-    df["episode_index"] = df["episode_index"] + dst_meta.info.total_episodes
-    df["index"] = df["index"] + dst_meta.info.total_frames
+    df["episode_index"] = df["episode_index"] + dst_meta.info["total_episodes"]
+    df["index"] = df["index"] + dst_meta.info["total_frames"]

    src_task_names = src_meta.tasks.index.take(df["task_index"].to_numpy())
    df["task_index"] = dst_meta.tasks.loc[src_task_names, "task_index"].to_numpy()
@@ -225,9 +225,9 @@ def update_meta_data(
        # Clean up temporary columns
        df = df.drop(columns=["_orig_chunk", "_orig_file"])

-    df["dataset_from_index"] = df["dataset_from_index"] + dst_meta.info.total_frames
-    df["dataset_to_index"] = df["dataset_to_index"] + dst_meta.info.total_frames
-    df["episode_index"] = df["episode_index"] + dst_meta.info.total_episodes
+    df["dataset_from_index"] = df["dataset_from_index"] + dst_meta.info["total_frames"]
+    df["dataset_to_index"] = df["dataset_to_index"] + dst_meta.info["total_frames"]
+    df["episode_index"] = df["episode_index"] + dst_meta.info["total_episodes"]

    return df

@@ -237,8 +237,8 @@ def aggregate_datasets(
    aggr_repo_id: str,
    roots: list[Path] | None = None,
    aggr_root: Path | None = None,
-    data_files_size_in_mb: int | None = None,
-    video_files_size_in_mb: int | None = None,
+    data_files_size_in_mb: float | None = None,
+    video_files_size_in_mb: float | None = None,
    chunk_size: int | None = None,
 ):
    """Aggregates multiple LeRobot datasets into a single unified dataset.
@@ -313,8 +313,8 @@ def aggregate_datasets(
        # to avoid interference between different source datasets
        data_idx.pop("src_to_dst", None)

-        dst_meta.info.total_episodes += src_meta.total_episodes
-        dst_meta.info.total_frames += src_meta.total_frames
+        dst_meta.info["total_episodes"] += src_meta.total_episodes
+        dst_meta.info["total_frames"] += src_meta.total_frames

    finalize_aggregation(dst_meta, all_metadata)
    logging.info("Aggregation complete.")
@@ -640,10 +640,14 @@ def finalize_aggregation(aggr_meta, all_metadata):
    write_tasks(aggr_meta.tasks, aggr_meta.root)

    logging.info("write info")
-    aggr_meta.info.total_tasks = len(aggr_meta.tasks)
-    aggr_meta.info.total_episodes = sum(m.total_episodes for m in all_metadata)
-    aggr_meta.info.total_frames = sum(m.total_frames for m in all_metadata)
-    aggr_meta.info.splits = {"train": f"0:{sum(m.total_episodes for m in all_metadata)}"}
+    aggr_meta.info.update(
+        {
+            "total_tasks": len(aggr_meta.tasks),
+            "total_episodes": sum(m.total_episodes for m in all_metadata),
+            "total_frames": sum(m.total_frames for m in all_metadata),
+            "splits": {"train": f"0:{sum(m.total_episodes for m in all_metadata)}"},
+        }
+    )
    write_info(aggr_meta.info, aggr_meta.root)

    logging.info("write stats")
@@ -37,11 +37,13 @@ from .io_utils import (
    load_subtasks,
    load_tasks,
    write_info,
+    write_json,
    write_stats,
    write_tasks,
 )
 from .utils import (
    DEFAULT_EPISODES_PATH,
+    INFO_PATH,
    check_version_compatibility,
    get_safe_version,
    has_legacy_hub_download_metadata,
@@ -226,7 +228,7 @@ class LeRobotDatasetMetadata:
    @property
    def _version(self) -> packaging.version.Version:
        """Codebase version used to create this dataset."""
-        return packaging.version.parse(self.info.codebase_version)
+        return packaging.version.parse(self.info["codebase_version"])

    def get_data_file_path(self, ep_index: int) -> Path:
        """Return the relative parquet file path for the given episode index.
@@ -281,27 +283,27 @@ class LeRobotDatasetMetadata:
    @property
    def data_path(self) -> str:
        """Formattable string for the parquet files."""
-        return self.info.data_path
+        return self.info["data_path"]

    @property
    def video_path(self) -> str | None:
        """Formattable string for the video files."""
-        return self.info.video_path
+        return self.info["video_path"]

    @property
    def robot_type(self) -> str | None:
        """Robot type used in recording this dataset."""
-        return self.info.robot_type
+        return self.info["robot_type"]

    @property
    def fps(self) -> int:
        """Frames per second used during data collection."""
-        return self.info.fps
+        return self.info["fps"]

    @property
    def features(self) -> dict[str, dict]:
        """All features contained in the dataset."""
-        return self.info.features
+        return self.info["features"]

    @property
    def image_keys(self) -> list[str]:
@@ -331,32 +333,32 @@ class LeRobotDatasetMetadata:
    @property
    def total_episodes(self) -> int:
        """Total number of episodes available."""
-        return self.info.total_episodes
+        return self.info["total_episodes"]

    @property
    def total_frames(self) -> int:
        """Total number of frames saved in this dataset."""
-        return self.info.total_frames
+        return self.info["total_frames"]

    @property
    def total_tasks(self) -> int:
        """Total number of different tasks performed in this dataset."""
-        return self.info.total_tasks
+        return self.info["total_tasks"]

    @property
    def chunks_size(self) -> int:
        """Max number of files per chunk."""
-        return self.info.chunks_size
+        return self.info["chunks_size"]

    @property
    def data_files_size_in_mb(self) -> int:
        """Max size of data file in mega bytes."""
-        return self.info.data_files_size_in_mb
+        return self.info["data_files_size_in_mb"]

    @property
    def video_files_size_in_mb(self) -> int:
        """Max size of video file in mega bytes."""
-        return self.info.video_files_size_in_mb
+        return self.info["video_files_size_in_mb"]

    def get_task_index(self, task: str) -> int | None:
        """
@@ -500,10 +502,10 @@ class LeRobotDatasetMetadata:
        self._save_episode_metadata(episode_dict)

        # Update info
-        self.info.total_episodes += 1
-        self.info.total_frames += episode_length
-        self.info.total_tasks = len(self.tasks)
-        self.info.splits = {"train": f"0:{self.info.total_episodes}"}
+        self.info["total_episodes"] += 1
+        self.info["total_frames"] += episode_length
+        self.info["total_tasks"] = len(self.tasks)
+        self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"}

        write_info(self.info, self.root)

@@ -522,7 +524,7 @@ class LeRobotDatasetMetadata:
        for key in video_keys:
            if not self.features[key].get("info", None):
                video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0)
-                self.info.features[key]["info"] = get_video_info(video_path)
+                self.info["features"][key]["info"] = get_video_info(video_path)

    def update_chunk_settings(
        self,
@@ -544,17 +546,17 @@ class LeRobotDatasetMetadata:
        if chunks_size is not None:
            if chunks_size <= 0:
                raise ValueError(f"chunks_size must be positive, got {chunks_size}")
-            self.info.chunks_size = chunks_size
+            self.info["chunks_size"] = chunks_size

        if data_files_size_in_mb is not None:
            if data_files_size_in_mb <= 0:
                raise ValueError(f"data_files_size_in_mb must be positive, got {data_files_size_in_mb}")
-            self.info.data_files_size_in_mb = data_files_size_in_mb
+            self.info["data_files_size_in_mb"] = data_files_size_in_mb

        if video_files_size_in_mb is not None:
            if video_files_size_in_mb <= 0:
                raise ValueError(f"video_files_size_in_mb must be positive, got {video_files_size_in_mb}")
-            self.info.video_files_size_in_mb = video_files_size_in_mb
+            self.info["video_files_size_in_mb"] = video_files_size_in_mb

        # Update the info file on disk
        write_info(self.info, self.root)
@@ -651,7 +653,7 @@ class LeRobotDatasetMetadata:
                f"Features contain video keys {obj.video_keys}, but 'use_videos' is set to False. "
                "Either remove video features from the features dict, or set 'use_videos=True'."
            )
-        write_info(obj.info, obj.root)
+        write_json(obj.info, obj.root / INFO_PATH)
        obj.revision = None
        obj._pq_writer = None
        obj.latest_episode = None
@@ -897,10 +897,14 @@ def _copy_and_reindex_episodes_metadata(

    dst_meta.finalize()

-    dst_meta.info.total_episodes = len(episode_mapping)
-    dst_meta.info.total_frames = total_frames
-    dst_meta.info.total_tasks = len(dst_meta.tasks) if dst_meta.tasks is not None else 0
-    dst_meta.info.splits = {"train": f"0:{len(episode_mapping)}"}
+    dst_meta.info.update(
+        {
+            "total_episodes": len(episode_mapping),
+            "total_frames": total_frames,
+            "total_tasks": len(dst_meta.tasks) if dst_meta.tasks is not None else 0,
+            "splits": {"train": f"0:{len(episode_mapping)}"},
+        }
+    )
    write_info(dst_meta.info, dst_meta.root)

    if not all_stats:
@@ -1065,20 +1069,21 @@ def _copy_episodes_metadata_and_stats(
    if episodes_dir.exists():
        shutil.copytree(episodes_dir, dst_episodes_dir, dirs_exist_ok=True)

-    dst_meta.info.total_episodes = src_dataset.meta.total_episodes
-    dst_meta.info.total_frames = src_dataset.meta.total_frames
-    dst_meta.info.total_tasks = src_dataset.meta.total_tasks
-    # Preserve original splits if available, otherwise create default
-    dst_meta.info.splits = (
-        src_dataset.meta.info.splits
-        if src_dataset.meta.info.splits
-        else {"train": f"0:{src_dataset.meta.total_episodes}"}
+    dst_meta.info.update(
+        {
+            "total_episodes": src_dataset.meta.total_episodes,
+            "total_frames": src_dataset.meta.total_frames,
+            "total_tasks": src_dataset.meta.total_tasks,
+            "splits": src_dataset.meta.info.get("splits") or {"train": f"0:{src_dataset.meta.total_episodes}"},
+        }
    )

    if dst_meta.video_keys and src_dataset.meta.video_keys:
        for key in dst_meta.video_keys:
            if key in src_dataset.meta.features:
-                dst_meta.info.features[key]["info"] = src_dataset.meta.info.features[key].get("info", {})
+                dst_meta.info["features"][key]["info"] = src_dataset.meta.info["features"][key].get(
+                    "info", {}
+                )

    write_info(dst_meta.info, dst_meta.root)

@@ -1520,7 +1525,7 @@ def modify_tasks(
    write_tasks(new_task_df, root)

    # Update info.json
-    dataset.meta.info.total_tasks = len(unique_tasks)
+    dataset.meta.info["total_tasks"] = len(unique_tasks)
    write_info(dataset.meta.info, root)

    # Reload metadata to reflect changes
@@ -1853,10 +1858,10 @@ def convert_image_to_video_dataset(
        episodes_df.to_parquet(episodes_path, index=False)

        # Update metadata info
-        new_meta.info.total_episodes = len(episode_indices)
-        new_meta.info.total_frames = sum(ep["length"] for ep in all_episode_metadata.values())
-        new_meta.info.total_tasks = dataset.meta.total_tasks
-        new_meta.info.splits = {"train": f"0:{len(episode_indices)}"}
+        new_meta.info["total_episodes"] = len(episode_indices)
+        new_meta.info["total_frames"] = sum(ep["length"] for ep in all_episode_metadata.values())
+        new_meta.info["total_tasks"] = dataset.meta.total_tasks
+        new_meta.info["splits"] = {"train": f"0:{len(episode_indices)}"}

        # Update video info for all image keys (now videos)
        # We need to manually set video info since update_video_info() checks video_keys first
@@ -1865,7 +1870,7 @@ def convert_image_to_video_dataset(
                video_path = new_meta.root / new_meta.video_path.format(
                    video_key=img_key, chunk_index=0, file_index=0
                )
-                new_meta.info.features[img_key]["info"] = get_video_info(video_path)
+                new_meta.info["features"][img_key]["info"] = get_video_info(video_path)

        write_info(new_meta.info, new_meta.root)

@@ -19,7 +19,6 @@ from pprint import pformat
 import torch

 from lerobot.configs import PreTrainedConfig
-from lerobot.configs.rewards import RewardModelConfig
 from lerobot.configs.train import TrainPipelineConfig
 from lerobot.transforms import ImageTransforms
 from lerobot.utils.constants import ACTION, IMAGENET_STATS, OBS_PREFIX, REWARD
@@ -31,14 +30,12 @@ from .streaming_dataset import StreamingLeRobotDataset


 def resolve_delta_timestamps(
-    cfg: PreTrainedConfig | RewardModelConfig, ds_meta: LeRobotDatasetMetadata
+    cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata
 ) -> dict[str, list] | None:
-    """Resolves delta_timestamps by reading from the 'delta_indices' properties of the config.
+    """Resolves delta_timestamps by reading from the 'delta_indices' properties of the PreTrainedConfig.

    Args:
-        cfg (PreTrainedConfig | RewardModelConfig): The config to read delta_indices from. Both
-            ``PreTrainedConfig`` and concrete ``RewardModelConfig`` subclasses expose the
-            ``{observation,action,reward}_delta_indices`` properties used below.
+        cfg (PreTrainedConfig): The PreTrainedConfig to read delta_indices from.
        ds_meta (LeRobotDatasetMetadata): The dataset from which features and fps are used to build
            delta_timestamps against.

@@ -85,7 +82,7 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas
        ds_meta = LeRobotDatasetMetadata(
            cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
        )
-        delta_timestamps = resolve_delta_timestamps(cfg.trainable_config, ds_meta)
+        delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
        if not cfg.dataset.streaming:
            dataset = LeRobotDataset(
                cfg.dataset.repo_id,
@@ -28,7 +28,6 @@ from .utils import (
    DEFAULT_DATA_PATH,
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
-    DatasetInfo,
 )


@@ -79,8 +78,8 @@ def create_empty_dataset_info(
    chunks_size: int | None = None,
    data_files_size_in_mb: int | None = None,
    video_files_size_in_mb: int | None = None,
-) -> DatasetInfo:
-    """Create a template ``DatasetInfo`` object for a new dataset's ``meta/info.json``.
+) -> dict:
+    """Create a template dictionary for a new dataset's `info.json`.

    Args:
        codebase_version (str): The version of the LeRobot codebase.
@@ -88,24 +87,25 @@ def create_empty_dataset_info(
        features (dict): The LeRobot features dictionary for the dataset.
        use_videos (bool): Whether the dataset will store videos.
        robot_type (str | None): The type of robot used, if any.
-        chunks_size (int | None): Max files per chunk directory. Defaults to ``DEFAULT_CHUNK_SIZE``.
-        data_files_size_in_mb (int | None): Max parquet file size in MB. Defaults to ``DEFAULT_DATA_FILE_SIZE_IN_MB``.
-        video_files_size_in_mb (int | None): Max video file size in MB. Defaults to ``DEFAULT_VIDEO_FILE_SIZE_IN_MB``.

    Returns:
-        DatasetInfo: A typed dataset information object with initial metadata.
+        dict: A dictionary with the initial dataset metadata.
    """
-    return DatasetInfo(
-        codebase_version=codebase_version,
-        fps=fps,
-        features=features,
-        robot_type=robot_type,
-        chunks_size=chunks_size or DEFAULT_CHUNK_SIZE,
-        data_files_size_in_mb=data_files_size_in_mb or DEFAULT_DATA_FILE_SIZE_IN_MB,
-        video_files_size_in_mb=video_files_size_in_mb or DEFAULT_VIDEO_FILE_SIZE_IN_MB,
-        data_path=DEFAULT_DATA_PATH,
-        video_path=DEFAULT_VIDEO_PATH if use_videos else None,
-    )
+    return {
+        "codebase_version": codebase_version,
+        "robot_type": robot_type,
+        "total_episodes": 0,
+        "total_frames": 0,
+        "total_tasks": 0,
+        "chunks_size": chunks_size or DEFAULT_CHUNK_SIZE,
+        "data_files_size_in_mb": data_files_size_in_mb or DEFAULT_DATA_FILE_SIZE_IN_MB,
+        "video_files_size_in_mb": video_files_size_in_mb or DEFAULT_VIDEO_FILE_SIZE_IN_MB,
+        "fps": fps,
+        "splits": {},
+        "data_path": DEFAULT_DATA_PATH,
+        "video_path": DEFAULT_VIDEO_PATH if use_videos else None,
+        "features": features,
+    }


 def check_delta_timestamps(
@@ -39,7 +39,6 @@ from .utils import (
    EPISODES_DIR,
    INFO_PATH,
    STATS_PATH,
-    DatasetInfo,
    serialize_dict,
 )

@@ -116,21 +115,25 @@ def embed_images(dataset: datasets.Dataset) -> datasets.Dataset:
    return dataset


-def write_info(info: DatasetInfo, local_dir: Path) -> None:
-    write_json(info.to_dict(), local_dir / INFO_PATH)
+def write_info(info: dict, local_dir: Path) -> None:
+    write_json(info, local_dir / INFO_PATH)


-def load_info(local_dir: Path) -> DatasetInfo:
+def load_info(local_dir: Path) -> dict:
    """Load dataset info metadata from its standard file path.

+    Also converts shape lists to tuples for consistency.
+
    Args:
        local_dir (Path): The root directory of the dataset.

    Returns:
-        DatasetInfo: The typed dataset information object.
+        dict: The dataset information dictionary.
    """
-    raw = load_json(local_dir / INFO_PATH)
-    return DatasetInfo.from_dict(raw)
+    info = load_json(local_dir / INFO_PATH)
+    for ft in info["features"].values():
+        ft["shape"] = tuple(ft["shape"])
+    return info


 def write_stats(stats: dict, local_dir: Path) -> None:
@@ -630,8 +630,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
        streaming_encoding: bool = False,
        encoder_queue_maxsize: int = 30,
        encoder_threads: int | None = None,
-        video_files_size_in_mb: int | None = None,
-        data_files_size_in_mb: int | None = None,
    ) -> "LeRobotDataset":
        """Create a new LeRobotDataset from scratch for recording data.

@@ -679,8 +677,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
            root=root,
            use_videos=use_videos,
            metadata_buffer_size=metadata_buffer_size,
-            video_files_size_in_mb=video_files_size_in_mb,
-            data_files_size_in_mb=data_files_size_in_mb,
        )
        obj.repo_id = obj.meta.repo_id
        obj._requested_root = obj.meta.root
@@ -123,7 +123,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):

        NOTE: For now, this relies on a check in __init__ to make sure all sub-datasets have the same info.
        """
-        return self._datasets[0].meta.info.fps
+        return self._datasets[0].meta.info["fps"]

    @property
    def video(self) -> bool:
@@ -133,7 +133,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset):

        NOTE: For now, this relies on a check in __init__ to make sure all sub-datasets have the same info.
        """
-        return len(self._datasets[0].meta.video_keys) > 0
+        return len(self._datasets[0].meta.video_keys) > 0  # info.json has no "video" key

    @property
    def features(self) -> datasets.Features:
@@ -434,7 +434,7 @@ class StreamingLeRobotDataset(torch.utils.data.IterableDataset):

    def _make_padding_camera_frame(self, camera_key: str):
        """Variable-shape padding frame for given camera keys, given in (H, W, C)"""
-        return torch.zeros(self.meta.info.features[camera_key]["shape"]).permute(-1, 0, 1)
+        return torch.zeros(self.meta.info["features"][camera_key]["shape"]).permute(-1, 0, 1)

    def _get_video_frame_padding_mask(
        self,
@@ -14,11 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import contextlib
-import dataclasses
 import importlib.resources
 import json
 import logging
-from dataclasses import dataclass, field
 from pathlib import Path

 import datasets
@@ -72,12 +70,9 @@ class ForwardCompatibilityError(CompatibilityError):
        super().__init__(message)


-logger = logging.getLogger(__name__)
-
-
 DEFAULT_CHUNK_SIZE = 1000  # Max number of files per chunk
-DEFAULT_DATA_FILE_SIZE_IN_MB = 100  # Max size per file
-DEFAULT_VIDEO_FILE_SIZE_IN_MB = 200  # Max size per file
+DEFAULT_DATA_FILE_SIZE_IN_MB = 50  # Max size per file
+DEFAULT_VIDEO_FILE_SIZE_IN_MB = 100  # Max size per file

 INFO_PATH = "meta/info.json"
 STATS_PATH = "meta/stats.json"
@@ -99,123 +94,6 @@ LEGACY_EPISODES_STATS_PATH = "meta/episodes_stats.jsonl"
 LEGACY_TASKS_PATH = "meta/tasks.jsonl"


-@dataclass
-class DatasetInfo:
-    """Typed representation of the ``meta/info.json`` file for a LeRobot dataset.
-
-    Replaces the previously untyped ``dict`` returned by ``load_info()`` and
-    created by ``create_empty_dataset_info()``.  Using a dataclass provides
-    explicit field definitions, IDE auto-completion, and validation at
-    construction time.
-    """
-
-    codebase_version: str
-    fps: int
-    features: dict[str, dict]
-
-    # Episode / frame counters — start at zero for new datasets
-    total_episodes: int = 0
-    total_frames: int = 0
-    total_tasks: int = 0
-
-    # Storage settings
-    chunks_size: int = field(default=DEFAULT_CHUNK_SIZE)
-    data_files_size_in_mb: int = field(default=DEFAULT_DATA_FILE_SIZE_IN_MB)
-    video_files_size_in_mb: int = field(default=DEFAULT_VIDEO_FILE_SIZE_IN_MB)
-
-    # File path templates
-    data_path: str = field(default=DEFAULT_DATA_PATH)
-    video_path: str | None = field(default=DEFAULT_VIDEO_PATH)
-
-    # Optional metadata
-    robot_type: str | None = None
-    splits: dict[str, str] = field(default_factory=dict)
-
-    def __post_init__(self) -> None:
-        # Coerce feature shapes from list to tuple — JSON deserialisation
-        # returns lists, but the rest of the codebase expects tuples.
-        for ft in self.features.values():
-            if isinstance(ft.get("shape"), list):
-                ft["shape"] = tuple(ft["shape"])
-
-        if self.fps <= 0:
-            raise ValueError(f"fps must be positive, got {self.fps}")
-        if self.chunks_size <= 0:
-            raise ValueError(f"chunks_size must be positive, got {self.chunks_size}")
-        if self.data_files_size_in_mb <= 0:
-            raise ValueError(f"data_files_size_in_mb must be positive, got {self.data_files_size_in_mb}")
-        if self.video_files_size_in_mb <= 0:
-            raise ValueError(f"video_files_size_in_mb must be positive, got {self.video_files_size_in_mb}")
-
-    def to_dict(self) -> dict:
-        """Return a JSON-serialisable dict.
-
-        Converts tuple shapes back to lists so ``json.dump`` can handle them.
-        """
-        d = dataclasses.asdict(self)
-        for ft in d["features"].values():
-            if isinstance(ft.get("shape"), tuple):
-                ft["shape"] = list(ft["shape"])
-        return d
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "DatasetInfo":
-        """Construct from a raw dict (e.g. loaded directly from JSON).
-
-        Unknown keys are ignored for forward compatibility with datasets that
-        carry additional fields (e.g. ``total_videos`` from v2.x). A warning is
-        logged when such fields are present.
-        """
-        known = {f.name for f in dataclasses.fields(cls)}
-        unknown = sorted(k for k in data if k not in known)
-        if unknown:
-            logger.warning(f"Unknown fields in DatasetInfo: {unknown}. These will be ignored.")
-        return cls(**{k: v for k, v in data.items() if k in known})
-
-    # ---------------------------------------------------------------------------
-    # Temporary dict-style compatibility layer
-    # Allows existing ``info["key"]`` call-sites to keep working without changes.
-    # Once all callers have been migrated to attribute access, remove these.
-    # ---------------------------------------------------------------------------
-    def __getitem__(self, key: str):
-        import warnings
-
-        warnings.warn(
-            f"Accessing DatasetInfo with dict-style syntax info['{key}'] is deprecated. "
-            f"Use attribute access info.{key} instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        try:
-            return getattr(self, key)
-        except AttributeError as err:
-            raise KeyError(key) from err
-
-    def __setitem__(self, key: str, value) -> None:
-        import warnings
-
-        warnings.warn(
-            f"Setting DatasetInfo with dict-style syntax info['{key}'] = ... is deprecated. "
-            f"Use attribute assignment info.{key} = ... instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        if not hasattr(self, key):
-            raise KeyError(f"DatasetInfo has no field '{key}'")
-        setattr(self, key, value)
-
-    def __contains__(self, key: str) -> bool:
-        """Check if a field exists (dict-like interface)."""
-        return hasattr(self, key)
-
-    def get(self, key: str, default=None):
-        """Get attribute value with default fallback (dict-like interface)."""
-        try:
-            return getattr(self, key)
-        except AttributeError:
-            return default
-
-
 def has_legacy_hub_download_metadata(root: Path) -> bool:
    """Return ``True`` when *root* looks like a legacy Hub ``local_dir`` mirror.

@@ -416,7 +294,7 @@ def create_branch(repo_id: str, *, branch: str, repo_type: str | None = None) ->

 def create_lerobot_dataset_card(
    tags: list | None = None,
-    dataset_info: DatasetInfo | None = None,
+    dataset_info: dict | None = None,
    **kwargs,
 ) -> DatasetCard:
    """Create a `DatasetCard` for a LeRobot dataset.
@@ -427,7 +305,7 @@ def create_lerobot_dataset_card(

    Args:
        tags (list | None): A list of tags to add to the dataset card.
-        dataset_info (DatasetInfo | None): The dataset's info object, which will
+        dataset_info (dict | None): The dataset's info dictionary, which will
            be displayed on the card.
        **kwargs: Additional keyword arguments to populate the card template.

@@ -440,7 +318,7 @@ def create_lerobot_dataset_card(
        card_tags += tags
    if dataset_info:
        dataset_structure = "[meta/info.json](meta/info.json):\n"
-        dataset_structure += f"```json\n{json.dumps(dataset_info.to_dict(), indent=4)}\n```\n"
+        dataset_structure += f"```json\n{json.dumps(dataset_info, indent=4)}\n```\n"
        kwargs = {**kwargs, "dataset_structure": dataset_structure}
    card_data = DatasetCardData(
        license=kwargs.get("license"),
@@ -331,7 +331,6 @@ class LiberoEnv(EnvConfig):
    camera_name_mapping: dict[str, str] | None = None
    observation_height: int = 360
    observation_width: int = 360
-    is_libero_plus: bool = False
    features: dict[str, PolicyFeature] = field(
        default_factory=lambda: {
            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(7,)),
@@ -433,7 +432,6 @@ class LiberoEnv(EnvConfig):
            control_mode=self.control_mode,
            episode_length=self.episode_length,
            camera_name_mapping=self.camera_name_mapping,
-            is_libero_plus=self.is_libero_plus,
        )

    def get_env_processors(self):
@@ -498,146 +496,6 @@ class MetaworldEnv(EnvConfig):
        )


-@EnvConfig.register_subclass("robocasa")
-@dataclass
-class RoboCasaEnv(EnvConfig):
-    task: str = "CloseFridge"
-    fps: int = 20
-    episode_length: int = 1000
-    obs_type: str = "pixels_agent_pos"
-    render_mode: str = "rgb_array"
-    camera_name: str = "robot0_agentview_left,robot0_eye_in_hand,robot0_agentview_right"
-    observation_height: int = 256
-    observation_width: int = 256
-    visualization_height: int = 512
-    visualization_width: int = 512
-    split: str | None = None
-    # Object-mesh registries to sample from. Upstream default is
-    # ("objaverse", "lightwheel"), but objaverse is ~30GB and the CI image
-    # only ships the lightwheel pack. Override to include objaverse once
-    # you've run `python -m robocasa.scripts.download_kitchen_assets
-    # --type objaverse` locally.
-    obj_registries: list[str] = field(default_factory=lambda: ["lightwheel"])
-    features: dict[str, PolicyFeature] = field(
-        default_factory=lambda: {ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(12,))}
-    )
-    features_map: dict[str, str] = field(default_factory=lambda: {ACTION: ACTION, "agent_pos": OBS_STATE})
-
-    def __post_init__(self):
-        if self.obs_type not in ("pixels", "pixels_agent_pos"):
-            raise ValueError(f"Unsupported obs_type: {self.obs_type}")
-
-        # Preserve raw RoboCasa camera names end-to-end (e.g.
-        # `observation.images.robot0_agentview_left`). This matches the
-        # naming convention used by the RoboCasa datasets on the Hub, so
-        # trained policies don't need a `--rename_map` at eval time.
-        cams = [c.strip() for c in self.camera_name.split(",") if c.strip()]
-        for cam in cams:
-            self.features[f"pixels/{cam}"] = PolicyFeature(
-                type=FeatureType.VISUAL,
-                shape=(self.observation_height, self.observation_width, 3),
-            )
-            self.features_map[f"pixels/{cam}"] = f"{OBS_IMAGES}.{cam}"
-
-        if self.obs_type == "pixels_agent_pos":
-            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(16,))
-
-    @property
-    def gym_kwargs(self) -> dict:
-        kwargs: dict[str, Any] = {
-            "obs_type": self.obs_type,
-            "render_mode": self.render_mode,
-            "observation_height": self.observation_height,
-            "observation_width": self.observation_width,
-            "visualization_height": self.visualization_height,
-            "visualization_width": self.visualization_width,
-        }
-        if self.split is not None:
-            kwargs["split"] = self.split
-        return kwargs
-
-    def create_envs(self, n_envs: int, use_async_envs: bool = False):
-        from .robocasa import create_robocasa_envs
-
-        if self.task is None:
-            raise ValueError("RoboCasaEnv requires a task to be specified")
-        env_cls = _make_vec_env_cls(use_async_envs, n_envs)
-        return create_robocasa_envs(
-            task=self.task,
-            n_envs=n_envs,
-            camera_name=self.camera_name,
-            gym_kwargs=self.gym_kwargs,
-            env_cls=env_cls,
-            episode_length=self.episode_length,
-            obj_registries=tuple(self.obj_registries),
-        )
-
-
-@EnvConfig.register_subclass("vlabench")
-@dataclass
-class VLABenchEnv(EnvConfig):
-    task: str = "select_fruit"
-    fps: int = 10
-    episode_length: int = 500
-    obs_type: str = "pixels_agent_pos"
-    render_mode: str = "rgb_array"
-    render_resolution: tuple[int, int] = (480, 480)
-    robot: str = "franka"
-    action_mode: str = "eef"
-    features: dict[str, PolicyFeature] = field(
-        default_factory=lambda: {
-            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(7,)),
-        }
-    )
-    features_map: dict[str, str] = field(
-        default_factory=lambda: {
-            ACTION: ACTION,
-            "agent_pos": OBS_STATE,
-            "pixels/image": f"{OBS_IMAGES}.image",
-            "pixels/second_image": f"{OBS_IMAGES}.second_image",
-            "pixels/wrist_image": f"{OBS_IMAGES}.wrist_image",
-        }
-    )
-
-    def __post_init__(self):
-        h, w = self.render_resolution
-        if self.obs_type == "pixels":
-            self.features["pixels/image"] = PolicyFeature(type=FeatureType.VISUAL, shape=(h, w, 3))
-            self.features["pixels/second_image"] = PolicyFeature(type=FeatureType.VISUAL, shape=(h, w, 3))
-            self.features["pixels/wrist_image"] = PolicyFeature(type=FeatureType.VISUAL, shape=(h, w, 3))
-        elif self.obs_type == "pixels_agent_pos":
-            self.features["pixels/image"] = PolicyFeature(type=FeatureType.VISUAL, shape=(h, w, 3))
-            self.features["pixels/second_image"] = PolicyFeature(type=FeatureType.VISUAL, shape=(h, w, 3))
-            self.features["pixels/wrist_image"] = PolicyFeature(type=FeatureType.VISUAL, shape=(h, w, 3))
-            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(7,))
-        else:
-            raise ValueError(f"Unsupported obs_type: {self.obs_type}")
-
-    @property
-    def gym_kwargs(self) -> dict:
-        return {
-            "obs_type": self.obs_type,
-            "render_mode": self.render_mode,
-            "render_resolution": self.render_resolution,
-            "robot": self.robot,
-            "max_episode_steps": self.episode_length,
-            "action_mode": self.action_mode,
-        }
-
-    def create_envs(self, n_envs: int, use_async_envs: bool = False):
-        from .vlabench import create_vlabench_envs
-
-        if self.task is None:
-            raise ValueError("VLABenchEnv requires a task to be specified")
-        env_cls = _make_vec_env_cls(use_async_envs, n_envs)
-        return create_vlabench_envs(
-            task=self.task,
-            n_envs=n_envs,
-            gym_kwargs=self.gym_kwargs,
-            env_cls=env_cls,
-        )
-
-
@EnvConfig.register_subclass("isaaclab_arena")
@dataclass
 class IsaaclabArenaEnv(HubEnvConfig):
@@ -716,171 +574,3 @@ class IsaaclabArenaEnv(HubEnvConfig):
            ),
            PolicyProcessorPipeline(steps=[]),
        )
-
-
-@EnvConfig.register_subclass("libero_plus")
-@dataclass
-class LiberoPlusEnv(LiberoEnv):
-    """Config for LIBERO-plus robustness benchmark evaluation.
-
-    LIBERO-plus extends LIBERO with 7 perturbation dimensions (camera viewpoints,
-    object layouts, robot initial states, language instructions, lighting, background
-    textures, sensor noise) producing ~10k task variants.
-
-    The gym interface is identical to LIBERO so this class reuses ``LiberoEnv``
-    entirely — only the registered name and default task suite differ.
-
-    Install: see docker/Dockerfile.benchmark.libero_plus — LIBERO-plus ships
-    as a namespace package from a git fork and must be cloned + PYTHONPATH'd
-    rather than installed as a pyproject extra.
-
-    See Also:
-        https://github.com/sylvestf/LIBERO-plus
-    """
-
-    task: str = "libero_spatial"
-    is_libero_plus: bool = True
-
-
-@EnvConfig.register_subclass("robotwin")
-@dataclass
-class RoboTwinEnvConfig(EnvConfig):
-    """Configuration for RoboTwin 2.0 benchmark environments.
-
-    RoboTwin 2.0 is a dual-arm manipulation benchmark with 50 tasks built on the
-    SAPIEN simulator. The robot is an Aloha-AgileX bimanual platform with 14 DOF
-    (7 per arm). All three cameras are enabled by default.
-
-    See: https://robotwin-platform.github.io
-    Dataset: https://huggingface.co/datasets/lerobot/robotwin_unified
-    """
-
-    task: str = "beat_block_hammer"  # single task or comma-separated list
-    fps: int = 25
-    episode_length: int = 300
-    obs_type: str = "pixels_agent_pos"
-    render_mode: str = "rgb_array"
-    # Available cameras from RoboTwin's aloha-agilex embodiment: head_camera
-    # (torso-mounted) + left_camera / right_camera (wrists).
-    camera_names: str = "head_camera,left_camera,right_camera"
-    # Match the D435 dims in task_config/demo_clean.yml (_camera_config.yml).
-    # Gym's vector-env concatenate pre-allocates buffers of this shape, so it
-    # must equal what SAPIEN actually renders.
-    observation_height: int = 240
-    observation_width: int = 320
-    features: dict[str, PolicyFeature] = field(
-        default_factory=lambda: {
-            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(14,)),
-        }
-    )
-    features_map: dict[str, str] = field(
-        default_factory=lambda: {
-            ACTION: ACTION,
-            "pixels/head_camera": f"{OBS_IMAGES}.head_camera",
-            "pixels/left_camera": f"{OBS_IMAGES}.left_camera",
-            "pixels/right_camera": f"{OBS_IMAGES}.right_camera",
-            "agent_pos": OBS_STATE,
-        }
-    )
-
-    def __post_init__(self):
-        cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
-        for cam in cam_list:
-            self.features[f"pixels/{cam}"] = PolicyFeature(
-                type=FeatureType.VISUAL,
-                shape=(self.observation_height, self.observation_width, 3),
-            )
-            # Keep features_map entry if already set (default_factory); add if missing.
-            key = f"pixels/{cam}"
-            if key not in self.features_map:
-                self.features_map[key] = f"{OBS_IMAGES}.{cam}"
-
-        if self.obs_type == "pixels_agent_pos":
-            self.features["agent_pos"] = PolicyFeature(
-                type=FeatureType.STATE,
-                shape=(14,),  # 14 DOF: 7 per arm
-            )
-        elif self.obs_type != "pixels":
-            raise ValueError(
-                f"Unsupported obs_type '{self.obs_type}'. "
-                "RoboTwinEnvConfig supports 'pixels' and 'pixels_agent_pos'."
-            )
-
-    @property
-    def gym_kwargs(self) -> dict:
-        return {}
-
-    def create_envs(self, n_envs: int, use_async_envs: bool = True):
-        from lerobot.envs.robotwin import create_robotwin_envs
-
-        if not self.task:
-            raise ValueError("RoboTwinEnvConfig requires `task` to be specified.")
-
-        env_cls = _make_vec_env_cls(use_async_envs, n_envs)
-        cam_list = [c.strip() for c in self.camera_names.split(",") if c.strip()]
-        return create_robotwin_envs(
-            task=self.task,
-            n_envs=n_envs,
-            env_cls=env_cls,
-            camera_names=cam_list,
-            observation_height=self.observation_height,
-            observation_width=self.observation_width,
-            episode_length=self.episode_length,
-        )
-
-
-@EnvConfig.register_subclass("robomme")
-@dataclass
-class RoboMMEEnv(EnvConfig):
-    """RoboMME memory-augmented manipulation benchmark (ManiSkill/SAPIEN).
-
-    16 tasks across 4 suites: Counting, Permanence, Reference, Imitation.
-    Dataset: lerobot/robomme (LeRobot v3.0, 1,600 episodes).
-    Benchmark: https://github.com/RoboMME/robomme_benchmark
-
-    Requires the `robomme` git package installed separately (Linux only);
-    see docker/Dockerfile.benchmark.robomme for the canonical install.
-    """
-
-    task: str = "PickXtimes"
-    fps: int = 10
-    episode_length: int = 300
-    action_space: str = "joint_angle"  # or "ee_pose" (7-D)
-    dataset_split: str = "test"  # "train" | "val" | "test"
-    task_ids: list[int] | None = None
-    features: dict[str, PolicyFeature] = field(default_factory=dict)
-    features_map: dict[str, str] = field(
-        default_factory=lambda: {
-            ACTION: ACTION,
-            "pixels/image": f"{OBS_IMAGES}.image",
-            "pixels/wrist_image": f"{OBS_IMAGES}.wrist_image",
-            "agent_pos": OBS_STATE,
-        }
-    )
-
-    def __post_init__(self):
-        action_dim = 8 if self.action_space == "joint_angle" else 7
-        self.features = {
-            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(action_dim,)),
-            "pixels/image": PolicyFeature(type=FeatureType.VISUAL, shape=(256, 256, 3)),
-            "pixels/wrist_image": PolicyFeature(type=FeatureType.VISUAL, shape=(256, 256, 3)),
-            "agent_pos": PolicyFeature(type=FeatureType.STATE, shape=(8,)),
-        }
-
-    @property
-    def gym_kwargs(self) -> dict:
-        return {}
-
-    def create_envs(self, n_envs: int, use_async_envs: bool = True):
-        from lerobot.envs.robomme import create_robomme_envs
-
-        env_cls = _make_vec_env_cls(use_async_envs, n_envs)
-        return create_robomme_envs(
-            task=self.task,
-            n_envs=n_envs,
-            action_space_type=self.action_space,
-            dataset=self.dataset_split,
-            episode_length=self.episode_length,
-            task_ids=self.task_ids,
-            env_cls=env_cls,
-        )
@@ -16,7 +16,6 @@
 from __future__ import annotations

 import os
-import re
 from collections import defaultdict
 from collections.abc import Callable, Iterable, Mapping, Sequence
 from functools import partial
@@ -32,7 +31,20 @@ from libero.libero.envs import OffScreenRenderEnv

 from lerobot.types import RobotObservation

-from .utils import _LazyAsyncVectorEnv, parse_camera_names
+from .utils import _LazyAsyncVectorEnv
+
+
+def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
+    """Normalize camera_name into a non-empty list of strings."""
+    if isinstance(camera_name, str):
+        cams = [c.strip() for c in camera_name.split(",") if c.strip()]
+    elif isinstance(camera_name, (list | tuple)):
+        cams = [str(c).strip() for c in camera_name if str(c).strip()]
+    else:
+        raise TypeError(f"camera_name must be str or sequence[str], got {type(camera_name).__name__}")
+    if not cams:
+        raise ValueError("camera_name resolved to an empty list.")
+    return cams


 def _get_suite(name: str) -> benchmark.Benchmark:
@@ -57,34 +69,14 @@ def _select_task_ids(total_tasks: int, task_ids: Iterable[int] | None) -> list[i
    return ids


-# LIBERO-plus perturbation variants encode the perturbation in the filename
-# but on disk only the base `.pruned_init` exists — strip the suffix to match
-# LIBERO-plus's own suite.get_task_init_states() (we reimplement it here so we
-# can pass weights_only=False for PyTorch 2.6+ numpy pickles).
-_LIBERO_PERTURBATION_SUFFIX_RE = re.compile(r"_(?:language|view|light)_[^.]*|_(?:table|tb)_\d+")
-
-
-def get_task_init_states(task_suite: Any, i: int, is_libero_plus: bool = False) -> np.ndarray:
-    task = task_suite.tasks[i]
-    filename = Path(task.init_states_file)
-    root = Path(get_libero_path("init_states"))
-
-    if not is_libero_plus:
-        init_states_path = root / task.problem_folder / filename.name
-        return torch.load(init_states_path, weights_only=False)  # nosec B614
-
-    # LIBERO-plus: `_add_` / `_level` variants store extra-object layouts under
-    # libero_newobj/ as a flat array that must be reshaped to (1, -1).
-    if "_add_" in filename.name or "_level" in filename.name:
-        init_states_path = root / "libero_newobj" / task.problem_folder / filename.name
-        init_states = torch.load(init_states_path, weights_only=False)  # nosec B614
-        return init_states.reshape(1, -1)
-
-    # LIBERO-plus perturbation variants encode the perturbation in the filename
-    # but on disk only the base `.pruned_init` exists — strip the suffix to match.
-    stripped = _LIBERO_PERTURBATION_SUFFIX_RE.sub("", filename.stem) + filename.suffix
-    init_states_path = root / task.problem_folder / stripped
-    return torch.load(init_states_path, weights_only=False)  # nosec B614
+def get_task_init_states(task_suite: Any, i: int) -> np.ndarray:
+    init_states_path = (
+        Path(get_libero_path("init_states"))
+        / task_suite.tasks[i].problem_folder
+        / task_suite.tasks[i].init_states_file
+    )
+    init_states = torch.load(init_states_path, weights_only=False)  # nosec B614
+    return init_states


 def get_libero_dummy_action():
@@ -126,11 +118,9 @@ class LiberoEnv(gym.Env):
        camera_name_mapping: dict[str, str] | None = None,
        num_steps_wait: int = 10,
        control_mode: str = "relative",
-        is_libero_plus: bool = False,
    ):
        super().__init__()
        self.task_id = task_id
-        self.is_libero_plus = is_libero_plus
        self.obs_type = obs_type
        self.render_mode = render_mode
        self.observation_width = observation_width
@@ -138,7 +128,7 @@ class LiberoEnv(gym.Env):
        self.visualization_width = visualization_width
        self.visualization_height = visualization_height
        self.init_states = init_states
-        self.camera_name = parse_camera_names(
+        self.camera_name = _parse_camera_names(
            camera_name
        )  # agentview_image (main) or robot0_eye_in_hand_image (wrist)

@@ -157,11 +147,7 @@ class LiberoEnv(gym.Env):
        self.episode_index = episode_index
        self.episode_length = episode_length
        # Load once and keep
-        self._init_states = (
-            get_task_init_states(task_suite, self.task_id, is_libero_plus=self.is_libero_plus)
-            if self.init_states
-            else None
-        )
+        self._init_states = get_task_init_states(task_suite, self.task_id) if self.init_states else None
        self._reset_stride = n_envs  # when performing a reset, append `_reset_stride` to `init_state_id`.

        self.init_state_id = self.episode_index  # tie each sub-env to a fixed init state
@@ -394,7 +380,6 @@ def _make_env_fns(
    gym_kwargs: Mapping[str, Any],
    control_mode: str,
    camera_name_mapping: dict[str, str] | None = None,
-    is_libero_plus: bool = False,
 ) -> list[Callable[[], LiberoEnv]]:
    """Build n_envs factory callables for a single (suite, task_id)."""

@@ -411,7 +396,6 @@ def _make_env_fns(
            n_envs=n_envs,
            control_mode=control_mode,
            camera_name_mapping=camera_name_mapping,
-            is_libero_plus=is_libero_plus,
            **local_kwargs,
        )

@@ -434,7 +418,6 @@ def create_libero_envs(
    control_mode: str = "relative",
    episode_length: int | None = None,
    camera_name_mapping: dict[str, str] | None = None,
-    is_libero_plus: bool = False,
 ) -> dict[str, dict[int, Any]]:
    """
    Create vectorized LIBERO environments with a consistent return shape.
@@ -454,7 +437,7 @@ def create_libero_envs(
    gym_kwargs = dict(gym_kwargs or {})
    task_ids_filter = gym_kwargs.pop("task_ids", None)  # optional: limit to specific tasks

-    camera_names = parse_camera_names(camera_name)
+    camera_names = _parse_camera_names(camera_name)
    suite_names = [s.strip() for s in str(task).split(",") if s.strip()]
    if not suite_names:
        raise ValueError("`task` must contain at least one LIBERO suite name.")
@@ -479,7 +462,6 @@ def create_libero_envs(
        # Probe once and reuse to avoid creating a temp env per task.
        cached_obs_space: spaces.Space | None = None
        cached_act_space: spaces.Space | None = None
-        cached_metadata: dict[str, Any] | None = None

        for tid in selected:
            fns = _make_env_fns(
@@ -493,14 +475,12 @@ def create_libero_envs(
                gym_kwargs=gym_kwargs,
                control_mode=control_mode,
                camera_name_mapping=camera_name_mapping,
-                is_libero_plus=is_libero_plus,
            )
            if is_async:
-                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
+                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space)
                if cached_obs_space is None:
                    cached_obs_space = lazy.observation_space
                    cached_act_space = lazy.action_space
-                    cached_metadata = lazy.metadata
                out[suite_name][tid] = lazy
            else:
                out[suite_name][tid] = env_cls(fns)
@@ -311,7 +311,6 @@ def create_metaworld_envs(
    is_async = env_cls is gym.vector.AsyncVectorEnv
    cached_obs_space = None
    cached_act_space = None
-    cached_metadata = None
    out: dict[str, dict[int, Any]] = defaultdict(dict)

    for group in task_groups:
@@ -325,11 +324,10 @@ def create_metaworld_envs(
            fns = [(lambda tn=task_name: MetaworldEnv(task=tn, **gym_kwargs)) for _ in range(n_envs)]

            if is_async:
-                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
+                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space)
                if cached_obs_space is None:
                    cached_obs_space = lazy.observation_space
                    cached_act_space = lazy.action_space
-                    cached_metadata = lazy.metadata
                out[group][tid] = lazy
            else:
                out[group][tid] = env_cls(fns)
@@ -1,425 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import annotations
-
-import logging
-from collections import defaultdict
-from collections.abc import Callable, Sequence
-from functools import partial
-from typing import Any
-
-import gymnasium as gym
-import numpy as np
-from gymnasium import spaces
-
-from lerobot.types import RobotObservation
-
-from .utils import _LazyAsyncVectorEnv, parse_camera_names
-
-logger = logging.getLogger(__name__)
-
-# Dimensions for the flat action/state vectors used by the LeRobot wrapper.
-# These correspond to the PandaOmron robot in RoboCasa365.
-OBS_STATE_DIM = 16  # base_pos(3) + base_quat(4) + ee_pos_rel(3) + ee_quat_rel(4) + gripper_qpos(2)
-ACTION_DIM = 12  # base_motion(4) + control_mode(1) + ee_pos(3) + ee_rot(3) + gripper(1)
-ACTION_LOW = -1.0
-ACTION_HIGH = 1.0
-
-# Default PandaOmron cameras. We surface these raw names directly as
-# `observation.images.<name>` so the LeRobot dataset/policy keys match
-# RoboCasa's native convention (no implicit renaming).
-DEFAULT_CAMERAS = [
-    "robot0_agentview_left",
-    "robot0_eye_in_hand",
-    "robot0_agentview_right",
-]
-
-# Object-mesh registries to sample from. RoboCasa's upstream default is
-# ("objaverse", "lightwheel"), but the objaverse pack is huge (~30GB) and
-# most users — including our CI image — only download the lightwheel pack
-# (`--type objs_lw` in `download_kitchen_assets`). When a sampled object
-# category has zero candidates in every registry, robocasa crashes with
-# `ValueError: Probabilities contain NaN` (0/0 divide in the probability
-# normalization). Restricting to registries that are actually on disk
-# avoids the NaN and matches what the asset download provides.
-DEFAULT_OBJ_REGISTRIES: tuple[str, ...] = ("lightwheel",)
-
-# Task-group shortcuts accepted as `--env.task`. When the user passes one of
-# these names, we expand it to the upstream RoboCasa task list and auto-set
-# the dataset split. Individual task names (optionally comma-separated) still
-# take precedence; this only triggers on an exact group-name match.
-_TASK_GROUP_SPLITS = {
-    "atomic_seen": "target",
-    "composite_seen": "target",
-    "composite_unseen": "target",
-    "pretrain50": "pretrain",
-    "pretrain100": "pretrain",
-    "pretrain200": "pretrain",
-    "pretrain300": "pretrain",
-}
-
-
-def _resolve_tasks(task: str) -> tuple[list[str], str | None]:
-    """Resolve a `--env.task` value to (task_names, split_override).
-
-    If `task` is a known task-group name (e.g. `atomic_seen`, `pretrain100`),
-    expand it via `robocasa.utils.dataset_registry.{TARGET,PRETRAINING}_TASKS`
-    and return the matching split. Otherwise treat `task` as a single task or
-    comma-separated list and leave the split untouched (None).
-    """
-    key = task.strip()
-    if key in _TASK_GROUP_SPLITS:
-        from robocasa.utils.dataset_registry import PRETRAINING_TASKS, TARGET_TASKS
-
-        combined = {**TARGET_TASKS, **PRETRAINING_TASKS}
-        if key not in combined:
-            raise ValueError(
-                f"Task group '{key}' is not available in this version of robocasa. "
-                f"Known groups: {sorted(combined.keys())}."
-            )
-        return list(combined[key]), _TASK_GROUP_SPLITS[key]
-
-    names = [t.strip() for t in task.split(",") if t.strip()]
-    if not names:
-        raise ValueError("`task` must contain at least one RoboCasa task name.")
-    return names, None
-
-
-def convert_action(flat_action: np.ndarray) -> dict[str, Any]:
-    """Split a flat (12,) action vector into a RoboCasa action dict.
-
-    Layout: base_motion(4) + control_mode(1) + ee_pos(3) + ee_rot(3) + gripper(1)
-    """
-    return {
-        "action.base_motion": flat_action[0:4],
-        "action.control_mode": flat_action[4:5],
-        "action.end_effector_position": flat_action[5:8],
-        "action.end_effector_rotation": flat_action[8:11],
-        "action.gripper_close": flat_action[11:12],
-    }
-
-
-class RoboCasaEnv(gym.Env):
-    """LeRobot gym.Env wrapper for RoboCasa365 kitchen environments.
-
-    Wraps RoboCasaGymEnv from the robocasa package and converts its
-    dict-based observations and actions into the flat arrays LeRobot expects.
-    Raw RoboCasa camera names are preserved verbatim under `pixels/<cam>`.
-    """
-
-    metadata = {"render_modes": ["rgb_array"], "render_fps": 20}
-
-    def __init__(
-        self,
-        task: str,
-        camera_name: str | Sequence[str] = ",".join(DEFAULT_CAMERAS),
-        obs_type: str = "pixels_agent_pos",
-        render_mode: str = "rgb_array",
-        observation_width: int = 256,
-        observation_height: int = 256,
-        visualization_width: int = 512,
-        visualization_height: int = 512,
-        split: str | None = None,
-        episode_length: int | None = None,
-        obj_registries: Sequence[str] = DEFAULT_OBJ_REGISTRIES,
-        episode_index: int = 0,
-    ):
-        super().__init__()
-        self.task = task
-        self.obs_type = obs_type
-        self.render_mode = render_mode
-        self.observation_width = observation_width
-        self.observation_height = observation_height
-        self.visualization_width = visualization_width
-        self.visualization_height = visualization_height
-        self.split = split
-        self.obj_registries = tuple(obj_registries)
-        # Per-worker index (0..n_envs-1) used to spread the user-provided
-        # seed across factories so each sub-env explores a distinct layout
-        # even when the same seed is passed to `reset()`.
-        self.episode_index = int(episode_index)
-
-        self.camera_name = parse_camera_names(camera_name)
-
-        self._max_episode_steps = episode_length if episode_length is not None else 1000
-
-        # Deferred — created on first reset() inside the worker subprocess
-        # to avoid inheriting stale GPU/EGL contexts across fork().
-        self._env: Any = None
-        self.task_description = ""
-
-        images = {
-            cam: spaces.Box(
-                low=0,
-                high=255,
-                shape=(self.observation_height, self.observation_width, 3),
-                dtype=np.uint8,
-            )
-            for cam in self.camera_name
-        }
-
-        if self.obs_type == "pixels":
-            self.observation_space = spaces.Dict({"pixels": spaces.Dict(images)})
-        elif self.obs_type == "pixels_agent_pos":
-            self.observation_space = spaces.Dict(
-                {
-                    "pixels": spaces.Dict(images),
-                    "agent_pos": spaces.Box(
-                        low=-np.inf,
-                        high=np.inf,
-                        shape=(OBS_STATE_DIM,),
-                        dtype=np.float32,
-                    ),
-                }
-            )
-        else:
-            raise ValueError(f"Unsupported obs_type '{self.obs_type}'. Use 'pixels' or 'pixels_agent_pos'.")
-
-        self.action_space = spaces.Box(
-            low=ACTION_LOW,
-            high=ACTION_HIGH,
-            shape=(ACTION_DIM,),
-            dtype=np.float32,
-        )
-
-    def _ensure_env(self) -> None:
-        """Create the underlying RoboCasaGymEnv on first use.
-
-        Called inside the worker subprocess after fork(), so each worker gets
-        its own clean rendering context rather than inheriting a stale one from
-        the parent process (which causes crashes with AsyncVectorEnv).
-        """
-        if self._env is not None:
-            return
-        from robocasa.wrappers.gym_wrapper import RoboCasaGymEnv
-
-        # RoboCasaGymEnv defaults split="test", which create_env rejects
-        # (only None/"all"/"pretrain"/"target" are valid). Always pass a
-        # valid value so we don't hit that default. Extra kwargs are
-        # forwarded to the underlying kitchen env via create_env/robosuite.make.
-        self._env = RoboCasaGymEnv(
-            env_name=self.task,
-            camera_widths=self.observation_width,
-            camera_heights=self.observation_height,
-            split=self.split if self.split is not None else "all",
-            obj_registries=self.obj_registries,
-        )
-
-        ep_meta = self._env.env.get_ep_meta()
-        self.task_description = ep_meta.get("lang", self.task)
-
-    def _format_raw_obs(self, raw_obs: dict) -> RobotObservation:
-        """Convert RoboCasaGymEnv observation dict to LeRobot format."""
-        # RoboCasaGymEnv emits camera frames under "video.<cam>".
-        images = {cam: raw_obs[f"video.{cam}"] for cam in self.camera_name if f"video.{cam}" in raw_obs}
-
-        if self.obs_type == "pixels":
-            return {"pixels": images}
-
-        # `state.*` keys come from PandaOmronKeyConverter inside the wrapper.
-        agent_pos = np.concatenate(
-            [
-                raw_obs.get("state.base_position", np.zeros(3)),
-                raw_obs.get("state.base_rotation", np.zeros(4)),
-                raw_obs.get("state.end_effector_position_relative", np.zeros(3)),
-                raw_obs.get("state.end_effector_rotation_relative", np.zeros(4)),
-                raw_obs.get("state.gripper_qpos", np.zeros(2)),
-            ],
-            axis=-1,
-        ).astype(np.float32)
-
-        return {"pixels": images, "agent_pos": agent_pos}
-
-    def render(self) -> np.ndarray:
-        self._ensure_env()
-        assert self._env is not None
-        return self._env.render()
-
-    def reset(self, seed=None, **kwargs):
-        self._ensure_env()
-        assert self._env is not None
-        super().reset(seed=seed)
-        # Spread the seed across workers so n_envs factories don't all
-        # roll the same scene. With an explicit user seed we shift it by
-        # episode_index; with no seed we fall back to episode_index so
-        # each worker is still distinct rather than inheriting the same
-        # global RNG state.
-        worker_seed = seed + self.episode_index if seed is not None else self.episode_index
-        raw_obs, info = self._env.reset(seed=worker_seed)
-
-        ep_meta = self._env.env.get_ep_meta()
-        self.task_description = ep_meta.get("lang", self.task)
-
-        observation = self._format_raw_obs(raw_obs)
-        info = {"is_success": False}
-        return observation, info
-
-    def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
-        self._ensure_env()
-        assert self._env is not None
-        if action.ndim != 1:
-            raise ValueError(
-                f"Expected action to be 1-D (shape (action_dim,)), "
-                f"but got shape {action.shape} with ndim={action.ndim}"
-            )
-
-        action_dict = convert_action(action)
-        raw_obs, reward, done, truncated, info = self._env.step(action_dict)
-
-        is_success = bool(info.get("success", False))
-        terminated = done or is_success
-        info.update({"task": self.task, "done": done, "is_success": is_success})
-
-        observation = self._format_raw_obs(raw_obs)
-        if terminated:
-            info["final_info"] = {
-                "task": self.task,
-                "done": bool(done),
-                "is_success": bool(is_success),
-            }
-            self.reset()
-
-        return observation, reward, terminated, truncated, info
-
-    def close(self):
-        if self._env is not None:
-            self._env.close()
-
-
-def _make_env_fns(
-    *,
-    task: str,
-    n_envs: int,
-    camera_names: list[str],
-    obs_type: str,
-    render_mode: str,
-    observation_width: int,
-    observation_height: int,
-    visualization_width: int,
-    visualization_height: int,
-    split: str | None,
-    episode_length: int | None,
-    obj_registries: Sequence[str],
-) -> list[Callable[[], RoboCasaEnv]]:
-    """Build n_envs factory callables for a single task.
-
-    Each factory carries a distinct ``episode_index`` (``0..n_envs-1``) so
-    ``RoboCasaEnv.reset()`` can derive a per-worker seed series from the
-    user-provided seed.
-    """
-
-    def _make_env(episode_index: int) -> RoboCasaEnv:
-        return RoboCasaEnv(
-            task=task,
-            camera_name=camera_names,
-            obs_type=obs_type,
-            render_mode=render_mode,
-            observation_width=observation_width,
-            observation_height=observation_height,
-            visualization_width=visualization_width,
-            visualization_height=visualization_height,
-            split=split,
-            episode_length=episode_length,
-            obj_registries=obj_registries,
-            episode_index=episode_index,
-        )
-
-    return [partial(_make_env, i) for i in range(n_envs)]
-
-
-def create_robocasa_envs(
-    task: str,
-    n_envs: int,
-    gym_kwargs: dict[str, Any] | None = None,
-    camera_name: str | Sequence[str] = ",".join(DEFAULT_CAMERAS),
-    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
-    episode_length: int | None = None,
-    obj_registries: Sequence[str] = DEFAULT_OBJ_REGISTRIES,
-) -> dict[str, dict[int, Any]]:
-    """Create vectorized RoboCasa365 environments with a consistent return shape.
-
-    Returns:
-        dict[task_name][task_id] -> vec_env (env_cls([...]) with exactly n_envs factories)
-
-    `task` can be:
-      - a single task name (e.g. `CloseFridge`)
-      - a comma-separated list of task names (e.g. `CloseFridge,PickPlaceCoffee`)
-      - a benchmark-group shortcut (`atomic_seen`, `composite_seen`,
-        `composite_unseen`, `pretrain50`, `pretrain100`, `pretrain200`,
-        `pretrain300`), which auto-expands to the upstream task list and
-        auto-sets the dataset `split` ("target" or "pretrain").
-    """
-    if env_cls is None or not callable(env_cls):
-        raise ValueError("env_cls must be a callable that wraps a list of environment factory callables.")
-    if not isinstance(n_envs, int) or n_envs <= 0:
-        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
-
-    gym_kwargs = dict(gym_kwargs or {})
-    obs_type = gym_kwargs.pop("obs_type", "pixels_agent_pos")
-    render_mode = gym_kwargs.pop("render_mode", "rgb_array")
-    observation_width = gym_kwargs.pop("observation_width", 256)
-    observation_height = gym_kwargs.pop("observation_height", 256)
-    visualization_width = gym_kwargs.pop("visualization_width", 512)
-    visualization_height = gym_kwargs.pop("visualization_height", 512)
-    split = gym_kwargs.pop("split", None)
-
-    camera_names = parse_camera_names(camera_name)
-    task_names, group_split = _resolve_tasks(str(task))
-    if group_split is not None and split is None:
-        split = group_split
-
-    logger.info(
-        "Creating RoboCasa envs | tasks=%s | split=%s | n_envs(per task)=%d",
-        task_names,
-        split,
-        n_envs,
-    )
-
-    is_async = env_cls is gym.vector.AsyncVectorEnv
-
-    cached_obs_space: spaces.Space | None = None
-    cached_act_space: spaces.Space | None = None
-    cached_metadata: dict[str, Any] | None = None
-    out: dict[str, dict[int, Any]] = defaultdict(dict)
-
-    for task_name in task_names:
-        fns = _make_env_fns(
-            task=task_name,
-            n_envs=n_envs,
-            camera_names=camera_names,
-            obs_type=obs_type,
-            render_mode=render_mode,
-            observation_width=observation_width,
-            observation_height=observation_height,
-            visualization_width=visualization_width,
-            visualization_height=visualization_height,
-            split=split,
-            episode_length=episode_length,
-            obj_registries=obj_registries,
-        )
-
-        if is_async:
-            lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
-            if cached_obs_space is None:
-                cached_obs_space = lazy.observation_space
-                cached_act_space = lazy.action_space
-                cached_metadata = lazy.metadata
-            out[task_name][0] = lazy
-        else:
-            out[task_name][0] = env_cls(fns)
-        logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
-
-    return {name: dict(task_map) for name, task_map in out.items()}
@@ -1,245 +0,0 @@
-"""RoboMME environment wrapper for LeRobot evaluation.
-
-Wraps the RoboMME ``BenchmarkEnvBuilder`` into a Gymnasium-compatible
-``VectorEnv`` suitable for ``lerobot_eval``.
-
-RoboMME tasks:
-  Counting:    BinFill, PickXtimes, SwingXtimes, StopCube
-  Permanence:  VideoUnmask, VideoUnmaskSwap, ButtonUnmask, ButtonUnmaskSwap
-  Reference:   PickHighlight, VideoRepick, VideoPlaceButton, VideoPlaceOrder
-  Imitation:   MoveCube, InsertPeg, PatternLock, RouteStick
-
-Dataset: lerobot/robomme (LeRobot v3.0, 1,600 episodes)
-Install: see docker/Dockerfile.benchmark.robomme  (Linux only — mani-skill vs numpy pin conflict)
-Benchmark: https://github.com/RoboMME/robomme_benchmark
-"""
-
-from __future__ import annotations
-
-from collections.abc import Callable, Sequence
-from functools import partial
-from typing import Any
-
-import gymnasium as gym
-import numpy as np
-from gymnasium import spaces
-
-from .utils import _LazyAsyncVectorEnv
-
-ROBOMME_TASKS = [
-    "BinFill",
-    "PickXtimes",
-    "SwingXtimes",
-    "StopCube",
-    "VideoUnmask",
-    "VideoUnmaskSwap",
-    "ButtonUnmask",
-    "ButtonUnmaskSwap",
-    "PickHighlight",
-    "VideoRepick",
-    "VideoPlaceButton",
-    "VideoPlaceOrder",
-    "MoveCube",
-    "InsertPeg",
-    "PatternLock",
-    "RouteStick",
-]
-
-
-class RoboMMEGymEnv(gym.Env):
-    """Thin Gymnasium wrapper around a single RoboMME episode env."""
-
-    metadata = {"render_modes": ["rgb_array"], "render_fps": 10}
-
-    def __init__(
-        self,
-        task: str = "PickXtimes",
-        action_space_type: str = "joint_angle",
-        dataset: str = "test",
-        episode_idx: int = 0,
-        max_steps: int = 300,
-    ):
-        super().__init__()
-        from robomme.env_record_wrapper import BenchmarkEnvBuilder
-
-        self._task = task
-        self._action_space_type = action_space_type
-        self._dataset = dataset
-        self._episode_idx = episode_idx
-        self._max_steps = max_steps
-        self._max_episode_steps = max_steps
-
-        self._builder = BenchmarkEnvBuilder(
-            env_id=task,
-            dataset=dataset,
-            action_space=action_space_type,
-            gui_render=False,
-            max_steps=max_steps,
-        )
-        self._env = None
-        self._last_raw_obs: dict | None = None
-
-        action_dim = 8 if action_space_type == "joint_angle" else 7
-        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_dim,), dtype=np.float32)
-        # `pixels` must be a nested Dict so `preprocess_observation()` in
-        # envs/utils.py picks it up and maps each camera to
-        # `observation.images.<cam>`. A flat layout (`pixels/image`,
-        # `pixels/wrist_image`) silently drops every image from the batch.
-        self.observation_space = spaces.Dict(
-            {
-                "pixels": spaces.Dict(
-                    {
-                        "image": spaces.Box(0, 255, shape=(256, 256, 3), dtype=np.uint8),
-                        "wrist_image": spaces.Box(0, 255, shape=(256, 256, 3), dtype=np.uint8),
-                    }
-                ),
-                "agent_pos": spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32),
-            }
-        )
-
-    def reset(self, *, seed=None, options=None):
-        super().reset(seed=seed)
-        self._env = self._builder.make_env_for_episode(
-            episode_idx=self._episode_idx,
-            max_steps=self._max_steps,
-        )
-        obs, info = self._env.reset()
-        self._last_raw_obs = obs
-        return self._convert_obs(obs), self._convert_info(info)
-
-    def step(self, action):
-        obs, reward, terminated, truncated, info = self._env.step(action)
-        self._last_raw_obs = obs
-
-        terminated_bool = bool(terminated.item()) if hasattr(terminated, "item") else bool(terminated)
-        truncated_bool = bool(truncated.item()) if hasattr(truncated, "item") else bool(truncated)
-
-        status = info.get("status", "ongoing")
-        is_success = status == "success"
-        conv_info = self._convert_info(info)
-        conv_info["is_success"] = is_success
-
-        return self._convert_obs(obs), float(reward), terminated_bool, truncated_bool, conv_info
-
-    def render(self) -> np.ndarray | None:
-        """Return the front camera image from the last observation for video recording."""
-        if self._last_raw_obs is None:
-            return np.zeros((256, 256, 3), dtype=np.uint8)
-        front = self._last_raw_obs.get("front_rgb_list")
-        if front is None:
-            return np.zeros((256, 256, 3), dtype=np.uint8)
-        frame = front[-1] if isinstance(front, list) else front
-        return np.asarray(frame, dtype=np.uint8)
-
-    def _convert_obs(self, obs: dict) -> dict:
-        front_rgb = (
-            obs["front_rgb_list"][-1] if isinstance(obs["front_rgb_list"], list) else obs["front_rgb_list"]
-        )
-        wrist_rgb = (
-            obs["wrist_rgb_list"][-1] if isinstance(obs["wrist_rgb_list"], list) else obs["wrist_rgb_list"]
-        )
-        joint_state = (
-            obs["joint_state_list"][-1]
-            if isinstance(obs["joint_state_list"], list)
-            else obs["joint_state_list"]
-        )
-        gripper_state = (
-            obs["gripper_state_list"][-1]
-            if isinstance(obs["gripper_state_list"], list)
-            else obs["gripper_state_list"]
-        )
-
-        front_rgb = np.asarray(front_rgb, dtype=np.uint8)
-        wrist_rgb = np.asarray(wrist_rgb, dtype=np.uint8)
-        joint = np.asarray(joint_state, dtype=np.float32).flatten()[:7]
-        gripper = np.asarray(gripper_state, dtype=np.float32).flatten()[:1]
-        state = np.concatenate([joint, gripper])
-
-        return {
-            "pixels": {"image": front_rgb, "wrist_image": wrist_rgb},
-            "agent_pos": state,
-        }
-
-    def _convert_info(self, info: dict) -> dict:
-        return {
-            "status": info.get("status", "ongoing"),
-            "task_goal": info.get("task_goal", ""),
-        }
-
-
-def _make_env_fns(
-    *,
-    task: str,
-    n_envs: int,
-    action_space_type: str,
-    dataset: str,
-    episode_length: int,
-    task_id: int,
-) -> list[Callable[[], RoboMMEGymEnv]]:
-    """Build n_envs factory callables for one RoboMME task id."""
-
-    def _make_one(episode_index: int) -> RoboMMEGymEnv:
-        return RoboMMEGymEnv(
-            task=task,
-            action_space_type=action_space_type,
-            dataset=dataset,
-            episode_idx=episode_index,
-            max_steps=episode_length,
-        )
-
-    return [partial(_make_one, task_id + i) for i in range(n_envs)]
-
-
-def create_robomme_envs(
-    task: str,
-    n_envs: int = 1,
-    action_space_type: str = "joint_angle",
-    dataset: str = "test",
-    episode_length: int = 300,
-    task_ids: list[int] | None = None,
-    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
-) -> dict[str, dict[int, gym.vector.VectorEnv]]:
-    """Create vectorized RoboMME environments for evaluation.
-
-    `task` may be a single RoboMME task name (e.g. "PickXtimes") or a
-    comma-separated list (e.g. "PickXtimes,BinFill,StopCube"). Each task
-    becomes its own suite in the returned mapping.
-
-    Returns {suite_name: {task_id: VectorEnv}} matching lerobot's expected format.
-    """
-    if env_cls is None or not callable(env_cls):
-        raise ValueError("env_cls must be a callable that wraps a list of env factory callables.")
-    if not isinstance(n_envs, int) or n_envs <= 0:
-        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
-
-    if task_ids is None:
-        task_ids = [0]
-
-    task_names = [t.strip() for t in task.split(",") if t.strip()]
-    is_async = env_cls is gym.vector.AsyncVectorEnv
-    cached_obs_space: spaces.Space | None = None
-    cached_act_space: spaces.Space | None = None
-    cached_metadata: dict[str, Any] | None = None
-    out: dict[str, dict[int, gym.vector.VectorEnv]] = {}
-    for task_name in task_names:
-        envs_by_task: dict[int, gym.vector.VectorEnv] = {}
-        for task_id in task_ids:
-            fns = _make_env_fns(
-                task=task_name,
-                n_envs=n_envs,
-                action_space_type=action_space_type,
-                dataset=dataset,
-                episode_length=episode_length,
-                task_id=task_id,
-            )
-            if is_async:
-                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
-                if cached_obs_space is None:
-                    cached_obs_space = lazy.observation_space
-                    cached_act_space = lazy.action_space
-                    cached_metadata = lazy.metadata
-                envs_by_task[task_id] = lazy
-            else:
-                envs_by_task[task_id] = env_cls(fns)
-        out[task_name] = envs_by_task
-    return out
@@ -1,488 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import annotations
-
-import importlib
-import logging
-from collections import defaultdict
-from collections.abc import Callable, Sequence
-from functools import partial
-from typing import Any
-
-import gymnasium as gym
-import numpy as np
-import torch
-from gymnasium import spaces
-
-from lerobot.types import RobotObservation
-
-from .utils import _LazyAsyncVectorEnv
-
-logger = logging.getLogger(__name__)
-
-# Camera names as used by RoboTwin 2.0. The wrapper appends "_rgb" when looking
-# up keys in get_obs() output (e.g. "head_camera" → "head_camera_rgb").
-ROBOTWIN_CAMERA_NAMES: tuple[str, ...] = (
-    "head_camera",
-    "left_camera",
-    "right_camera",
-)
-
-ACTION_DIM = 14  # 7 DOF × 2 arms
-ACTION_LOW = -1.0
-ACTION_HIGH = 1.0
-DEFAULT_EPISODE_LENGTH = 300
-# D435 dims from task_config/_camera_config.yml (what demo_clean.yml selects).
-DEFAULT_CAMERA_H = 240
-DEFAULT_CAMERA_W = 320
-
-# Task list from RoboTwin 2.0's `envs/` directory — mirrors upstream exactly
-# (50 tasks as of main; earlier revisions had 60 with a different split).
-# Keep this in sync with:
-#   gh api /repos/RoboTwin-Platform/RoboTwin/contents/envs --paginate \
-#     | jq -r '.[].name' | grep -E '\.py$' | grep -v '^_' | sed 's/\.py$//'
-ROBOTWIN_TASKS: tuple[str, ...] = (
-    "adjust_bottle",
-    "beat_block_hammer",
-    "blocks_ranking_rgb",
-    "blocks_ranking_size",
-    "click_alarmclock",
-    "click_bell",
-    "dump_bin_bigbin",
-    "grab_roller",
-    "handover_block",
-    "handover_mic",
-    "hanging_mug",
-    "lift_pot",
-    "move_can_pot",
-    "move_pillbottle_pad",
-    "move_playingcard_away",
-    "move_stapler_pad",
-    "open_laptop",
-    "open_microwave",
-    "pick_diverse_bottles",
-    "pick_dual_bottles",
-    "place_a2b_left",
-    "place_a2b_right",
-    "place_bread_basket",
-    "place_bread_skillet",
-    "place_burger_fries",
-    "place_can_basket",
-    "place_cans_plasticbox",
-    "place_container_plate",
-    "place_dual_shoes",
-    "place_empty_cup",
-    "place_fan",
-    "place_mouse_pad",
-    "place_object_basket",
-    "place_object_scale",
-    "place_object_stand",
-    "place_phone_stand",
-    "place_shoe",
-    "press_stapler",
-    "put_bottles_dustbin",
-    "put_object_cabinet",
-    "rotate_qrcode",
-    "scan_object",
-    "shake_bottle",
-    "shake_bottle_horizontally",
-    "stack_blocks_three",
-    "stack_blocks_two",
-    "stack_bowls_three",
-    "stack_bowls_two",
-    "stamp_seal",
-    "turn_switch",
-)
-
-
-_ROBOTWIN_SETUP_CACHE: dict[str, dict[str, Any]] = {}
-
-
-def _load_robotwin_setup_kwargs(task_name: str) -> dict[str, Any]:
-    """Build the kwargs dict RoboTwin's setup_demo expects.
-
-    Mirrors the config loading done by RoboTwin's ``script/eval_policy.py``:
-    reads ``task_config/demo_clean.yml``, resolves the embodiment file from
-    ``_embodiment_config.yml``, loads the robot's own ``config.yml``, and
-    reads camera dimensions from ``_camera_config.yml``.
-
-    Uses ``aloha-agilex`` single-robot dual-arm by default (the only embodiment
-    used by beat_block_hammer and most smoke-test tasks).
-    """
-    if task_name in _ROBOTWIN_SETUP_CACHE:
-        return dict(_ROBOTWIN_SETUP_CACHE[task_name])
-
-    import os
-
-    import yaml  # type: ignore[import-untyped]
-    from envs import CONFIGS_PATH  # type: ignore[import-not-found]
-
-    task_config = "demo_clean"
-    with open(os.path.join(CONFIGS_PATH, f"{task_config}.yml"), encoding="utf-8") as f:
-        args = yaml.safe_load(f)
-
-    # Resolve embodiment — demo_clean.yml uses [aloha-agilex] (dual-arm single robot)
-    with open(os.path.join(CONFIGS_PATH, "_embodiment_config.yml"), encoding="utf-8") as f:
-        embodiment_types = yaml.safe_load(f)
-    embodiment = args.get("embodiment", ["aloha-agilex"])
-    if len(embodiment) == 1:
-        robot_file = embodiment_types[embodiment[0]]["file_path"]
-        args["left_robot_file"] = robot_file
-        args["right_robot_file"] = robot_file
-        args["dual_arm_embodied"] = True
-    elif len(embodiment) == 3:
-        args["left_robot_file"] = embodiment_types[embodiment[0]]["file_path"]
-        args["right_robot_file"] = embodiment_types[embodiment[1]]["file_path"]
-        args["embodiment_dis"] = embodiment[2]
-        args["dual_arm_embodied"] = False
-    else:
-        raise ValueError(f"embodiment must have 1 or 3 items, got {len(embodiment)}")
-
-    with open(os.path.join(args["left_robot_file"], "config.yml"), encoding="utf-8") as f:
-        args["left_embodiment_config"] = yaml.safe_load(f)
-    with open(os.path.join(args["right_robot_file"], "config.yml"), encoding="utf-8") as f:
-        args["right_embodiment_config"] = yaml.safe_load(f)
-
-    # Camera dimensions
-    with open(os.path.join(CONFIGS_PATH, "_camera_config.yml"), encoding="utf-8") as f:
-        camera_config = yaml.safe_load(f)
-    head_cam = args["camera"]["head_camera_type"]
-    args["head_camera_h"] = camera_config[head_cam]["h"]
-    args["head_camera_w"] = camera_config[head_cam]["w"]
-
-    # Headless overrides
-    args["render_freq"] = 0
-    args["task_name"] = task_name
-    args["task_config"] = task_config
-
-    _ROBOTWIN_SETUP_CACHE[task_name] = args
-    return dict(args)
-
-
-def _load_robotwin_task(task_name: str) -> type:
-    """Dynamically import and return a RoboTwin 2.0 task class.
-
-    RoboTwin tasks live in ``envs/<task_name>.py`` relative to the repository
-    root and are expected to be on ``sys.path`` after installation.
-    """
-    try:
-        module = importlib.import_module(f"envs.{task_name}")
-    except ModuleNotFoundError as e:
-        raise ModuleNotFoundError(
-            f"Could not import RoboTwin task '{task_name}'. "
-            "Ensure RoboTwin 2.0 is installed and its 'envs/' directory is on PYTHONPATH. "
-            "See the RoboTwin installation guide: https://robotwin-platform.github.io/doc/usage/robotwin-install.html"
-        ) from e
-    task_cls = getattr(module, task_name, None)
-    if task_cls is None:
-        raise AttributeError(f"Task class '{task_name}' not found in envs/{task_name}.py")
-    return task_cls
-
-
-class RoboTwinEnv(gym.Env):
-    """Gymnasium wrapper around a single RoboTwin 2.0 task.
-
-    RoboTwin uses a custom SAPIEN-based API (``setup_demo`` / ``get_obs`` /
-    ``take_action`` / ``check_success``) rather than the standard gym interface.
-    This class bridges that API to Gymnasium so that ``lerobot-eval`` can drive
-    RoboTwin exactly like LIBERO or Meta-World.
-
-    The underlying SAPIEN environment is created lazily on the first ``reset()``
-    call *inside the worker process*.  This is required for
-    ``gym.vector.AsyncVectorEnv`` compatibility: SAPIEN allocates EGL/GPU
-    contexts that must not be forked from the parent process.
-
-    Observations
-    ------------
-    The ``pixels`` dict uses the raw RoboTwin camera names as keys (e.g.
-    ``"head_camera"``, ``"left_camera"``). ``preprocess_observation`` in
-    ``envs/utils.py`` then converts these to ``observation.images.<cam>``.
-
-    Actions
-    -------
-    14-dim float32 array in ``[-1, 1]`` (joint-space, 7 DOF per arm).
-
-    Autograd
-    --------
-    ``setup_demo`` and ``take_action`` drive CuRobo's Newton trajectory
-    optimizer, which calls ``cost.backward()`` internally. lerobot_eval wraps
-    the rollout in ``torch.no_grad()``, so both call sites re-enable grad.
-    """
-
-    metadata = {"render_modes": ["rgb_array"], "render_fps": 25}
-
-    def __init__(
-        self,
-        task_name: str,
-        episode_index: int = 0,
-        n_envs: int = 1,
-        camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
-        observation_height: int | None = None,
-        observation_width: int | None = None,
-        episode_length: int = DEFAULT_EPISODE_LENGTH,
-        render_mode: str = "rgb_array",
-    ):
-        super().__init__()
-        self.task_name = task_name
-        self.task = task_name  # used by add_envs_task() in utils.py
-        self.task_description = task_name.replace("_", " ")
-        self.episode_index = episode_index
-        self._reset_stride = n_envs
-        self.camera_names = list(camera_names)
-        # Default to D435 dims (the camera type baked into task_config/demo_clean.yml).
-        # The YAML-driven lookup is deferred to reset() so construction doesn't
-        # import RoboTwin's `envs` module — fast-tests run without RoboTwin installed.
-        self.observation_height = observation_height or DEFAULT_CAMERA_H
-        self.observation_width = observation_width or DEFAULT_CAMERA_W
-        self.episode_length = episode_length
-        self._max_episode_steps = episode_length  # lerobot_eval.rollout reads this
-        self.render_mode = render_mode
-
-        self._env: Any | None = None  # deferred — created on first reset() inside worker
-        self._step_count: int = 0
-        self._black_frame = np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
-
-        image_spaces = {
-            cam: spaces.Box(
-                low=0,
-                high=255,
-                shape=(self.observation_height, self.observation_width, 3),
-                dtype=np.uint8,
-            )
-            for cam in self.camera_names
-        }
-        self.observation_space = spaces.Dict(
-            {
-                "pixels": spaces.Dict(image_spaces),
-                "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(ACTION_DIM,), dtype=np.float32),
-            }
-        )
-        self.action_space = spaces.Box(
-            low=ACTION_LOW, high=ACTION_HIGH, shape=(ACTION_DIM,), dtype=np.float32
-        )
-
-    def _ensure_env(self) -> None:
-        """Create the SAPIEN environment on first use.
-
-        Called inside the worker subprocess after fork(), so each worker gets
-        its own EGL/GPU context rather than inheriting a stale one from the
-        parent process (which causes crashes with AsyncVectorEnv).
-        """
-        if self._env is not None:
-            return
-        task_cls = _load_robotwin_task(self.task_name)
-        self._env = task_cls()
-
-    def _get_obs(self) -> RobotObservation:
-        assert self._env is not None, "_get_obs called before _ensure_env()"
-        raw = self._env.get_obs()
-        cameras_raw = raw.get("observation", {})
-
-        images: dict[str, np.ndarray] = {}
-        for cam in self.camera_names:
-            cam_data = cameras_raw.get(cam)
-            img = cam_data.get("rgb") if cam_data else None
-            if img is None:
-                images[cam] = self._black_frame
-                continue
-            img = np.asarray(img, dtype=np.uint8)
-            if img.ndim == 2:
-                img = np.stack([img, img, img], axis=-1)
-            elif img.shape[-1] != 3:
-                img = img[..., :3]
-            images[cam] = img
-
-        ja = raw.get("joint_action") or {}
-        vec = ja.get("vector")
-        if vec is not None:
-            arr = np.asarray(vec, dtype=np.float32).ravel()
-            joint_state = (
-                arr[:ACTION_DIM] if arr.size >= ACTION_DIM else np.zeros(ACTION_DIM, dtype=np.float32)
-            )
-        else:
-            joint_state = np.zeros(ACTION_DIM, dtype=np.float32)
-
-        return {"pixels": images, "agent_pos": joint_state}
-
-    def reset(self, seed: int | None = None, **kwargs) -> tuple[RobotObservation, dict]:
-        self._ensure_env()
-        super().reset(seed=seed)
-        assert self._env is not None  # set by _ensure_env() above
-
-        actual_seed = self.episode_index if seed is None else seed
-        setup_kwargs = _load_robotwin_setup_kwargs(self.task_name)
-        setup_kwargs.update(seed=actual_seed, is_test=True)
-        with torch.enable_grad():
-            self._env.setup_demo(**setup_kwargs)
-        self.episode_index += self._reset_stride
-        self._step_count = 0
-
-        obs = self._get_obs()
-        return obs, {"is_success": False, "task": self.task_name}
-
-    def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
-        assert self._env is not None, "step() called before reset()"
-        if action.ndim != 1 or action.shape[0] != ACTION_DIM:
-            raise ValueError(f"Expected 1-D action of shape ({ACTION_DIM},), got {action.shape}")
-
-        with torch.enable_grad():
-            if hasattr(self._env, "take_action"):
-                self._env.take_action(action)
-            else:
-                self._env.step(action)
-
-        self._step_count += 1
-
-        is_success = bool(getattr(self._env, "eval_success", False))
-        if not is_success and hasattr(self._env, "check_success"):
-            is_success = bool(self._env.check_success())
-
-        obs = self._get_obs()
-        reward = float(is_success)
-        terminated = is_success
-        truncated = self._step_count >= self.episode_length
-
-        info: dict[str, Any] = {
-            "task": self.task_name,
-            "is_success": is_success,
-            "step": self._step_count,
-        }
-        if terminated or truncated:
-            info["final_info"] = {
-                "task": self.task_name,
-                "is_success": is_success,
-            }
-            self.reset()
-
-        return obs, reward, terminated, truncated, info
-
-    def render(self) -> np.ndarray:
-        self._ensure_env()
-        obs = self._get_obs()
-        # Prefer head camera for rendering; fall back to first available.
-        if "head_camera" in obs["pixels"]:
-            return obs["pixels"]["head_camera"]
-        return next(iter(obs["pixels"].values()))
-
-    def close(self) -> None:
-        if self._env is not None:
-            if hasattr(self._env, "close_env"):
-                import contextlib
-
-                with contextlib.suppress(TypeError):
-                    self._env.close_env()
-            self._env = None
-
-
-# ---- Multi-task factory --------------------------------------------------------
-
-
-def _make_env_fns(
-    *,
-    task_name: str,
-    n_envs: int,
-    camera_names: list[str],
-    observation_height: int,
-    observation_width: int,
-    episode_length: int,
-) -> list[Callable[[], RoboTwinEnv]]:
-    """Return n_envs factory callables for a single task."""
-
-    def _make_one(episode_index: int) -> RoboTwinEnv:
-        return RoboTwinEnv(
-            task_name=task_name,
-            episode_index=episode_index,
-            n_envs=n_envs,
-            camera_names=camera_names,
-            observation_height=observation_height,
-            observation_width=observation_width,
-            episode_length=episode_length,
-        )
-
-    return [partial(_make_one, i) for i in range(n_envs)]
-
-
-def create_robotwin_envs(
-    task: str,
-    n_envs: int,
-    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
-    camera_names: Sequence[str] = ROBOTWIN_CAMERA_NAMES,
-    observation_height: int = DEFAULT_CAMERA_H,
-    observation_width: int = DEFAULT_CAMERA_W,
-    episode_length: int = DEFAULT_EPISODE_LENGTH,
-) -> dict[str, dict[int, Any]]:
-    """Create vectorized RoboTwin 2.0 environments.
-
-    Returns:
-        ``dict[task_name][0] -> VectorEnv`` — one entry per task, each wrapping
-        ``n_envs`` parallel rollouts.
-
-    Args:
-        task: Comma-separated list of task names (e.g. ``"beat_block_hammer"``
-            or ``"beat_block_hammer,click_bell"``).
-        n_envs: Number of parallel rollouts per task.
-        env_cls: Vector env constructor (e.g. ``gym.vector.AsyncVectorEnv``).
-        camera_names: Cameras to include in observations.
-        observation_height: Pixel height for all cameras.
-        observation_width: Pixel width for all cameras.
-        episode_length: Max steps before truncation.
-    """
-    if env_cls is None or not callable(env_cls):
-        raise ValueError("env_cls must be callable (e.g. gym.vector.AsyncVectorEnv).")
-    if not isinstance(n_envs, int) or n_envs <= 0:
-        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
-
-    task_names = [t.strip() for t in str(task).split(",") if t.strip()]
-    if not task_names:
-        raise ValueError("`task` must contain at least one RoboTwin task name.")
-
-    unknown = [t for t in task_names if t not in ROBOTWIN_TASKS]
-    if unknown:
-        raise ValueError(f"Unknown RoboTwin tasks: {unknown}. Available tasks: {sorted(ROBOTWIN_TASKS)}")
-
-    logger.info(
-        "Creating RoboTwin envs | tasks=%s | n_envs(per task)=%d",
-        task_names,
-        n_envs,
-    )
-
-    is_async = env_cls is gym.vector.AsyncVectorEnv
-    cached_obs_space: spaces.Space | None = None
-    cached_act_space: spaces.Space | None = None
-    cached_metadata: dict[str, Any] | None = None
-
-    out: dict[str, dict[int, Any]] = defaultdict(dict)
-    for task_name in task_names:
-        fns = _make_env_fns(
-            task_name=task_name,
-            n_envs=n_envs,
-            camera_names=list(camera_names),
-            observation_height=observation_height,
-            observation_width=observation_width,
-            episode_length=episode_length,
-        )
-        if is_async:
-            lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
-            if cached_obs_space is None:
-                cached_obs_space = lazy.observation_space
-                cached_act_space = lazy.action_space
-                cached_metadata = lazy.metadata
-            out[task_name][0] = lazy
-        else:
-            out[task_name][0] = env_cls(fns)
-        logger.info("Built vec env | task=%s | n_envs=%d", task_name, n_envs)
-
-    return {k: dict(v) for k, v in out.items()}
@@ -34,25 +34,6 @@ from lerobot.utils.utils import get_channel_first_image_shape
 from .configs import EnvConfig


-def parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
-    """Normalize ``camera_name`` into a non-empty list of strings.
-
-    Accepts a comma-separated string (``"cam_a,cam_b"``) or a sequence of
-    strings (tuples/lists). Whitespace is stripped; empty entries are
-    dropped. Raises ``TypeError`` for unsupported input types and
-    ``ValueError`` when the normalized list is empty.
-
-    Args:
-        camera_name: Either one comma-separated string or a ``list``/``tuple``
-            of names. Although the annotation says ``Sequence[str]``, only
-            ``list`` and ``tuple`` instances pass the runtime check below.
-
-    Returns:
-        The stripped, non-empty camera names in their original order.
-
-    Raises:
-        TypeError: If ``camera_name`` is not a ``str``, ``list`` or ``tuple``.
-        ValueError: If no non-empty name remains after stripping.
-    """
-    if isinstance(camera_name, str):
-        # Split on commas, trim each piece and discard blank pieces.
-        cams = [c.strip() for c in camera_name.split(",") if c.strip()]
-    elif isinstance(camera_name, (list | tuple)):
-        # Elements are coerced with ``str()`` first, so non-string items are
-        # stringified rather than rejected.
-        cams = [str(c).strip() for c in camera_name if str(c).strip()]
-    else:
-        raise TypeError(f"camera_name must be str or sequence[str], got {type(camera_name).__name__}")
-    if not cams:
-        raise ValueError("camera_name resolved to an empty list.")
-    return cams
-
-
 def _convert_nested_dict(d):
    result = {}
    for k, v in d.items():
@@ -172,20 +153,17 @@ class _LazyAsyncVectorEnv:
        env_fns: list[Callable],
        observation_space=None,
        action_space=None,
-        metadata=None,
    ):
        self._env_fns = env_fns
        self._env: gym.vector.AsyncVectorEnv | None = None
        self.num_envs = len(env_fns)
-        if observation_space is not None and action_space is not None and metadata is not None:
+        if observation_space is not None and action_space is not None:
            self.observation_space = observation_space
            self.action_space = action_space
-            self.metadata = metadata
        else:
            tmp = env_fns[0]()
            self.observation_space = tmp.observation_space
            self.action_space = tmp.action_space
-            self.metadata = tmp.metadata
            tmp.close()
        self.single_observation_space = self.observation_space
        self.single_action_space = self.action_space
@@ -194,10 +172,6 @@ class _LazyAsyncVectorEnv:
        if self._env is None:
            self._env = gym.vector.AsyncVectorEnv(self._env_fns, context="forkserver", shared_memory=True)

-    @property
-    def unwrapped(self):
-        """Return this lazy wrapper itself rather than an inner environment."""
-        return self
-
    def reset(self, **kwargs):
        self._ensure()
        return self._env.reset(**kwargs)
@@ -1,589 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""VLABench environment wrapper for LeRobot.
-
-VLABench is a large-scale benchmark for language-conditioned robotic manipulation
-with long-horizon reasoning, built on MuJoCo/dm_control.
-
- Paper: https://arxiv.org/abs/2412.18194
- GitHub: https://github.com/OpenMOSS/VLABench
- Website: https://vlabench.github.io
-"""
-
-from __future__ import annotations
-
-import contextlib
-import logging
-from collections import defaultdict
-from collections.abc import Callable, Sequence
-from typing import Any
-
-import cv2
-import gymnasium as gym
-import numpy as np
-from gymnasium import spaces
-from scipy.spatial.transform import Rotation
-
-from lerobot.types import RobotObservation
-
-from .utils import _LazyAsyncVectorEnv
-
-logger = logging.getLogger(__name__)
-
-# Layout of one end-effector action vector.
-ACTION_DIM = 7  # pos(3) + euler(3) + gripper(1)
-# Per-dimension bounds of the action space: the first six entries span
-# [-1, 1]; the last entry (gripper) spans [0, 1].
-ACTION_LOW = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0], dtype=np.float32)
-ACTION_HIGH = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
-
-# Default value of `max_episode_steps` in `VLABenchEnv.__init__`. It is a
-# single number shared by every task, not a per-task table.
-DEFAULT_MAX_EPISODE_STEPS = 500
-
-# VLABench task suites
-# Task names that make up the "primitive" suite.
-PRIMITIVE_TASKS = [
-    "select_fruit",
-    "select_toy",
-    "select_chemistry_tube",
-    "add_condiment",
-    "select_book",
-    "select_painting",
-    "select_drink",
-    "insert_flower",
-    "select_billiards",
-    "select_ingredient",
-    "select_mahjong",
-    "select_poker",
-    # Physical series
-    "density_qa",
-    "friction_qa",
-    "magnetism_qa",
-    "reflection_qa",
-    "simple_cuestick_usage",
-    "simple_seesaw_usage",
-    "sound_speed_qa",
-    "thermal_expansion_qa",
-    "weight_qa",
-]
-
-# Task names that make up the "composite" suite.
-COMPOSITE_TASKS = [
-    "cluster_billiards",
-    "cluster_book",
-    "cluster_drink",
-    "cluster_toy",
-    "cook_dishes",
-    "cool_drink",
-    "find_unseen_object",
-    "get_coffee",
-    "hammer_nail",
-    "heat_food",
-    "make_juice",
-    "play_mahjong",
-    "play_math_game",
-    "play_poker",
-    "play_snooker",
-    "rearrange_book",
-    "rearrange_chemistry_tube",
-    "set_dining_table",
-    "set_study_table",
-    "store_food",
-    "take_chemistry_experiment",
-    "use_seesaw_complex",
-]
-
-# Suite name -> ordered task list. `create_vlabench_envs` looks each
-# comma-separated entry of its `task` argument up here; an entry that is not
-# a key is treated as a single task name. The position of a task in its list
-# becomes its task id in the returned mapping.
-SUITE_TASKS: dict[str, list[str]] = {
-    "primitive": PRIMITIVE_TASKS,
-    "composite": COMPOSITE_TASKS,
-}
-
-
-class VLABenchEnv(gym.Env):
-    """Gymnasium wrapper for VLABench environments.
-
-    Wraps the dm_control-based VLABench simulator behind a standard gym.Env interface.
-    Supports multiple cameras (front, second, wrist) and end-effector control.
-    """
-
-    metadata = {"render_modes": ["rgb_array"], "render_fps": 10}
-
-    def __init__(
-        self,
-        task: str = "select_fruit",
-        obs_type: str = "pixels_agent_pos",
-        render_mode: str = "rgb_array",
-        render_resolution: tuple[int, int] = (480, 480),
-        robot: str = "franka",
-        max_episode_steps: int = DEFAULT_MAX_EPISODE_STEPS,
-        action_mode: str = "eef",
-    ):
-        """Store the configuration and declare the observation/action spaces.
-
-        The simulator itself is not created here; see the comment on
-        ``self._env`` below.
-
-        Args:
-            task: VLABench task name, later passed to ``load_env``.
-            obs_type: ``"pixels"`` (three camera images) or
-                ``"pixels_agent_pos"`` (images plus a 7-D ``agent_pos``).
-            render_mode: Stored on the instance; not otherwise read in this
-                class.
-            render_resolution: ``(height, width)`` of every camera image.
-            robot: Robot name, later passed to ``load_env``.
-            max_episode_steps: Stored as ``_max_episode_steps``; this class
-                never truncates episodes itself (``step`` always returns
-                ``truncated=False``).
-            action_mode: Stored here and validated on every ``step`` call.
-
-        Raises:
-            NotImplementedError: If ``obs_type`` is ``"state"``.
-            ValueError: If ``obs_type`` is any other unsupported value.
-        """
-        super().__init__()
-        self.task = task
-        self.obs_type = obs_type
-        self.render_mode = render_mode
-        self.render_resolution = render_resolution
-        self.robot = robot
-        self._max_episode_steps = max_episode_steps
-        self.action_mode = action_mode
-
-        # Deferred — created on first reset() inside worker subprocess to avoid
-        # inheriting stale GPU/EGL contexts when AsyncVectorEnv spawns workers.
-        # We never cache `env.physics`: dm_control exposes it as a weakref
-        # proxy that goes stale across resets (rebuilds the sim), so we always
-        # refetch it via `self._env.physics` at the call site.
-        self._env = None
-        self.task_description = ""  # populated on first reset
-        # Cached world-frame XYZ of the robot base link. The VLABench datasets
-        # log both `observation.state` positions and `actions` positions in
-        # robot-base frame (see VLABench/scripts/convert_to_lerobot.py which
-        # subtracts `robot_frame_pos` from ee_pos). The robot is attached at a
-        # fixed offset per task so this is safe to cache once per env build.
-        self._robot_base_xyz: np.ndarray | None = None
-
-        # render_resolution is interpreted as (height, width).
-        h, w = self.render_resolution
-
-        if self.obs_type == "state":
-            raise NotImplementedError(
-                "The 'state' observation type is not supported in VLABenchEnv. "
-                "Please use 'pixels' or 'pixels_agent_pos'."
-            )
-        elif self.obs_type == "pixels":
-            # Image-only observation: one HWC uint8 box per camera key.
-            self.observation_space = spaces.Dict(
-                {
-                    "pixels": spaces.Dict(
-                        {
-                            "image": spaces.Box(low=0, high=255, shape=(h, w, 3), dtype=np.uint8),
-                            "second_image": spaces.Box(low=0, high=255, shape=(h, w, 3), dtype=np.uint8),
-                            "wrist_image": spaces.Box(low=0, high=255, shape=(h, w, 3), dtype=np.uint8),
-                        }
-                    ),
-                }
-            )
-        elif self.obs_type == "pixels_agent_pos":
-            # Same three image boxes plus an unbounded 7-D float64 state vector.
-            self.observation_space = spaces.Dict(
-                {
-                    "pixels": spaces.Dict(
-                        {
-                            "image": spaces.Box(low=0, high=255, shape=(h, w, 3), dtype=np.uint8),
-                            "second_image": spaces.Box(low=0, high=255, shape=(h, w, 3), dtype=np.uint8),
-                            "wrist_image": spaces.Box(low=0, high=255, shape=(h, w, 3), dtype=np.uint8),
-                        }
-                    ),
-                    "agent_pos": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float64),
-                }
-            )
-        else:
-            raise ValueError(f"Unsupported obs_type: {self.obs_type}")
-
-        self.action_space = spaces.Box(low=ACTION_LOW, high=ACTION_HIGH, dtype=np.float32)
-
-    # Max attempts to rebuild the underlying env when MuJoCo throws
-    # `PhysicsError` (e.g. mjWARN_BADQACC) during VLABench's 20-step
-    # reset warm-up. Some random task/layout samples land in unstable
-    # initial configurations; re-sampling the layout almost always
-    # gives a stable one. A handful of upstream tasks (notably
-    # `select_mahjong`) have layout samplers that diverge often enough
-    # to need >>5 retries, so we pick a generous ceiling.
-    _ENSURE_ENV_MAX_ATTEMPTS = 20
-
-    def _ensure_env(self) -> None:
-        """Create the underlying VLABench env on first use.
-
-        Called inside the worker subprocess after fork(), so each worker gets
-        its own clean rendering context rather than inheriting a stale one from
-        the parent process (which causes crashes with AsyncVectorEnv).
-
-        Retries on `PhysicsError`: VLABench's `LM4ManipDMEnv.reset()` runs 20
-        warm-up `step()` calls while toggling gravity/fluids to let the scene
-        settle; for some random layouts MuJoCo's integrator diverges and
-        raises `mjWARN_BADQACC`. Re-sampling the layout almost always yields
-        a stable one, so we retry a number of times before giving up. Between
-        attempts we reseed NumPy's global RNG from OS entropy so the upstream
-        task sampler explores fresh initial states — without this, retries
-        can replay the same diverging configuration when the sampler is
-        deterministic given the current RNG state.
-
-        On success this sets `self._env`, `self.task_description` and
-        `self._robot_base_xyz`. It is a no-op when `self._env` already exists.
-
-        Raises:
-            RuntimeError: If every one of the `_ENSURE_ENV_MAX_ATTEMPTS`
-                attempts raised `PhysicsError`. Exceptions of any other type
-                raised by `load_env` are not retried and propagate directly.
-        """
-        if self._env is not None:
-            return
-
-        # Imported inside the method rather than at module level. The two bare
-        # `VLABench.*` imports are unused by name (hence the F401 waivers);
-        # presumably they register robots/tasks as an import side effect —
-        # TODO confirm.
-        import VLABench.robots  # noqa: F401  # type: ignore[import-untyped]
-        import VLABench.tasks  # noqa: F401  # type: ignore[import-untyped]
-        from dm_control.rl.control import PhysicsError  # type: ignore[import-untyped]
-        from VLABench.envs import load_env  # type: ignore[import-untyped]
-
-        h, w = self.render_resolution
-        last_exc: PhysicsError | None = None
-        for attempt in range(1, self._ENSURE_ENV_MAX_ATTEMPTS + 1):
-            try:
-                env = load_env(task=self.task, robot=self.robot, render_resolution=(h, w))
-                self._env = env
-                break
-            except PhysicsError as exc:
-                last_exc = exc
-                logger.warning(
-                    "PhysicsError on attempt %d/%d while building task '%s': %s. Retrying with fresh layout…",
-                    attempt,
-                    self._ENSURE_ENV_MAX_ATTEMPTS,
-                    self.task,
-                    exc,
-                )
-                # Reseeds the process-wide NumPy RNG, so this affects any other
-                # code in the same process that uses `np.random`.
-                np.random.seed(None)
-        if self._env is None:
-            assert last_exc is not None
-            raise RuntimeError(
-                f"VLABench task '{self.task}' failed to produce a stable "
-                f"initial layout after {self._ENSURE_ENV_MAX_ATTEMPTS} "
-                f"attempts. This task's upstream sampler diverges too "
-                f"often for the configured robot; consider removing it "
-                f"from the eval set. Last physics error: {last_exc}"
-            ) from last_exc
-
-        # Extract task description from the dm_control task
-        # Preference order: `task_description`, then `language_instruction`,
-        # then the plain task name.
-        task_obj = self._env.task
-        if hasattr(task_obj, "task_description"):
-            self.task_description = task_obj.task_description
-        elif hasattr(task_obj, "language_instruction"):
-            self.task_description = task_obj.language_instruction
-        else:
-            self.task_description = self.task
-
-        # Cache robot base world position so `_build_ctrl_from_action` and
-        # `_get_obs` can translate between robot-frame (dataset) and
-        # world-frame (dm_control) without hitting physics every call.
-        try:
-            self._robot_base_xyz = np.asarray(self._env.get_robot_frame_position(), dtype=np.float64).reshape(
-                3
-            )
-        except Exception:
-            # Fallback to VLABench's default Franka base position.
-            # NOTE(review): this catches every exception type, so an unrelated
-            # failure would also silently select the hard-coded offset.
-            self._robot_base_xyz = np.array([0.0, -0.4, 0.78], dtype=np.float64)
-
-    def _get_obs(self) -> dict:
-        """Get current observation from the environment.
-
-        Returns:
-            ``{"pixels": {...}}`` for ``obs_type == "pixels"``; the same dict
-            plus ``"agent_pos"`` (7 float64 values: robot-frame position,
-            euler angles, gripper) for ``"pixels_agent_pos"``. The image dict
-            always has the keys ``image``, ``second_image`` and ``wrist_image``.
-
-        Raises:
-            ValueError: If ``obs_type`` is neither of the two values above.
-        """
-        assert self._env is not None
-
-        obs = self._env.get_observation()
-        h, w = self.render_resolution
-
-        def _to_hwc3(arr: np.ndarray) -> np.ndarray:
-            """Coerce any camera array to the declared (h, w, 3) uint8 shape."""
-            a = np.asarray(arr)
-            # Drop a leading singleton batch dim if present.
-            while a.ndim > 3 and a.shape[0] == 1:
-                a = a[0]
-            if a.ndim == 3 and a.shape[0] in (1, 3, 4) and a.shape[-1] not in (1, 3, 4):
-                # CHW → HWC
-                a = np.transpose(a, (1, 2, 0))
-            if a.ndim == 2:
-                # Grayscale: replicate the single plane into three channels.
-                a = np.stack([a] * 3, axis=-1)
-            if a.ndim != 3:
-                # Unrecognized rank: substitute a black frame instead of raising.
-                return np.zeros((h, w, 3), dtype=np.uint8)
-            # Force 3 channels.
-            if a.shape[-1] == 1:
-                a = np.repeat(a, 3, axis=-1)
-            elif a.shape[-1] == 4:
-                # Keep the first three channels and drop the fourth.
-                a = a[..., :3]
-            elif a.shape[-1] != 3:
-                return np.zeros((h, w, 3), dtype=np.uint8)
-            if a.shape[:2] != (h, w):
-                # cv2.resize takes the target size as (width, height).
-                a = cv2.resize(a, (w, h), interpolation=cv2.INTER_AREA)
-            # NOTE(review): values are cast, not rescaled — assumes the source
-            # frames are already in the 0-255 range; confirm.
-            return a.astype(np.uint8)
-
-        # Extract camera images — VLABench returns (n_cameras, C, H, W) or individual arrays
-        # Any other layout (missing key, non-ndarray value, other rank) leaves
-        # `raw_frames` empty, so every camera falls back to a black frame below.
-        raw_frames: list[np.ndarray] = []
-        if "rgb" in obs:
-            rgb = obs["rgb"]
-            if isinstance(rgb, np.ndarray):
-                if rgb.ndim == 4:
-                    raw_frames = [rgb[i] for i in range(rgb.shape[0])]
-                elif rgb.ndim == 3:
-                    raw_frames = [rgb]
-
-        # Frames are assigned to keys by position; frames beyond the third are
-        # ignored and missing ones are filled with zeros.
-        image_keys = ["image", "second_image", "wrist_image"]
-        images: dict[str, np.ndarray] = {}
-        for i, key in enumerate(image_keys):
-            if i < len(raw_frames):
-                images[key] = _to_hwc3(raw_frames[i])
-            else:
-                images[key] = np.zeros((h, w, 3), dtype=np.uint8)
-
-        # Convert VLABench's raw ee_state `[pos_world(3), quat_wxyz(4), open(1)]`
-        # to the dataset's observation.state layout `[pos_robot(3), euler_xyz(3),
-        # gripper(1)]`. See VLABench/scripts/convert_to_lerobot.py — positions
-        # are stored in robot-base frame and orientations as scipy extrinsic
-        # 'xyz' euler angles.
-        # NOTE(review): when `ee_state` is missing the default is eight zeros,
-        # so `raw.size >= 7` holds and the quaternion below is all zeros; the
-        # identity fallback only applies to arrays shorter than 7. Confirm that
-        # `Rotation.from_quat` tolerates a zero-norm quaternion.
-        raw = np.asarray(obs.get("ee_state", np.zeros(8)), dtype=np.float64).ravel()
-        pos_world = raw[:3] if raw.size >= 3 else np.zeros(3, dtype=np.float64)
-        quat_wxyz = raw[3:7] if raw.size >= 7 else np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float64)
-        gripper = float(raw[7]) if raw.size >= 8 else 0.0
-
-        # Without a cached base offset the world position is used unchanged.
-        base = self._robot_base_xyz if self._robot_base_xyz is not None else np.zeros(3, dtype=np.float64)
-        pos_robot = pos_world - base
-        # Reorder from [w, x, y, z] to the [x, y, z, w] order handed to scipy.
-        euler_xyz = Rotation.from_quat([quat_wxyz[1], quat_wxyz[2], quat_wxyz[3], quat_wxyz[0]]).as_euler(
-            "xyz", degrees=False
-        )
-
-        ee_state = np.concatenate([pos_robot, euler_xyz, [gripper]]).astype(np.float64)
-
-        if self.obs_type == "pixels":
-            return {"pixels": images}
-        elif self.obs_type == "pixels_agent_pos":
-            return {
-                "pixels": images,
-                "agent_pos": ee_state.astype(np.float64),
-            }
-        else:
-            raise ValueError(f"Unknown obs_type: {self.obs_type}")
-
-    # ---- Action adaptation (EEF → joint ctrl) --------------------------------
-    #
-    # The HF vlabench datasets log 7D actions
-    # `[x, y, z (robot frame), rx, ry, rz (scipy extrinsic xyz), gripper]`,
-    # exactly matching VLABench's own eval pipeline (evaluator.base):
-    #   pos, euler, g = policy(...)
-    #   quat = euler_to_quaternion(*euler)      # extrinsic xyz -> wxyz
-    #   _, qpos = robot.get_qpos_from_ee_pos(physics, pos=pos + base, quat=quat)
-    #   env.step(np.concatenate([qpos, [g, g]]))
-    #
-    # VLABench's dm_control task writes `data.ctrl[:] = action` directly — for
-    # Franka that's 9 entries (7 arm joints + 2 gripper fingers). We mirror the
-    # above conversion so the policy's EEF commands actually drive the robot.
-
-    _FRANKA_FINGER_OPEN = 0.04  # qpos when gripper fully open
-
-    def _build_ctrl_from_action(self, action: np.ndarray, ctrl_dim: int) -> np.ndarray:
-        """Convert a 7D EEF action into the `ctrl_dim`-sized joint command vector.
-
-        For the Franka default (ctrl_dim=9): 7 arm joint qposes (via IK) +
-        2 gripper finger qposes (open/closed based on the gripper scalar).
-        If the action is already joint-space (shape matches ctrl_dim), pass
-        through.
-
-        Args:
-            action: 1-D action vector.
-            ctrl_dim: Length of the simulator's control vector; `step` passes
-                `physics.data.ctrl.shape[0]`.
-
-        Returns:
-            A float64 vector of length `ctrl_dim`.
-        """
-        # The pass-through test runs first, so with `ctrl_dim == 7` a 7-D
-        # action is treated as joint-space and never reaches the IK path.
-        if action.shape[0] == ctrl_dim:
-            return action.astype(np.float64, copy=False)
-
-        if action.shape[0] != 7:
-            # Unknown layout — fall back to zero-pad so the sim doesn't crash.
-            # Longer actions are truncated to `ctrl_dim`; shorter ones are
-            # copied into the leading entries.
-            padded = np.zeros(ctrl_dim, dtype=np.float64)
-            padded[: min(action.shape[0], ctrl_dim)] = action[:ctrl_dim]
-            return padded
-
-        from dm_control.utils.inverse_kinematics import qpos_from_site_pose
-
-        # Action position is in robot-base frame (see convert_to_lerobot.py);
-        # dm_control's IK expects a world-frame target.
-        base = self._robot_base_xyz if self._robot_base_xyz is not None else np.zeros(3, dtype=np.float64)
-        pos_world = np.asarray(action[:3], dtype=np.float64) + base
-        rx, ry, rz = float(action[3]), float(action[4]), float(action[5])
-        # The gripper command is clamped to [0, 1] before it is scaled below.
-        gripper = float(np.clip(action[6], 0.0, 1.0))
-
-        # Dataset euler is scipy extrinsic 'xyz' (same as VLABench's
-        # `euler_to_quaternion`). scipy emits `[x, y, z, w]`; dm_control's IK
-        # and MuJoCo use `[w, x, y, z]`, so reorder.
-        qxyzw = Rotation.from_euler("xyz", [rx, ry, rz], degrees=False).as_quat()
-        quat = np.array([qxyzw[3], qxyzw[0], qxyzw[1], qxyzw[2]], dtype=np.float64)
-
-        assert self._env is not None
-        robot = self._env.task.robot
-        site_name = robot.end_effector_site.full_identifier
-
-        # inplace=False so IK doesn't mutate physics state mid-step — we only
-        # want the solved qpos. Fetch a fresh physics handle — caching it can
-        # yield a stale weakref after a reset.
-        # NOTE(review): the result is used without checking whether the solver
-        # converged within `max_steps` — confirm that is intended.
-        ik_result = qpos_from_site_pose(
-            self._env.physics,
-            site_name=site_name,
-            target_pos=pos_world,
-            target_quat=quat,
-            inplace=False,
-            max_steps=100,
-        )
-        n_dof = robot.n_dof  # 7 for Franka
-        arm_qpos = ik_result.qpos[:n_dof]
-
-        # Dataset gripper convention: 1 = open (finger qpos = 0.04),
-        # 0 = closed (finger qpos = 0.0). See VLABench/scripts/convert_to_lerobot.py
-        # where `trajectory[i][-1] > 0.03` is encoded as `1`.
-        finger_qpos = gripper * self._FRANKA_FINGER_OPEN
-
-        ctrl = np.zeros(ctrl_dim, dtype=np.float64)
-        ctrl[:n_dof] = arm_qpos
-        # Remaining entries are gripper fingers (usually 2 for Franka).
-        # Every entry after the arm joints receives the same finger value.
-        ctrl[n_dof:] = finger_qpos
-        return ctrl
-
-    def reset(self, seed=None, **kwargs) -> tuple[RobotObservation, dict[str, Any]]:
-        """Reset the episode, building the simulator first if needed.
-
-        Args:
-            seed: Optional seed forwarded to ``gym.Env.reset``; when given, a
-                derived integer is also pushed into the inner env's RNGs.
-            **kwargs: Accepted but not used.
-
-        Returns:
-            ``(observation, info)`` where ``info`` is ``{"is_success": False}``.
-        """
-        self._ensure_env()
-        assert self._env is not None
-        super().reset(seed=seed)
-
-        if seed is not None:
-            # Draw the inner seed from `self.np_random` (just reseeded above)
-            # rather than reusing `seed` directly.
-            self._seed_inner_env(int(self.np_random.integers(0, 2**31 - 1)))
-
-        self._env.reset()
-
-        observation = self._get_obs()
-        info = {"is_success": False}
-        return observation, info
-
-    def _seed_inner_env(self, seed: int) -> None:
-        """Propagate `seed` to the inner dm_control env. `Environment.reset()`
-        doesn't accept a seed, so we re-seed the task and environment
-        `RandomState`s directly. Best-effort: silently skipped when the
-        expected attributes are absent on a given VLABench version.
-
-        Args:
-            seed: Integer passed to each RNG's `seed` method.
-        """
-        # Two targets: `self._env.task.random` and `self._env._random_state`.
-        for owner_attr, rng_attr in (("task", "random"), (None, "_random_state")):
-            # NOTE(review): this lookup has no default, so a missing `task`
-            # attribute raises AttributeError; only the RNG lookup below is
-            # tolerant of absence.
-            owner = getattr(self._env, owner_attr) if owner_attr else self._env
-            rng = getattr(owner, rng_attr, None)
-            rng_seed = getattr(rng, "seed", None)
-            if callable(rng_seed):
-                rng_seed(seed)
-
-    def step(self, action: np.ndarray) -> tuple[RobotObservation, float, bool, bool, dict[str, Any]]:
-        """Apply one action and return the Gymnasium five-tuple.
-
-        Args:
-            action: 1-D action vector; it is converted to a simulator control
-                vector by `_build_ctrl_from_action`.
-
-        Returns:
-            `(observation, reward, terminated, truncated, info)`. `terminated`
-            is True when the task reports success or when the simulator raises
-            `PhysicsError`; `truncated` is always False. `info` carries `task`
-            and `is_success`, plus `physics_error` on the failure path.
-
-        Raises:
-            ValueError: If `action` is not 1-D or `action_mode` is not one of
-                "eef", "joint" or "delta_eef".
-        """
-        from dm_control.rl.control import PhysicsError  # type: ignore[import-untyped]
-
-        self._ensure_env()
-        assert self._env is not None
-
-        if action.ndim != 1:
-            raise ValueError(
-                f"Expected action to be 1-D (shape (action_dim,)), "
-                f"but got shape {action.shape} with ndim={action.ndim}"
-            )
-
-        # `action_mode` is only validated here; the conversion below depends
-        # solely on the action's length, not on the mode.
-        if self.action_mode not in ("eef", "joint", "delta_eef"):
-            raise ValueError(f"Unknown action_mode: {self.action_mode}")
-
-        # Always refetch physics — dm_control returns a weakref proxy that can
-        # go stale across resets.
-        physics = self._env.physics
-        ctrl_dim = int(physics.data.ctrl.shape[0])
-        ctrl = self._build_ctrl_from_action(action, ctrl_dim)
-        try:
-            timestep = self._env.step(ctrl)
-        except PhysicsError as exc:
-            # Physics integrator diverged (e.g. mjWARN_BADQACC). Treat it as
-            # a graceful failed termination rather than a hard crash — the
-            # rest of the multi-task eval should still run.
-            logger.warning(
-                "PhysicsError during step on task '%s': %s. Terminating episode.",
-                self.task,
-                exc,
-            )
-            # NOTE(review): the observation is read from the env that just
-            # failed; if this call raises, the env is not closed or dropped
-            # by the lines below.
-            observation = self._get_obs()
-            info = {"task": self.task, "is_success": False, "physics_error": True}
-            # Drop the stale env so the next reset() rebuilds it cleanly.
-            with contextlib.suppress(Exception):
-                self._env.close()
-            self._env = None
-            return observation, 0.0, True, False, info
-
-        # Extract reward from dm_control timestep
-        reward = float(timestep.reward) if timestep.reward is not None else 0.0
-
-        # Check success via the task's termination condition
-        is_success = False
-        if hasattr(self._env, "task") and hasattr(self._env.task, "should_terminate_episode"):
-            is_success = bool(self._env.task.should_terminate_episode(self._env.physics))
-
-        terminated = is_success
-        truncated = False
-        info = {
-            "task": self.task,
-            "is_success": is_success,
-        }
-
-        observation = self._get_obs()
-
-        if terminated:
-            # The env is reset right away, but the observation returned below
-            # is the one captured before the reset; the reset's own return
-            # value is discarded.
-            self.reset()
-
-        return observation, reward, terminated, truncated, info
-
-    def render(self) -> np.ndarray:
-        """Return the current frame stored under the `image` camera key.
-
-        Builds the simulator first if it does not exist yet.
-        """
-        self._ensure_env()
-        obs = self._get_obs()
-        return obs["pixels"]["image"]
-
-    def close(self):
-        """Close the inner env if one was built and forget it.
-
-        Safe to call repeatedly: after the first call `self._env` is None.
-        """
-        if self._env is not None:
-            self._env.close()
-            self._env = None
-
-
-# ---- Main API ----------------------------------------------------------------
-
-
-def create_vlabench_envs(
-    task: str,
-    n_envs: int,
-    gym_kwargs: dict[str, Any] | None = None,
-    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
-) -> dict[str, dict[int, Any]]:
-    """
-    Create vectorized VLABench environments with a consistent return shape.
-
-    Args:
-        task: Comma-separated suite names and/or individual task names.
-        n_envs: Number of environment factories per task; must be a positive int.
-        gym_kwargs: Extra keyword arguments forwarded to every `VLABenchEnv`.
-        env_cls: Callable that wraps a list of factories into a vector env.
-            Required despite the `None` default.
-
-    Returns:
-        dict[suite_name][task_id] -> vec_env (env_cls([...]) with exactly n_envs factories)
-
-    Raises:
-        ValueError: If `env_cls` is missing or not callable, `n_envs` is not a
-            positive int, or `task` contains no non-empty entry.
-
-    Notes:
-        - n_envs is the number of rollouts *per task*.
-        - `task` can be a suite name ("primitive", "composite"), a comma-separated list of
-          suite names, or individual task names (e.g. "select_fruit,heat_food").
-        - The outer key is each entry exactly as written in `task`; an
-          individual task name becomes its own group with task id 0.
-        - Names are not checked against the known task lists here, so an
-          unknown name is passed on to `VLABenchEnv` unchanged.
-    """
-    if env_cls is None or not callable(env_cls):
-        raise ValueError("env_cls must be a callable that wraps a list of environment factory callables.")
-    if not isinstance(n_envs, int) or n_envs <= 0:
-        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
-
-    # Copy so the caller's dict is never shared with the factories.
-    gym_kwargs = dict(gym_kwargs or {})
-    task_groups = [t.strip() for t in task.split(",") if t.strip()]
-    if not task_groups:
-        raise ValueError("`task` must contain at least one VLABench task or suite name.")
-
-    logger.info(
-        "Creating VLABench envs | task_groups=%s | n_envs(per task)=%d",
-        task_groups,
-        n_envs,
-    )
-
-    # Only the exact AsyncVectorEnv class selects the lazy wrapper; any other
-    # callable is invoked directly on the factory list.
-    is_async = env_cls is gym.vector.AsyncVectorEnv
-    # Spaces/metadata taken from the first lazy env and handed to later ones.
-    cached_obs_space = None
-    cached_act_space = None
-    cached_metadata = None
-    out: dict[str, dict[int, Any]] = defaultdict(dict)
-
-    for group in task_groups:
-        # Check if it's a suite name, otherwise treat as individual task
-        tasks = SUITE_TASKS.get(group, [group])
-
-        for tid, task_name in enumerate(tasks):
-            logger.info(
-                "Building vec env | group=%s | task_id=%d | task=%s",
-                group,
-                tid,
-                task_name,
-            )
-
-            # `tn=task_name` binds the current name as a default argument so
-            # each factory keeps its own task instead of the loop's last value.
-            fns = [(lambda tn=task_name: VLABenchEnv(task=tn, **gym_kwargs)) for _ in range(n_envs)]
-
-            if is_async:
-                lazy = _LazyAsyncVectorEnv(fns, cached_obs_space, cached_act_space, cached_metadata)
-                if cached_obs_space is None:
-                    cached_obs_space = lazy.observation_space
-                    cached_act_space = lazy.action_space
-                    cached_metadata = lazy.metadata
-                out[group][tid] = lazy
-            else:
-                out[group][tid] = env_cls(fns)
-
-    # Convert the defaultdict to plain dicts before returning.
-    return {group: dict(task_map) for group, task_map in out.items()}
@@ -24,6 +24,8 @@ from .pi0_fast.configuration_pi0_fast import PI0FastConfig as PI0FastConfig
 from .pi05.configuration_pi05 import PI05Config as PI05Config
 from .pretrained import PreTrainedPolicy as PreTrainedPolicy
 from .sac.configuration_sac import SACConfig as SACConfig
+from .sac.reward_model.configuration_classifier import RewardClassifierConfig as RewardClassifierConfig
+from .sarm.configuration_sarm import SARMConfig as SARMConfig
 from .smolvla.configuration_smolvla import SmolVLAConfig as SmolVLAConfig
 from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
 from .utils import make_robot_action, prepare_observation_for_inference
@@ -44,7 +46,9 @@ __all__ = [
    "PI0Config",
    "PI0FastConfig",
    "PI05Config",
+    "RewardClassifierConfig",
    "SACConfig",
+    "SARMConfig",
    "SmolVLAConfig",
    "TDMPCConfig",
    "VQBeTConfig",
@@ -142,10 +142,9 @@ class ACTPolicy(PreTrainedPolicy):

        actions_hat, (mu_hat, log_sigma_x2_hat) = self.model(batch)

-        abs_err = F.l1_loss(batch[ACTION], actions_hat, reduction="none")
-        valid_mask = ~batch["action_is_pad"].unsqueeze(-1)
-        num_valid = valid_mask.sum() * abs_err.shape[-1]
-        l1_loss = (abs_err * valid_mask).sum() / num_valid.clamp_min(1)
+        l1_loss = (
+            F.l1_loss(batch[ACTION], actions_hat, reduction="none") * ~batch["action_is_pad"].unsqueeze(-1)
+        ).mean()

        loss_dict = {"l1_loss": l1_loss.item()}
        if self.config.use_vae:
@@ -380,9 +380,7 @@ class DiffusionModel(nn.Module):
                    f"{self.config.do_mask_loss_for_padding=}."
                )
            in_episode_bound = ~batch["action_is_pad"]
-            mask = in_episode_bound.unsqueeze(-1)
-            num_valid = mask.sum() * loss.shape[-1]
-            return (loss * mask).sum() / num_valid.clamp_min(1)
+            loss = loss * in_episode_bound.unsqueeze(-1)

        return loss.mean()

@@ -52,6 +52,8 @@ from .pi0.configuration_pi0 import PI0Config
 from .pi05.configuration_pi05 import PI05Config
 from .pretrained import PreTrainedPolicy
 from .sac.configuration_sac import SACConfig
+from .sac.reward_model.configuration_classifier import RewardClassifierConfig
+from .sarm.configuration_sarm import SARMConfig
 from .smolvla.configuration_smolvla import SmolVLAConfig
 from .tdmpc.configuration_tdmpc import TDMPCConfig
 from .utils import validate_visual_features_consistency
@@ -87,7 +89,7 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:

    Args:
        name: The name of the policy. Supported names are "tdmpc", "diffusion", "act",
-            "multi_task_dit", "vqbet", "pi0", "pi05", "sac", "smolvla", "wall_x".
+            "multi_task_dit", "vqbet", "pi0", "pi05", "sac", "reward_classifier", "smolvla", "wall_x".
    Returns:
        The policy class corresponding to the given name.

@@ -130,10 +132,18 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
        from .sac.modeling_sac import SACPolicy

        return SACPolicy
+    elif name == "reward_classifier":
+        from .sac.reward_model.modeling_classifier import Classifier
+
+        return Classifier
    elif name == "smolvla":
        from .smolvla.modeling_smolvla import SmolVLAPolicy

        return SmolVLAPolicy
+    elif name == "sarm":
+        from .sarm.modeling_sarm import SARMRewardModel
+
+        return SARMRewardModel
    elif name == "groot":
        from .groot.modeling_groot import GrootPolicy

@@ -163,7 +173,7 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
    Args:
        policy_type: The type of the policy. Supported types include "tdmpc",
                     "multi_task_dit", "diffusion", "act", "vqbet", "pi0", "pi05", "sac",
-                     "smolvla", "wall_x".
+                     "smolvla", "reward_classifier", "wall_x".
        **kwargs: Keyword arguments to be passed to the configuration class constructor.

    Returns:
@@ -190,6 +200,8 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        return SACConfig(**kwargs)
    elif policy_type == "smolvla":
        return SmolVLAConfig(**kwargs)
+    elif policy_type == "reward_classifier":
+        return RewardClassifierConfig(**kwargs)
    elif policy_type == "groot":
        return GrootConfig(**kwargs)
    elif policy_type == "xvla":
@@ -366,6 +378,14 @@ def make_pre_post_processors(
            dataset_stats=kwargs.get("dataset_stats"),
        )

+    elif isinstance(policy_cfg, RewardClassifierConfig):
+        from .sac.reward_model.processor_classifier import make_classifier_processor
+
+        processors = make_classifier_processor(
+            config=policy_cfg,
+            dataset_stats=kwargs.get("dataset_stats"),
+        )
+
    elif isinstance(policy_cfg, SmolVLAConfig):
        from .smolvla.processor_smolvla import make_smolvla_pre_post_processors

@@ -374,6 +394,14 @@ def make_pre_post_processors(
            dataset_stats=kwargs.get("dataset_stats"),
        )

+    elif isinstance(policy_cfg, SARMConfig):
+        from .sarm.processor_sarm import make_sarm_pre_post_processors
+
+        processors = make_sarm_pre_post_processors(
+            config=policy_cfg,
+            dataset_stats=kwargs.get("dataset_stats"),
+            dataset_meta=kwargs.get("dataset_meta"),
+        )
    elif isinstance(policy_cfg, GrootConfig):
        from .groot.processor_groot import make_groot_pre_post_processors

@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING

@@ -173,14 +174,17 @@ N_COLOR_CHANNELS = 3


 # config
+@dataclass
 class GR00TN15Config(PretrainedConfig):
    model_type = "gr00t_n1_5"
+    backbone_cfg: dict = field(init=False, metadata={"help": "Backbone configuration."})

-    backbone_cfg: dict
-    action_head_cfg: dict
-    action_horizon: int
-    action_dim: int
-    compute_dtype: str = "float32"
+    action_head_cfg: dict = field(init=False, metadata={"help": "Action head configuration."})
+
+    action_horizon: int = field(init=False, metadata={"help": "Action horizon."})
+
+    action_dim: int = field(init=False, metadata={"help": "Action dimension."})
+    compute_dtype: str = field(default="float32", metadata={"help": "Compute dtype."})

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
@@ -688,9 +688,8 @@ class DiffusionObjective(nn.Module):
        loss = F.mse_loss(predicted, target, reduction="none")

        if self.do_mask_loss_for_padding and "action_is_pad" in batch:
-            mask = ~batch["action_is_pad"].unsqueeze(-1)
-            num_valid = mask.sum() * loss.shape[-1]
-            return (loss * mask).sum() / num_valid.clamp_min(1)
+            valid_actions = ~batch["action_is_pad"]
+            loss = loss * valid_actions.unsqueeze(-1)

        return loss.mean()

@@ -753,9 +752,8 @@ class FlowMatchingObjective(nn.Module):
        loss = F.mse_loss(predicted_velocity, target_velocity, reduction="none")

        if self.do_mask_loss_for_padding and "action_is_pad" in batch:
-            mask = ~batch["action_is_pad"].unsqueeze(-1)
-            num_valid = mask.sum() * loss.shape[-1]
-            return (loss * mask).sum() / num_valid.clamp_min(1)
+            valid_mask = ~batch["action_is_pad"]
+            loss = loss * valid_mask.unsqueeze(-1)

        return loss.mean()

@@ -227,7 +227,6 @@ class PI0FastPaliGemma(nn.Module):
        # forward(..., adarms_cond=...) is supported (same as pi0/pi05).
        if use_adarms[0]:
            text_config = self.paligemma.config.text_config
-            del self.paligemma.model.language_model
            self.paligemma.model.language_model = PiGemmaModel(text_config)

        self.to_bfloat16_for_selected_params(precision)
@@ -197,9 +197,6 @@ class PiGemmaModel(GemmaModel):  # type: ignore[misc]

    def __init__(self, config: GemmaConfig, **kwargs):
        super().__init__(config, **kwargs)
-        # Free parent-allocated layers/norm before replacing to avoid ~2x peak memory.
-        del self.layers
-        del self.norm
        # if not getattr(config, "use_adarms", False):
        #     return
        cond_dim = getattr(config, "adarms_cond_dim", None)
@@ -331,7 +328,6 @@ class PiGemmaForCausalLM(GemmaForCausalLM):  # type: ignore[misc]

    def __init__(self, config: GemmaConfig, **kwargs):
        super().__init__(config, **kwargs)
-        del self.model
        self.model = PiGemmaModel(config)


@@ -340,7 +336,6 @@ class PaliGemmaModelWithPiGemma(PaliGemmaModel):

    def __init__(self, config):
        super().__init__(config)
-        del self.language_model
        self.language_model = PiGemmaModel(config.text_config)


@@ -349,7 +344,6 @@ class PaliGemmaForConditionalGenerationWithPiGemma(PaliGemmaForConditionalGenera

    def __init__(self, config):
        super().__init__(config)
-        del self.model
        self.model = PaliGemmaModelWithPiGemma(config)

    # Make modules available through conditional class for BC
@@ -19,7 +19,6 @@ from .action_queue import ActionQueue
 from .configuration_rtc import RTCConfig
 from .latency_tracker import LatencyTracker
 from .modeling_rtc import RTCProcessor
-from .relative import reanchor_relative_rtc_prefix

 __all__ = [
    "ActionInterpolator",
@@ -27,5 +26,4 @@ __all__ = [
    "LatencyTracker",
    "RTCConfig",
    "RTCProcessor",
-    "reanchor_relative_rtc_prefix",
 ]
@@ -35,7 +35,7 @@ class RTCConfig:
    """

    # Infrastructure
-    enabled: bool = True
+    enabled: bool = False

    # Core RTC settings
    # Todo change to exp
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Relative-action helpers for Real-Time Chunking (RTC)."""
-
-from __future__ import annotations
-
-import torch
-
-from lerobot.processor import (
-    NormalizerProcessorStep,
-    RelativeActionsProcessorStep,
-    TransitionKey,
-    create_transition,
-    to_relative_actions,
-)
-
-
-def reanchor_relative_rtc_prefix(
-    prev_actions_absolute: torch.Tensor,
-    current_state: torch.Tensor,
-    relative_step: RelativeActionsProcessorStep,
-    normalizer_step: NormalizerProcessorStep | None,
-    policy_device: torch.device | str,
-) -> torch.Tensor:
-    """Convert absolute leftover actions into model-space for relative-action RTC policies.
-
-    When using relative actions, the RTC prefix (previous chunk's unexecuted tail)
-    is stored in absolute coordinates. Before feeding it back to the policy, this
-    helper re-expresses those actions relative to the robot's current joint state
-    and optionally normalizes them so the policy receives correctly scaled inputs.
-    """
-    state = current_state.detach().cpu()
-    if state.dim() == 1:
-        state = state.unsqueeze(0)
-
-    action_cpu = prev_actions_absolute.detach().cpu()
-    mask = relative_step._build_mask(action_cpu.shape[-1])
-    relative_actions = to_relative_actions(action_cpu, state, mask)
-
-    transition = create_transition(action=relative_actions)
-    if normalizer_step is not None:
-        transition = normalizer_step(transition)
-
-    return transition[TransitionKey.ACTION].to(policy_device)
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,15 +15,14 @@
 # limitations under the License.
 from dataclasses import dataclass, field

-from lerobot.configs import NormalizationMode
-from lerobot.configs.rewards import RewardModelConfig
+from lerobot.configs import NormalizationMode, PreTrainedConfig
 from lerobot.optim import AdamWConfig, LRSchedulerConfig, OptimizerConfig
 from lerobot.utils.constants import OBS_IMAGE


-@RewardModelConfig.register_subclass(name="reward_classifier")
+@PreTrainedConfig.register_subclass(name="reward_classifier")
@dataclass
-class RewardClassifierConfig(RewardModelConfig):
+class RewardClassifierConfig(PreTrainedConfig):
    """Configuration for the Reward Classifier model."""

    name: str = "reward_classifier"
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,10 +19,11 @@ import logging
 import torch
 from torch import Tensor, nn

-from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig
-from lerobot.rewards.pretrained import PreTrainedRewardModel
 from lerobot.utils.constants import OBS_IMAGE, REWARD

+from ...pretrained import PreTrainedPolicy
+from .configuration_classifier import RewardClassifierConfig
+

 class ClassifierOutput:
    """Wrapper for classifier outputs with additional metadata."""
@@ -96,7 +99,7 @@ class SpatialLearnedEmbeddings(nn.Module):
        return output


-class Classifier(PreTrainedRewardModel):
+class Classifier(PreTrainedPolicy):
    """Image classifier built on top of a pre-trained encoder."""

    name = "reward_classifier"
@@ -232,16 +235,6 @@ class Classifier(PreTrainedRewardModel):

        return ClassifierOutput(logits=logits, probabilities=probabilities, hidden_states=encoder_outputs)

-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Returns 1.0 for success, 0.0 for failure based on image observations."""
-        images = [batch[key] for key in self.config.input_features if key.startswith(OBS_IMAGE)]
-        output = self.predict(images)
-
-        if self.config.num_classes == 2:
-            return (output.probabilities > 0.5).float()
-        else:
-            return torch.argmax(output.probabilities, dim=1).float()
-
    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Tensor]]:
        """Standard forward pass for training compatible with train.py."""
        # Extract images and labels
@@ -276,6 +269,10 @@ class Classifier(PreTrainedRewardModel):

    def predict_reward(self, batch, threshold=0.5):
        """Eval method. Returns predicted reward with the decision threshold as argument."""
+        # Normalize inputs and targets; image keys are selected by OBS_IMAGE prefix below
+        batch = self.normalize_inputs(batch)
+        batch = self.normalize_targets(batch)
+
        # Extract images from batch dict
        images = [batch[key] for key in self.config.input_features if key.startswith(OBS_IMAGE)]

@@ -285,3 +282,28 @@ class Classifier(PreTrainedRewardModel):
            return (probs > threshold).float()
        else:
            return torch.argmax(self.predict(images).probabilities, dim=1)
+
+    def get_optim_params(self):
+        """Return optimizer parameters for the policy."""
+        return self.parameters()
+
+    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
+        """
+        This method is required by PreTrainedPolicy but not used for reward classifiers.
+        The reward classifier is not an actor and does not select actions.
+        """
+        raise NotImplementedError("Reward classifiers do not select actions")
+
+    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
+        """
+        This method is required by PreTrainedPolicy but not used for reward classifiers.
+        The reward classifier is not an actor and does not produce action chunks.
+        """
+        raise NotImplementedError("Reward classifiers do not predict action chunks")
+
+    def reset(self):
+        """
+        This method is required by PreTrainedPolicy but not used for reward classifiers.
+        The reward classifier is not an actor and does not select actions.
+        """
+        pass
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,7 +27,8 @@ from lerobot.processor import (
    policy_action_to_transition,
    transition_to_policy_action,
 )
-from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig
+
+from .configuration_classifier import RewardClassifierConfig


 def make_classifier_processor(
@@ -49,6 +52,8 @@ def make_classifier_processor(
    Args:
        config: The configuration object for the RewardClassifier.
        dataset_stats: A dictionary of statistics for normalization.
+        preprocessor_kwargs: Additional arguments for the pre-processor pipeline.
+        postprocessor_kwargs: Additional arguments for the post-processor pipeline.

    Returns:
        A tuple containing the configured pre-processor and post-processor pipelines.
@@ -0,0 +1 @@
+../../../../docs/source/policy_sarm_README.md
@@ -0,0 +1,18 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_sarm import SARMConfig
+from .modeling_sarm import SARMRewardModel
+
+__all__ = ["SARMConfig", "SARMRewardModel"]
@@ -25,18 +25,18 @@ need ~num_frames/30 queries instead of one per frame (~30x speedup).

 Usage:
    # Full RA-BC computation with visualizations
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4

    # Faster computation with stride (compute every 5 frames, interpolate the rest)
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4 \\
        --stride 5

    # Visualize predictions only (no RA-BC computation)
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4 \\
        --visualize-only \\
@@ -58,9 +58,10 @@ import torch
 from tqdm import tqdm

 from lerobot.datasets import LeRobotDataset
-from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel
-from lerobot.rewards.sarm.processor_sarm import make_sarm_pre_post_processors
-from lerobot.rewards.sarm.sarm_utils import normalize_stage_tau
+
+from .modeling_sarm import SARMRewardModel
+from .processor_sarm import make_sarm_pre_post_processors
+from .sarm_utils import normalize_stage_tau


 def get_reward_model_path_from_parquet(parquet_path: Path) -> str | None:
@@ -712,12 +713,12 @@ def main():
        epilog="""
 Examples:
    # Full RA-BC computation with visualizations
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4

    # Visualize predictions only (no RA-BC computation)
-    python src/lerobot/rewards/sarm/compute_rabc_weights.py \\
+    python src/lerobot/policies/sarm/compute_rabc_weights.py \\
        --dataset-repo-id lerobot/aloha_sim_insertion_human \\
        --reward-model-path <USER>/sarm_single_uni4 \\
        --visualize-only \\
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 Qianzhong Chen, Justin Yu, Mac Schwager, Pieter Abbeel, Yide Shentu, Philipp Wu
 # and The HuggingFace Inc. team. All rights reserved.
 #
@@ -20,15 +22,14 @@ Paper: https://arxiv.org/abs/2509.25358

 from dataclasses import dataclass, field

-from lerobot.configs import FeatureType, NormalizationMode, PolicyFeature
-from lerobot.configs.rewards import RewardModelConfig
+from lerobot.configs import FeatureType, NormalizationMode, PolicyFeature, PreTrainedConfig
 from lerobot.optim import AdamWConfig, CosineDecayWithWarmupSchedulerConfig
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE


-@RewardModelConfig.register_subclass("sarm")
+@PreTrainedConfig.register_subclass("sarm")
@dataclass
-class SARMConfig(RewardModelConfig):
+class SARMConfig(PreTrainedConfig):
    """Configuration class for SARM (Stage-Aware Reward Modeling).

    Supports three annotation modes:
@@ -109,6 +110,7 @@ class SARMConfig(RewardModelConfig):

    def __post_init__(self):
        super().__post_init__()
+
        if self.annotation_mode not in ["single_stage", "dense_only", "dual"]:
            raise ValueError(
                f"annotation_mode must be 'single_stage', 'dense_only', or 'dual', got {self.annotation_mode}"
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 Qianzhong Chen, Justin Yu, Mac Schwager, Pieter Abbeel, Yide Shentu, Philipp Wu
 # and The HuggingFace Inc. team. All rights reserved.
 #
@@ -32,13 +34,14 @@ import torch.nn as nn
 import torch.nn.functional as F  # noqa: N812
 from torch import Tensor

-from lerobot.rewards.pretrained import PreTrainedRewardModel
-from lerobot.rewards.sarm.configuration_sarm import SARMConfig
-from lerobot.rewards.sarm.sarm_utils import (
+from lerobot.utils.constants import OBS_STR
+
+from ..pretrained import PreTrainedPolicy
+from .configuration_sarm import SARMConfig
+from .sarm_utils import (
    normalize_stage_tau,
    pad_state_to_max_dim,
 )
-from lerobot.utils.constants import OBS_STR


 class StageTransformer(nn.Module):
@@ -350,7 +353,7 @@ def gen_stage_emb(num_classes: int, targets: torch.Tensor) -> torch.Tensor:
    return stage_onehot


-class SARMRewardModel(PreTrainedRewardModel):
+class SARMRewardModel(PreTrainedPolicy):
    """
    SARM Reward Model for stage-aware task completion rewards.

@@ -468,23 +471,6 @@ class SARMRewardModel(PreTrainedRewardModel):
        self.subtask_model.to(device)
        return self

-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Compute dense progress reward in [0, 1] from batch.
-
-        Expects batch to contain:
-        - "observation_features" or video embeddings: (B, T, 512)
-        - "language_embedding" or text embeddings: (B, 512)
-        - optionally "observation.state": (B, T, state_dim)
-        """
-        text_emb = batch.get("language_embedding", batch.get("text_features"))
-        video_emb = batch.get("observation_features", batch.get("video_features"))
-        state = batch.get("observation.state", batch.get("state_features"))
-
-        rewards = self.calculate_rewards(text_emb, video_emb, state)
-        if isinstance(rewards, np.ndarray):
-            rewards = torch.from_numpy(rewards).float()
-        return rewards
-
    @torch.no_grad()
    def calculate_rewards(
        self,
@@ -645,9 +631,17 @@ class SARMRewardModel(PreTrainedRewardModel):
        return self.parameters()

    def reset(self):
-        """SARM has no episode-level state to reset."""
+        """Required by PreTrainedPolicy but not used for reward models."""
        pass

+    def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
+        """Required by PreTrainedPolicy but not used for reward models."""
+        raise NotImplementedError("SARM model does not predict action chunks")
+
+    def select_action(self, batch: dict[str, Tensor]) -> Tensor:
+        """Required by PreTrainedPolicy but not used for SARM."""
+        raise NotImplementedError("SARM model does not select actions")
+
    def _train_step(
        self,
        img_emb: torch.Tensor,  # (B, N, T, D)
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -58,15 +60,16 @@ from lerobot.processor import (
    policy_action_to_transition,
    transition_to_policy_action,
 )
-from lerobot.rewards.sarm.configuration_sarm import SARMConfig
-from lerobot.rewards.sarm.sarm_utils import (
+from lerobot.types import EnvTransition, PolicyAction, TransitionKey
+from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME
+
+from .configuration_sarm import SARMConfig
+from .sarm_utils import (
    apply_rewind_augmentation,
    compute_absolute_indices,
    find_stage_and_tau,
    pad_state_to_max_dim,
 )
-from lerobot.types import EnvTransition, PolicyAction, TransitionKey
-from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME


 class SARMEncodingProcessorStep(ProcessorStep):
@@ -452,13 +455,7 @@ class SARMEncodingProcessorStep(ProcessorStep):
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Get image embeddings
-            # transformers 5.x returns BaseModelOutputWithPooling instead of a plain tensor
-            output = self.clip_model.get_image_features(**inputs)
-            if not isinstance(output, torch.Tensor):
-                output = output.pooler_output
-                if output is None:
-                    raise ValueError("pooler_output should not be None for CLIP models.")
-            embeddings = output.detach().cpu()
+            embeddings = self.clip_model.get_image_features(**inputs).detach().cpu()

            # Handle single frame case
            if embeddings.dim() == 1:
@@ -485,13 +482,7 @@ class SARMEncodingProcessorStep(ProcessorStep):
        inputs = self.clip_processor.tokenizer([text], return_tensors="pt", padding=True, truncation=True)
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

-        # transformers 5.x returns BaseModelOutputWithPooling instead of a plain tensor
-        output = self.clip_model.get_text_features(**inputs)
-        if not isinstance(output, torch.Tensor):
-            output = output.pooler_output
-            if output is None:
-                raise ValueError("pooler_output should not be None for CLIP models.")
-        text_embedding = output.detach().cpu()
+        text_embedding = self.clip_model.get_text_features(**inputs).detach().cpu()
        text_embedding = text_embedding.expand(batch_size, -1)

        return text_embedding
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -394,21 +394,13 @@ class SmolVLAPolicy(PreTrainedPolicy):
        loss_dict["losses_after_rm_padding"] = losses.clone().mean().item()

        if reduction == "none":
-            # Return per-sample losses (B,) by averaging over valid (time, action) entries
-            if actions_is_pad is None:
-                per_sample_loss = losses.mean(dim=(1, 2))
-            else:
-                num_valid = ((~actions_is_pad).sum(dim=1) * losses.shape[-1]).clamp_min(1)
-                per_sample_loss = losses.sum(dim=(1, 2)) / num_valid
+            # Return per-sample losses (B,) by averaging over time and action dims
+            per_sample_loss = losses.mean(dim=(1, 2))
            loss_dict["loss"] = per_sample_loss.mean().item()
            return per_sample_loss, loss_dict
        else:
-            # Default: return scalar mean loss over valid (time, action) entries
-            if actions_is_pad is None:
-                loss = losses.mean()
-            else:
-                num_valid = ((~actions_is_pad).sum() * losses.shape[-1]).clamp_min(1)
-                loss = losses.sum() / num_valid
+            # Default: return scalar mean loss
+            loss = losses.mean()
            loss_dict["loss"] = loss.item()
            return loss, loss_dict

@@ -557,7 +557,7 @@ class RewardClassifierProcessorStep(ProcessorStep):
    def __post_init__(self):
        """Initializes the reward classifier model after the dataclass is created."""
        if self.pretrained_path is not None:
-            from lerobot.rewards.classifier.modeling_classifier import Classifier
+            from lerobot.policies.sac.reward_model.modeling_classifier import Classifier

            self.reward_classifier = Classifier.from_pretrained(self.pretrained_path)
            self.reward_classifier.to(self.device)
@@ -142,10 +142,6 @@ class RelativeActionsProcessorStep(ProcessorStep):
        new_transition[TransitionKey.ACTION] = to_relative_actions(action, state, mask)
        return new_transition

-    def get_cached_state(self) -> torch.Tensor | None:
-        """Return the cached ``observation.state`` used as the reference point for relative/absolute action conversions."""
-        return self._last_state
-
    def get_config(self) -> dict[str, Any]:
        return {
            "enabled": self.enabled,
@@ -186,8 +182,7 @@ class AbsoluteActionsProcessorStep(ProcessorStep):
                "but relative_step is None. Ensure relative_step is set when constructing the postprocessor."
            )

-        cached_state = self.relative_step.get_cached_state()
-        if cached_state is None:
+        if self.relative_step._last_state is None:
            raise RuntimeError(
                "AbsoluteActionsProcessorStep requires state from RelativeActionsProcessorStep "
                "but no state has been cached. Ensure the preprocessor runs before the postprocessor."
@@ -199,7 +194,9 @@ class AbsoluteActionsProcessorStep(ProcessorStep):
            return new_transition

        mask = self.relative_step._build_mask(action.shape[-1])
-        new_transition[TransitionKey.ACTION] = to_absolute_actions(action, cached_state, mask)
+        new_transition[TransitionKey.ACTION] = to_absolute_actions(
+            action, self.relative_step._last_state, mask
+        )
        return new_transition

    def get_config(self) -> dict[str, Any]:
@@ -1,36 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .classifier.configuration_classifier import RewardClassifierConfig as RewardClassifierConfig
-from .factory import (
-    get_reward_model_class as get_reward_model_class,
-    make_reward_model as make_reward_model,
-    make_reward_model_config as make_reward_model_config,
-    make_reward_pre_post_processors as make_reward_pre_post_processors,
-)
-from .pretrained import PreTrainedRewardModel as PreTrainedRewardModel
-from .sarm.configuration_sarm import SARMConfig as SARMConfig
-
-__all__ = [
-    # Configuration classes
-    "RewardClassifierConfig",
-    "SARMConfig",
-    # Base class
-    "PreTrainedRewardModel",
-    # Factory functions
-    "get_reward_model_class",
-    "make_reward_model",
-    "make_reward_model_config",
-    "make_reward_pre_post_processors",
-]
@@ -1,238 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import importlib
-import logging
-from typing import Any
-
-import torch
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.processor import PolicyAction, PolicyProcessorPipeline
-from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig
-from lerobot.rewards.pretrained import PreTrainedRewardModel
-from lerobot.rewards.sarm.configuration_sarm import SARMConfig
-
-
-def get_reward_model_class(name: str) -> type[PreTrainedRewardModel]:
-    """
-    Retrieves a reward model class by its registered name.
-
-    This function uses dynamic imports to avoid loading all reward model classes into
-    memory at once, improving startup time and reducing dependencies.
-
-    Args:
-        name: The name of the reward model. Supported names are "reward_classifier",
-              "sarm".
-
-    Returns:
-        The reward model class corresponding to the given name.
-
-    Raises:
-        ValueError: If the reward model name is not recognized.
-    """
-    if name == "reward_classifier":
-        from lerobot.rewards.classifier.modeling_classifier import Classifier
-
-        return Classifier
-    elif name == "sarm":
-        from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel
-
-        return SARMRewardModel
-    else:
-        try:
-            return _get_reward_model_cls_from_name(name=name)
-        except Exception as e:
-            raise ValueError(f"Reward model type '{name}' is not available.") from e
-
-
-def make_reward_model_config(reward_type: str, **kwargs) -> RewardModelConfig:
-    """
-    Instantiates a reward model configuration object based on the reward type.
-
-    This factory function simplifies the creation of reward model configuration objects
-    by mapping a string identifier to the corresponding config class.
-
-    Args:
-        reward_type: The type of the reward model. Supported types include
-                     "reward_classifier", "sarm".
-        **kwargs: Keyword arguments to be passed to the configuration class constructor.
-
-    Returns:
-        An instance of a `RewardModelConfig` subclass.
-
-    Raises:
-        ValueError: If the `reward_type` is not recognized.
-    """
-    if reward_type == "reward_classifier":
-        return RewardClassifierConfig(**kwargs)
-    elif reward_type == "sarm":
-        return SARMConfig(**kwargs)
-    else:
-        try:
-            config_cls = RewardModelConfig.get_choice_class(reward_type)
-            return config_cls(**kwargs)
-        except Exception as e:
-            raise ValueError(f"Reward model type '{reward_type}' is not available.") from e
-
-
-def make_reward_model(cfg: RewardModelConfig, **kwargs) -> PreTrainedRewardModel:
-    """
-    Instantiate a reward model from its configuration.
-
-    Args:
-        cfg: The configuration for the reward model to be created. If
-             `cfg.pretrained_path` is set, the model will be loaded with weights
-             from that path.
-        **kwargs: Additional keyword arguments forwarded to the model constructor
-            (e.g., ``dataset_stats``, ``dataset_meta``).
-
-    Returns:
-        An instantiated and device-placed reward model.
-    """
-    reward_cls = get_reward_model_class(cfg.type)
-
-    kwargs["config"] = cfg
-
-    if cfg.pretrained_path:
-        kwargs["pretrained_name_or_path"] = cfg.pretrained_path
-        reward_model = reward_cls.from_pretrained(**kwargs)
-    else:
-        reward_model = reward_cls(**kwargs)
-
-    reward_model.to(cfg.device)
-    assert isinstance(reward_model, torch.nn.Module)
-
-    return reward_model
-
-
-def make_reward_pre_post_processors(
-    reward_cfg: RewardModelConfig,
-    **kwargs,
-) -> tuple[
-    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
-    PolicyProcessorPipeline[PolicyAction, PolicyAction],
-]:
-    """
-    Create pre- and post-processor pipelines for a given reward model.
-
-    Each reward model type has a dedicated factory function for its processors.
-
-    Args:
-        reward_cfg: The configuration of the reward model for which to create processors.
-        **kwargs: Additional keyword arguments passed to the processor factory
-            (e.g., ``dataset_stats``, ``dataset_meta``).
-
-    Returns:
-        A tuple containing the input (pre-processor) and output (post-processor) pipelines.
-
-    Raises:
-        ValueError: If a processor factory is not implemented for the given reward
-            model configuration type.
-    """
-    # Create a new processor based on reward model type
-    if isinstance(reward_cfg, RewardClassifierConfig):
-        from lerobot.rewards.classifier.processor_classifier import make_classifier_processor
-
-        return make_classifier_processor(
-            config=reward_cfg,
-            dataset_stats=kwargs.get("dataset_stats"),
-        )
-
-    elif isinstance(reward_cfg, SARMConfig):
-        from lerobot.rewards.sarm.processor_sarm import make_sarm_pre_post_processors
-
-        return make_sarm_pre_post_processors(
-            config=reward_cfg,
-            dataset_stats=kwargs.get("dataset_stats"),
-            dataset_meta=kwargs.get("dataset_meta"),
-        )
-
-    else:
-        try:
-            processors = _make_processors_from_reward_model_config(
-                config=reward_cfg,
-                dataset_stats=kwargs.get("dataset_stats"),
-            )
-        except Exception as e:
-            raise ValueError(
-                f"Processor for reward model type '{reward_cfg.type}' is not implemented."
-            ) from e
-        return processors
-
-
-def _get_reward_model_cls_from_name(name: str) -> type[PreTrainedRewardModel]:
-    """Get reward model class from its registered name using dynamic imports.
-
-    This is used as a helper function to import reward models from 3rd party lerobot
-    plugins.
-
-    Args:
-        name: The name of the reward model.
-
-    Returns:
-        The reward model class corresponding to the given name.
-    """
-    if name not in RewardModelConfig.get_known_choices():
-        raise ValueError(
-            f"Unknown reward model name '{name}'. "
-            f"Available reward models: {RewardModelConfig.get_known_choices()}"
-        )
-
-    config_cls = RewardModelConfig.get_choice_class(name)
-    config_cls_name = config_cls.__name__
-
-    model_name = config_cls_name.removesuffix("Config")
-    if model_name == config_cls_name:
-        raise ValueError(
-            f"The config class name '{config_cls_name}' does not follow the expected naming convention. "
-            f"Make sure it ends with 'Config'!"
-        )
-
-    cls_name = model_name + "RewardModel"
-    module_path = config_cls.__module__.replace("configuration_", "modeling_")
-
-    module = importlib.import_module(module_path)
-    reward_cls = getattr(module, cls_name)
-    return reward_cls
-
-
-def _make_processors_from_reward_model_config(
-    config: RewardModelConfig,
-    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
-) -> tuple[Any, Any]:
-    """Create pre- and post-processors from a reward model configuration using dynamic imports.
-
-    This is used as a helper function to import processor factories from 3rd party
-    lerobot reward model plugins.
-
-    Args:
-        config: The reward model configuration object.
-        dataset_stats: Dataset statistics for normalization.
-
-    Returns:
-        A tuple containing the input (pre-processor) and output (post-processor) pipelines.
-    """
-    reward_type = config.type
-    function_name = f"make_{reward_type}_pre_post_processors"
-    module_path = config.__class__.__module__.replace("configuration_", "processor_")
-    logging.debug(
-        f"Instantiating reward pre/post processors using function '{function_name}' "
-        f"from module '{module_path}'"
-    )
-    module = importlib.import_module(module_path)
-    function = getattr(module, function_name)
-    return function(config, dataset_stats=dataset_stats)
@@ -1,244 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import abc
-import builtins
-import logging
-import os
-from importlib.resources import files
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Any, TypeVar
-
-import packaging
-import safetensors
-from huggingface_hub import HfApi, ModelCard, ModelCardData, hf_hub_download
-from huggingface_hub.constants import SAFETENSORS_SINGLE_FILE
-from huggingface_hub.errors import HfHubHTTPError
-from safetensors.torch import load_model as load_model_as_safetensor, save_model as save_model_as_safetensor
-from torch import Tensor, nn
-
-from lerobot.configs.rewards import RewardModelConfig
-from lerobot.utils.hub import HubMixin
-
-if TYPE_CHECKING:
-    from lerobot.configs.train import TrainPipelineConfig
-
-T = TypeVar("T", bound="PreTrainedRewardModel")
-
-
-class PreTrainedRewardModel(nn.Module, HubMixin, abc.ABC):
-    """Base class for reward models."""
-
-    config_class: None
-    name: None
-
-    def __init__(self, config: RewardModelConfig, *inputs, **kwargs):
-        super().__init__()
-        if not isinstance(config, RewardModelConfig):
-            raise ValueError(
-                f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class "
-                "`RewardModelConfig`. To create a model from a pretrained model use "
-                f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`"
-            )
-        self.config = config
-
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        if not getattr(cls, "config_class", None):
-            raise TypeError(f"Class {cls.__name__} must define 'config_class'")
-        if not getattr(cls, "name", None):
-            raise TypeError(f"Class {cls.__name__} must define 'name'")
-
-    def _save_pretrained(self, save_directory: Path) -> None:
-        self.config._save_pretrained(save_directory)
-        model_to_save = self.module if hasattr(self, "module") else self
-        save_model_as_safetensor(model_to_save, str(save_directory / SAFETENSORS_SINGLE_FILE))
-
-    @classmethod
-    def from_pretrained(
-        cls: builtins.type[T],
-        pretrained_name_or_path: str | Path,
-        *,
-        config: RewardModelConfig | None = None,
-        force_download: bool = False,
-        resume_download: bool | None = None,
-        proxies: dict | None = None,
-        token: str | bool | None = None,
-        cache_dir: str | Path | None = None,
-        local_files_only: bool = False,
-        revision: str | None = None,
-        strict: bool = False,
-        **kwargs,
-    ) -> T:
-        """
-        The reward model is set in evaluation mode by default using `reward.eval()` (dropout modules are
-        deactivated). To train it, you should first set it back in training mode with `reward.train()`.
-        """
-        if config is None:
-            config = RewardModelConfig.from_pretrained(
-                pretrained_name_or_path=pretrained_name_or_path,
-                force_download=force_download,
-                resume_download=resume_download,
-                proxies=proxies,
-                token=token,
-                cache_dir=cache_dir,
-                local_files_only=local_files_only,
-                revision=revision,
-                **kwargs,
-            )
-        model_id = str(pretrained_name_or_path)
-        instance = cls(config, **kwargs)
-        if os.path.isdir(model_id):
-            print("Loading weights from local directory")
-            model_file = os.path.join(model_id, SAFETENSORS_SINGLE_FILE)
-            reward = cls._load_as_safetensor(instance, model_file, config.device or "cpu", strict)
-        else:
-            try:
-                model_file = hf_hub_download(
-                    repo_id=model_id,
-                    filename=SAFETENSORS_SINGLE_FILE,
-                    revision=revision,
-                    cache_dir=cache_dir,
-                    force_download=force_download,
-                    proxies=proxies,
-                    resume_download=resume_download,
-                    token=token,
-                    local_files_only=local_files_only,
-                )
-                reward = cls._load_as_safetensor(instance, model_file, config.device or "cpu", strict)
-            except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"{SAFETENSORS_SINGLE_FILE} not found on the HuggingFace Hub in {model_id}"
-                ) from e
-
-        reward.to(config.device)
-        reward.eval()
-        return reward
-
-    @classmethod
-    def _load_as_safetensor(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
-        # Create base kwargs
-        kwargs = {"strict": strict}
-
-        # Add device parameter for newer versions that support it
-        if packaging.version.parse(safetensors.__version__) >= packaging.version.parse("0.4.3"):
-            kwargs["device"] = map_location
-
-        # Load the model with appropriate kwargs
-        missing_keys, unexpected_keys = load_model_as_safetensor(model, model_file, **kwargs)
-        if missing_keys:
-            logging.warning(f"Missing key(s) when loading model: {missing_keys}")
-        if unexpected_keys:
-            logging.warning(f"Unexpected key(s) when loading model: {unexpected_keys}")
-
-        # For older versions, manually move to device if needed
-        if "device" not in kwargs and map_location != "cpu":
-            logging.warning(
-                "Loading model weights on other devices than 'cpu' is not supported natively in your version of safetensors."
-                " This means that the model is loaded on 'cpu' first and then copied to the device."
-                " This leads to a slower loading time."
-                " Please update safetensors to version 0.4.3 or above for improved performance."
-            )
-            model.to(map_location)
-        return model
-
-    def get_optim_params(self):
-        """
-        Returns the reward-model-specific parameters dict to be passed on to the optimizer.
-        """
-        return self.parameters()
-
-    def reset(self) -> None:
-        """Reset any internal state."""
-        pass
-
-    @abc.abstractmethod
-    def compute_reward(self, batch: dict[str, Tensor]) -> Tensor:
-        """Compute a scalar reward signal for a batch of observations.
-
-        Args:
-            batch: Dictionary containing at minimum observation tensors.
-                   May also contain "action", "next_observation.*", etc.
-
-        Returns:
-            Tensor of shape ``(batch_size,)`` with reward values.
-        """
-        ...
-
-    def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Any]]:
-        """Training forward pass — override for trainable reward models."""
-        raise NotImplementedError(
-            f"{self.__class__.__name__} is not trainable. Only use compute_reward() for inference."
-        )
-
-    @property
-    def is_trainable(self) -> bool:
-        """Whether this reward model can be trained via ``lerobot-train``.
-
-        Trainable reward models override :meth:`forward`; zero-shot models
-        inherit the base implementation that raises ``NotImplementedError``.
-        """
-        return type(self).forward is not PreTrainedRewardModel.forward
-
-    def push_model_to_hub(self, cfg: "TrainPipelineConfig"):
-        api = HfApi()
-        repo_id = api.create_repo(
-            repo_id=self.config.repo_id, private=self.config.private, exist_ok=True
-        ).repo_id
-
-        # Push the files to the repo in a single commit
-        with TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
-            saved_path = Path(tmp) / repo_id
-
-            self.save_pretrained(saved_path)  # Calls _save_pretrained and stores model tensors
-
-            card = self.generate_model_card(
-                cfg.dataset.repo_id, self.config.type, self.config.license, self.config.tags
-            )
-            card.save(str(saved_path / "README.md"))
-
-            cfg.save_pretrained(saved_path)  # Calls _save_pretrained and stores train config
-
-            commit_info = api.upload_folder(
-                repo_id=repo_id,
-                repo_type="model",
-                folder_path=saved_path,
-                commit_message="Upload reward model weights, train config and readme",
-                allow_patterns=["*.safetensors", "*.json", "*.yaml", "*.md"],
-                ignore_patterns=["*.tmp", "*.log"],
-            )
-
-            logging.info(f"Model pushed to {commit_info.repo_url.url}")
-
-    def generate_model_card(
-        self, dataset_repo_id: str, model_type: str, license: str | None, tags: list[str] | None
-    ) -> ModelCard:
-        card_data = ModelCardData(
-            license=license or "apache-2.0",
-            library_name="lerobot",
-            pipeline_tag="robotics",
-            tags=list(set(tags or []).union({"robotics", "lerobot", "reward-model", model_type})),
-            model_name=model_type,
-            datasets=dataset_repo_id,
-        )
-
-        template_card = (
-            files("lerobot.templates")
-            .joinpath("lerobot_rewardmodel_modelcard_template.md")
-            .read_text(encoding="utf-8")
-        )
-        card = ModelCard.from_template(card_data, template_str=template_card)
-        card.validate()
-        return card
@@ -193,15 +193,15 @@ def convert_lerobot_dataset_to_cropped_lerobot_dataset(
        fps=int(original_dataset.fps),
        root=new_dataset_root,
        robot_type=original_dataset.meta.robot_type,
-        features=original_dataset.meta.info.features,
+        features=original_dataset.meta.info["features"],
        use_videos=len(original_dataset.meta.video_keys) > 0,
    )

    # Update the metadata for every image key that will be cropped:
    # (Here we simply set the shape to be the final resize_size.)
    for key in crop_params_dict:
-        if key in new_dataset.meta.info.features:
-            new_dataset.meta.info.features[key]["shape"] = (3, *resize_size)
+        if key in new_dataset.meta.info["features"]:
+            new_dataset.meta.info["features"][key]["shape"] = [3] + list(resize_size)

    # TODO:  Directly modify the mp4 video + meta info features, instead of recreating a dataset
    prev_episode_index = 0
@@ -23,6 +23,7 @@ from .configs import (
    DAggerKeyboardConfig,
    DAggerPedalConfig,
    DAggerStrategyConfig,
+    DatasetRecordConfig,
    HighlightStrategyConfig,
    RolloutConfig,
    RolloutStrategyConfig,
@@ -46,25 +47,18 @@ from .inference import (
    SyncInferenceEngine,
    create_inference_engine,
 )
-from .strategies import (
-    BaseStrategy,
-    DAggerStrategy,
-    HighlightStrategy,
-    RolloutStrategy,
-    SentryStrategy,
-    create_strategy,
-)
+from .ring_buffer import RolloutRingBuffer
+from .robot_wrapper import ThreadSafeRobot
+from .strategies import RolloutStrategy, create_strategy

 __all__ = [
-    "BaseStrategy",
    "BaseStrategyConfig",
    "DAggerKeyboardConfig",
    "DAggerPedalConfig",
-    "DAggerStrategy",
    "DAggerStrategyConfig",
    "DatasetContext",
+    "DatasetRecordConfig",
    "HardwareContext",
-    "HighlightStrategy",
    "HighlightStrategyConfig",
    "InferenceEngine",
    "InferenceEngineConfig",
@@ -74,13 +68,14 @@ __all__ = [
    "RTCInferenceEngine",
    "RolloutConfig",
    "RolloutContext",
+    "RolloutRingBuffer",
    "RolloutStrategy",
    "RolloutStrategyConfig",
    "RuntimeContext",
-    "SentryStrategy",
    "SentryStrategyConfig",
    "SyncInferenceConfig",
    "SyncInferenceEngine",
+    "ThreadSafeRobot",
    "build_rollout_context",
    "create_inference_engine",
    "create_strategy",
@@ -26,7 +26,6 @@ from lerobot.configs import PreTrainedConfig, parser
 from lerobot.configs.dataset import DatasetRecordConfig
 from lerobot.robots.config import RobotConfig
 from lerobot.teleoperators.config import TeleoperatorConfig
-from lerobot.utils.device_utils import auto_select_torch_device, is_torch_device_available

 from .inference import InferenceEngineConfig, SyncInferenceConfig

@@ -75,7 +74,7 @@ class SentryStrategyConfig(RolloutStrategyConfig):
    # Target video file size in MB for episode rotation.  Episodes are
    # saved once the estimated video duration would exceed this limit.
    # Defaults to DEFAULT_VIDEO_FILE_SIZE_IN_MB when set to None.
-    target_video_file_size_mb: int | None = None
+    target_video_file_size_mb: float | None = None


@RolloutStrategyConfig.register_subclass("highlight")
@@ -89,8 +88,8 @@ class HighlightStrategyConfig(RolloutStrategyConfig):
    again.
    """

-    ring_buffer_seconds: float = 10.0
-    ring_buffer_max_memory_mb: int = 1024
+    ring_buffer_seconds: float = 30.0
+    ring_buffer_max_memory_mb: float = 2048.0
    save_key: str = "s"
    push_key: str = "h"

@@ -136,21 +135,19 @@ class DAggerStrategyConfig(RolloutStrategyConfig):
    2. **correction** — toggle human correction recording.
    3. **upload** — push dataset to hub on demand (corrections-only mode).

-    When ``record_autonomous=False`` (default) only human-correction windows
-    are recorded — each correction becomes its own episode.  Set to ``True``
-    to record both autonomous and correction frames with size-based episode
-    rotation (same as Sentry) and background uploading.  ``push_to_hub`` is
-    blocked while a correction is in progress.
+    When ``record_autonomous=False`` (default) only the human-correction
+    windows are recorded, and each correction becomes its own episode.  Set
+    to ``True`` to record both autonomous and correction frames with
+    size-based episode rotation (same as Sentry) and background uploading.
+    ``push_to_hub`` is blocked while a correction is in progress.
    """

-    # Number of correction episodes to collect (corrections-only mode).
-    # When None, falls back to ``--dataset.num_episodes``.
-    num_episodes: int | None = None
+    num_episodes: int = 10
    record_autonomous: bool = False
    upload_every_n_episodes: int = 5
    # Target video file size in MB for episode rotation (record_autonomous
    # mode only).  Defaults to DEFAULT_VIDEO_FILE_SIZE_IN_MB when None.
-    target_video_file_size_mb: int | None = None
+    target_video_file_size_mb: float | None = None
    input_device: str = "keyboard"
    keyboard: DAggerKeyboardConfig = field(default_factory=DAggerKeyboardConfig)
    pedal: DAggerPedalConfig = field(default_factory=DAggerPedalConfig)
@@ -206,14 +203,6 @@ class RolloutConfig:
    # Use vocal synthesis to read events
    play_sounds: bool = True
    resume: bool = False
-    # Rename map for mapping robot/dataset observation keys to policy keys
-    rename_map: dict[str, str] = field(default_factory=dict)
-
-    # Hardware teardown
-    # When True (default), smoothly interpolate the robot back to the joint
-    # positions captured at startup before disconnecting.  Set to False to
-    # leave the robot in its final achieved pose at shutdown.
-    return_to_initial_position: bool = True

    # Torch compile
    use_torch_compile: bool = False
@@ -227,10 +216,7 @@ class RolloutConfig:
        if isinstance(self.strategy, DAggerStrategyConfig) and self.teleop is None:
            raise ValueError("DAgger strategy requires --teleop.type to be set")

-        # TODO(Steven): DAgger shouldn't require a dataset (user may want to just rollout+intervene without recording), but for now we require it to simplify the implementation.
-        needs_dataset = isinstance(
-            self.strategy, (SentryStrategyConfig, HighlightStrategyConfig, DAggerStrategyConfig)
-        )
+        needs_dataset = isinstance(self.strategy, (SentryStrategyConfig, HighlightStrategyConfig))
        if needs_dataset and (self.dataset is None or not self.dataset.repo_id):
            raise ValueError(f"{self.strategy.type} strategy requires --dataset.repo_id to be set")

@@ -258,29 +244,14 @@ class RolloutConfig:
            self.dataset.streaming_encoding = True

        # DAgger: streaming is mandatory only when the autonomous phase is also recorded.
-        if isinstance(self.strategy, DAggerStrategyConfig) and self.dataset is not None:
-            if self.strategy.record_autonomous and not self.dataset.streaming_encoding:
-                logger.warning("DAgger with record_autonomous=True forces streaming_encoding=True")
-                self.dataset.streaming_encoding = True
-            elif not self.strategy.record_autonomous and not self.dataset.streaming_encoding:
-                logger.info(
-                    "Streaming encoding is disabled for DAgger corrections-only mode. "
-                    "Consider enabling it for faster episode saving: "
-                    "--dataset.streaming_encoding=true --dataset.encoder_threads=2"
-                )
-
-        # DAgger: resolve num_episodes from dataset config when not explicitly set.
-        if isinstance(self.strategy, DAggerStrategyConfig) and self.strategy.num_episodes is None:
-            if self.dataset is not None:
-                self.strategy.num_episodes = self.dataset.num_episodes
-                logger.info(
-                    "DAgger num_episodes not set — using --dataset.num_episodes=%d",
-                    self.strategy.num_episodes,
-                )
-            else:
-                raise ValueError(
-                    "DAgger num_episodes must be set either via --strategy.num_episodes or --dataset.num_episodes"
-                )
+        if (
+            isinstance(self.strategy, DAggerStrategyConfig)
+            and self.strategy.record_autonomous
+            and self.dataset is not None
+            and not self.dataset.streaming_encoding
+        ):
+            logger.warning("DAgger with record_autonomous=True forces streaming_encoding=True")
+            self.dataset.streaming_encoding = True

        # --- Policy loading ---
        if self.robot is None:
@@ -294,30 +265,6 @@ class RolloutConfig:
        if self.policy is None:
            raise ValueError("--policy.path is required for rollout")

-        # --- Task resolution ---
-        # When any --dataset.* flag is passed, draccus creates a DatasetRecordConfig with single_task="".
-        # If the user set the task via the top-level --task flag, propagate it so that all
-        # downstream consumers (inference engine, dataset frame builders) see it.
-        if self.dataset is not None and not self.dataset.single_task and self.task:
-            logger.info("Propagating top-level task '%s' to dataset config", self.task)
-            self.dataset.single_task = self.task
-        elif self.dataset is not None and self.dataset.single_task and not self.task:
-            logger.info("Propagating dataset single_task '%s' to top-level task", self.dataset.single_task)
-            self.task = self.dataset.single_task
-
-        # --- Device resolution ---
-        # Resolve device from the policy config when not explicitly set so all
-        # components (policy.to, preprocessor, inference engine) use the same
-        # device string instead of inconsistent fallbacks.
-        if self.device is None or not is_torch_device_available(self.device):
-            resolved = self.policy.device
-            if resolved:
-                self.device = resolved
-                logger.info("Resolved device from policy config: %s", self.device)
-            else:
-                self.device = auto_select_torch_device().type
-                logger.info("No policy config to resolve device from; auto-selected device: %s", self.device)
-
    @classmethod
    def __get_path_fields__(cls) -> list[str]:
        return ["policy"]
@@ -43,7 +43,6 @@ from lerobot.processor import (
    make_default_processors,
    rename_stats,
 )
-from lerobot.processor.relative_action_processor import RelativeActionsProcessorStep
 from lerobot.robots import make_robot_from_config
 from lerobot.teleoperators import Teleoperator, make_teleoperator_from_config
 from lerobot.utils.feature_utils import combine_feature_dicts, hw_to_dataset_features
@@ -52,7 +51,6 @@ from .configs import BaseStrategyConfig, DAggerStrategyConfig, RolloutConfig
 from .inference import (
    InferenceEngine,
    RTCInferenceConfig,
-    SyncInferenceConfig,
    create_inference_engine,
 )
 from .robot_wrapper import ThreadSafeRobot
@@ -259,12 +257,10 @@ def build_rollout_context(
        teleop.connect()
        logger.info("Teleoperator connected")

-    # TODO(Steven): once Teleoperator motor-control methods are standardised
-    # (``enable_torque`` / ``disable_torque`` / ``write_goal_positions``), gate
-    # the DAgger strategy on their presence here and fail fast with a helpful
-    # message instead of relying on the operator to pre-align the leader by
-    # hand.  See :func:`DAggerStrategy._apply_transition` for the matching
-    # disabled call sites.
+    # DAgger expects a teleop with motor-control capabilities (enable_torque,
+    # disable_torque, write_goal_positions); the check below is currently disabled.
+    # TODO(Steven): either enforce this (every teleop must implement these methods) or make the
+    # user responsible for moving the teleop to the robot's position when starting a correction.
    # if isinstance(cfg.strategy, DAggerStrategyConfig) and teleop is not None:
    #     required_teleop_methods = ("enable_torque", "disable_torque", "write_goal_positions")
    #     missing = [m for m in required_teleop_methods if not callable(getattr(teleop, m, None))]
@@ -276,19 +272,11 @@ def build_rollout_context(
    #         )

    # --- 4. Features + action-key reconciliation ---------------------
-    # TODO(Steven):Only ``.pos`` joint features are routed to the policy as state and as the
-    # action target; velocity and torque channels (when present) are kept in
-    # the raw observation but excluded from the policy-facing tensors.
    all_obs_features = robot.observation_features
-    # ``observation_features`` values are either a tuple (camera shape) or the
-    # ``float`` type itself used as a sentinel for scalar motor features —
-    # see ``dict[str, type | tuple]`` annotation on ``Robot.observation_features``.
    observation_features_hw = {
-        k: v
-        for k, v in all_obs_features.items()
-        if isinstance(v, tuple) or (v is float and k.endswith(".pos"))
+        k: v for k, v in all_obs_features.items() if v is float or isinstance(v, tuple)
    }
-    action_features_hw = {k: v for k, v in robot.action_features.items() if k.endswith(".pos")}
+    action_features_hw = robot.action_features

    # The action side is always needed: sync inference reads action names from
    # ``dataset_features[ACTION]`` to map policy tensors back to robot actions.
@@ -305,7 +293,7 @@ def build_rollout_context(
    )
    dataset_features = combine_feature_dicts(action_dataset_features, observation_dataset_features)
    hw_features = hw_to_dataset_features(observation_features_hw, "observation")
-    raw_action_keys = list(action_features_hw.keys())
+    raw_action_keys = list(robot.action_features.keys())
    policy_action_names = getattr(policy_config, "action_feature_names", None)
    ordered_action_keys = _resolve_action_key_order(
        list(policy_action_names) if policy_action_names else None,
@@ -313,11 +301,11 @@ def build_rollout_context(
    )

    # Validate visual features if no rename_map is active
-    rename_map = cfg.rename_map
+    rename_map = cfg.dataset.rename_map if cfg.dataset else {}
    if not rename_map:
        expected_visuals = {k for k, v in full_config.input_features.items() if v.type == FeatureType.VISUAL}
        provided_visuals = {
-            f"observation.images.{k}" for k, v in robot.observation_features.items() if isinstance(v, tuple)
+            f"observation.{k}" for k, v in robot.observation_features.items() if isinstance(v, tuple)
        }
        policy_subset = expected_visuals.issubset(provided_visuals)
        hw_subset = provided_visuals.issubset(expected_visuals)
@@ -353,14 +341,6 @@ def build_rollout_context(
                    "names": None,
                }

-            repo_name = cfg.dataset.repo_id.split("/", 1)[-1]
-            if not repo_name.startswith("rollout_"):
-                raise ValueError(
-                    "Dataset names for rollout must start with 'rollout_'. "
-                    "Use --dataset.repo_id=<user>/rollout_<name> for policy deployment datasets."
-                )
-            cfg.dataset.stamp_repo_id()
-            target_video_mb = getattr(cfg.strategy, "target_video_file_size_mb", None)
            dataset = LeRobotDataset.create(
                cfg.dataset.repo_id,
                cfg.dataset.fps,
@@ -376,7 +356,6 @@ def build_rollout_context(
                streaming_encoding=cfg.dataset.streaming_encoding,
                encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize,
                encoder_threads=cfg.dataset.encoder_threads,
-                video_files_size_in_mb=target_video_mb,
            )

    if dataset is not None:
@@ -387,7 +366,7 @@ def build_rollout_context(
    if dataset is not None:
        dataset_stats = rename_stats(
            dataset.meta.stats,
-            cfg.rename_map,
+            cfg.dataset.rename_map if cfg.dataset else {},
        )

    preprocessor, postprocessor = make_pre_post_processors(
@@ -395,20 +374,11 @@ def build_rollout_context(
        pretrained_path=cfg.policy.pretrained_path,
        dataset_stats=dataset_stats,
        preprocessor_overrides={
-            "device_processor": {"device": cfg.device},
-            "rename_observations_processor": {"rename_map": cfg.rename_map},
+            "device_processor": {"device": cfg.device or getattr(policy_config, "device", "cpu")},
+            "rename_observations_processor": {"rename_map": cfg.dataset.rename_map if cfg.dataset else {}},
        },
    )

-    if isinstance(cfg.inference, SyncInferenceConfig) and any(
-        isinstance(step, RelativeActionsProcessorStep) and step.enabled
-        for step in getattr(preprocessor, "steps", ())
-    ):
-        raise NotImplementedError(
-            "SyncInferenceEngine does not support policies with relative actions for now."
-            "Use --inference.type=rtc or remove relative action processor steps from the policy pipeline."
-        )
-
    # --- 7. Inference strategy (needs policy + pre/post + hardware) --
    logger.info(
        "Creating inference engine (type=%s)...",
@@ -14,8 +14,8 @@

 """Inference engine package — backend-agnostic action production.

-Concrete backends (``sync``, ``rtc``, ...) expose the same small interface so
-rollout strategies never branch on which backend is in use.
+Concrete backends (sync, RTC, …) expose the same small interface so
+rollout strategies never branch on the inference backend.
 """

 from .base import InferenceEngine
@@ -15,8 +15,8 @@
 """Inference engine ABC.

 Rollout strategies consume actions through this small interface so they
-do not need to know whether inference happens inline on the control thread
-or asynchronously in a background thread (RTC).
+do not need to know whether actions are computed synchronously, in
+a background thread (RTC), or by an external source.
 """

 from __future__ import annotations
@@ -29,10 +29,9 @@ import torch
 class InferenceEngine(abc.ABC):
    """Abstract backend for producing actions during rollout.

-    Subclasses decide whether inference happens inline on the control
-    thread or asynchronously in a background thread.  The contract is
-    minimal so additional backends can be plugged in without touching
-    rollout strategies.
+    Subclasses decide whether inference happens inline, in a background
+    thread, or externally. The contract is minimal so new backends can
+    be added without touching rollout strategies.

    Lifecycle
    ---------
@@ -44,8 +43,8 @@ class InferenceEngine(abc.ABC):
    -----------------
    ``get_action(obs_frame)`` — return the next action tensor, or
    ``None`` if none is available (e.g. async queue empty).  Sync
-    backends always compute from ``obs_frame``; async backends ignore
-    it (they receive observations via ``notify_observation``).
+    backends always compute from ``obs_frame``; async backends may
+    ignore it (they get observations via ``notify_observation``).

    Optional hooks
    --------------
@@ -68,8 +68,9 @@ class SyncInferenceConfig(InferenceEngineConfig):
 class RTCInferenceConfig(InferenceEngineConfig):
    """Real-Time Chunking: async policy inference in a background thread."""

-    # Eagerly constructed so draccus exposes nested fields directly on the CLI
-    # (e.g. ``--inference.rtc.execution_horizon=...``).
+    # ``RTCConfig`` is a small dataclass with default-only fields, so eagerly
+    # constructing one here costs nothing and keeps draccus' CLI surface flat
+    # (``--inference.rtc.execution_horizon=...`` etc.).  No need to lazy-init.
    rtc: RTCConfig = field(default_factory=RTCConfig)
    queue_threshold: int = 30

@@ -32,14 +32,18 @@ from typing import Any
 import torch

 from lerobot.policies.pretrained import PreTrainedPolicy
-from lerobot.policies.rtc import ActionQueue, LatencyTracker, reanchor_relative_rtc_prefix
+from lerobot.policies.rtc import ActionQueue, LatencyTracker
 from lerobot.policies.rtc.configuration_rtc import RTCConfig
 from lerobot.policies.utils import prepare_observation_for_inference
 from lerobot.processor import (
    NormalizerProcessorStep,
    PolicyProcessorPipeline,
    RelativeActionsProcessorStep,
+    TransitionKey,
+    create_transition,
+    to_relative_actions,
 )
+from lerobot.utils.constants import OBS_STATE
 from lerobot.utils.feature_utils import build_dataset_frame

 from ..robot_wrapper import ThreadSafeRobot
@@ -62,6 +66,35 @@ _RTC_JOIN_TIMEOUT_S: float = 3.0
 # ---------------------------------------------------------------------------


+def _reanchor_relative_rtc_prefix(
+    prev_actions_absolute: torch.Tensor,
+    current_state: torch.Tensor,
+    relative_step: RelativeActionsProcessorStep,
+    normalizer_step: NormalizerProcessorStep | None,
+    policy_device: torch.device | str,
+) -> torch.Tensor:
+    """Convert absolute leftover actions into model-space for relative-action RTC policies.
+
+    When using relative actions, the RTC prefix (previous chunk's unexecuted tail) is stored in
+    absolute coordinates. Before feeding it back to the policy, this helper re-expresses it
+    relative to ``current_state`` and, when ``normalizer_step`` is given, normalizes it for the policy.
+    NOTE(review): ``current_state`` presumably must be raw (unnormalized) joint state — confirm.
+    """
+    state = current_state.detach().cpu()
+    if state.dim() == 1:
+        state = state.unsqueeze(0)
+
+    action_cpu = prev_actions_absolute.detach().cpu()
+    mask = relative_step._build_mask(action_cpu.shape[-1])
+    relative_actions = to_relative_actions(action_cpu, state, mask)
+
+    transition = create_transition(action=relative_actions)
+    if normalizer_step is not None:
+        transition = normalizer_step(transition)
+
+    return transition[TransitionKey.ACTION].to(policy_device)
+
+
 def _normalize_prev_actions_length(prev_actions: torch.Tensor, target_steps: int) -> torch.Tensor:
    """Pad or truncate RTC prefix actions to a fixed length for stable compiled inference."""
    if prev_actions.ndim != 2:
@@ -285,15 +318,13 @@ class RTCInferenceEngine(InferenceEngine):
                        preprocessed = self._preprocessor(obs_batch)

                        if prev_actions is not None and self._relative_step is not None:
-                            # Rebase against the raw cached state so the leftover tail stays in
-                            # the training-time coordinate frame.
-                            raw_state = self._relative_step.get_cached_state()
-                            if raw_state is not None:
+                            state_tensor = preprocessed.get(OBS_STATE)
+                            if state_tensor is not None:
                                prev_abs = queue.get_processed_left_over()
                                if prev_abs is not None and prev_abs.numel() > 0:
-                                    prev_actions = reanchor_relative_rtc_prefix(
+                                    prev_actions = _reanchor_relative_rtc_prefix(
                                        prev_actions_absolute=prev_abs,
-                                        current_state=raw_state,
+                                        current_state=state_tensor,
                                        relative_step=self._relative_step,
                                        normalizer_step=self._normalizer_step,
                                        policy_device=policy_device,
@@ -31,21 +31,6 @@ from .base import InferenceEngine
 logger = logging.getLogger(__name__)


-# TODO(Steven): support relative-action policies.  The per-tick flow refreshes
-# ``RelativeActionsProcessorStep._last_state`` every call, so cached chunk
-# actions popped on later ticks get reanchored to the *current* robot state and
-# absolute targets drift through the chunk.  Relative-action policies are
-# rejected at context-build time today; RTC postprocesses the whole chunk and
-# is unaffected.
-#
-# Candidate fix: drive the policy via ``predict_action_chunk`` and serve a
-# local FIFO of postprocessed actions.  Eliminates drift by construction and
-# saves per-tick pre/post work, but bypasses ``select_action`` — needs
-# fallbacks for SAC (raises), ACT temporal ensembling (ensembler lives in
-# ``select_action``), and Diffusion-family (obs-history queues populated as a
-# side effect of ``select_action``).
-
-
 class SyncInferenceEngine(InferenceEngine):
    """Inline synchronous inference: compute one action per call.

@@ -47,7 +47,7 @@ class RolloutRingBuffer:
        count.
    """

-    def __init__(self, max_seconds: float = 30.0, max_memory_mb: int = 2048, fps: float = 30.0) -> None:
+    def __init__(self, max_seconds: float = 30.0, max_memory_mb: float = 2048.0, fps: float = 30.0) -> None:
        self._max_frames = int(max_seconds * fps)
        self._max_bytes = int(max_memory_mb * 1024 * 1024)
        self._buffer: deque[dict] = deque(maxlen=self._max_frames)
@@ -60,7 +60,8 @@ class BaseStrategy(RolloutStrategy):
                break

            obs = robot.get_observation()
-            obs_processed = self._process_observation_and_notify(ctx.processors, obs)
+            obs_processed = ctx.processors.robot_observation_processor(obs)
+            engine.notify_observation(obs_processed)

            if self._handle_warmup(cfg.use_torch_compile, loop_start, control_interval):
                continue
@@ -71,15 +72,8 @@ class BaseStrategy(RolloutStrategy):
            dt = time.perf_counter() - loop_start
            if (sleep_t := control_interval - dt) > 0:
                precise_sleep(sleep_t)
-            else:
-                logger.warning(
-                    f"Record loop is running slower ({1 / dt:.1f} Hz) than the target FPS ({cfg.fps} Hz). Dataset frames might be dropped and robot control might be unstable. Common causes are: 1) Camera FPS not keeping up 2) Policy inference taking too long 3) CPU starvation"
-                )

    def teardown(self, ctx: RolloutContext) -> None:
        """Disconnect hardware and stop inference."""
-        self._teardown_hardware(
-            ctx.hardware,
-            return_to_initial_position=ctx.runtime.cfg.return_to_initial_position,
-        )
+        self._teardown_hardware(ctx.hardware)
        logger.info("Base strategy teardown complete")
@@ -32,7 +32,7 @@ from ..inference import InferenceEngine

 if TYPE_CHECKING:
    from ..configs import RolloutStrategyConfig
-    from ..context import HardwareContext, ProcessorContext, RolloutContext, RuntimeContext
+    from ..context import HardwareContext, RolloutContext, RuntimeContext

 logger = logging.getLogger(__name__)

@@ -50,7 +50,6 @@ class RolloutStrategy(abc.ABC):
        self._engine: InferenceEngine | None = None
        self._interpolator: ActionInterpolator | None = None
        self._warmup_flushed: bool = False
-        self._cached_obs_processed: dict | None = None

    def _init_engine(self, ctx: RolloutContext) -> None:
        """Attach the inference engine and action interpolator, then start the backend.
@@ -63,35 +62,10 @@ class RolloutStrategy(abc.ABC):
        self._interpolator = ActionInterpolator(multiplier=ctx.runtime.cfg.interpolation_multiplier)
        self._engine = ctx.policy.inference
        logger.info("Starting inference engine...")
-        self._engine.reset()
        self._engine.start()
        self._warmup_flushed = False
-        self._cached_obs_processed = None
        logger.info("Inference engine started")

-    def _process_observation_and_notify(self, processors: ProcessorContext, obs_raw: dict) -> dict:
-        """Run the observation processor and notify the engine — throttled to policy ticks.
-
-        Callers are responsible for calling ``robot.get_observation()`` every loop
-        iteration so ``obs_raw`` stays fresh for the action post-processor.  This
-        helper gates only the comparatively expensive bits — the processor pipeline
-        and ``engine.notify_observation`` — to fire when the interpolator signals
-        it needs a new action (once per ``interpolation_multiplier`` ticks).  On
-        interpolated ticks the cached ``obs_processed`` is reused.
-
-        With ``interpolation_multiplier == 1`` this is equivalent to the unthrottled
-        path: ``needs_new_action()`` is True every tick.
-
-        The cache is implicitly invalidated whenever ``interpolator.reset()`` is
-        called (warmup completion, DAgger phase transitions back to AUTONOMOUS),
-        because reset makes ``needs_new_action()`` return True on the next call.
-        """
-        if self._cached_obs_processed is None or self._interpolator.needs_new_action():
-            obs_processed = processors.robot_observation_processor(obs_raw)
-            self._engine.notify_observation(obs_processed)
-            self._cached_obs_processed = obs_processed
-        return self._cached_obs_processed
-
    def _handle_warmup(self, use_torch_compile: bool, loop_start: float, control_interval: float) -> bool:
        """Handle torch.compile warmup phase.

@@ -116,20 +90,16 @@ class RolloutStrategy(abc.ABC):
            engine.resume()
        return False

-    def _teardown_hardware(self, hw: HardwareContext, return_to_initial_position: bool = True) -> None:
-        """Stop the inference engine, optionally return robot to initial position, and disconnect hardware."""
+    def _teardown_hardware(self, hw: HardwareContext) -> None:
+        """Stop the inference engine, return robot to initial position, and disconnect hardware."""
        if self._engine is not None:
            logger.info("Stopping inference engine...")
            self._engine.stop()
        robot = hw.robot_wrapper.inner
        if robot.is_connected:
-            if return_to_initial_position and hw.initial_position:
+            if hw.initial_position:
                logger.info("Returning robot to initial position before shutdown...")
                self._return_to_initial_position(hw)
-            elif not return_to_initial_position:
-                logger.info(
-                    "Skipping return-to-initial-position (disabled by config); leaving robot in final pose."
-                )
            logger.info("Disconnecting robot...")
            robot.disconnect()
        teleop = hw.teleop
@@ -223,7 +193,7 @@ def estimate_max_episode_seconds(
    The estimate ignores codec-specific settings (CRF, preset) on purpose:
    we only need a rough lower bound on bitrate, not a precise prediction.

-    Falls back to 300 s (5 min) when no video features are present.
+    Falls back to 600 s (10 min) when no video features are present.
    """
    # 0.1 bits-per-pixel is a *low* estimate for CRF-30 streaming video of
    # robot footage (real-world is typically 0.1 – 0.3 bpp).  Under-
@@ -237,16 +207,16 @@ def estimate_max_episode_seconds(
        if feat.get("dtype") == "video":
            shape = feat.get("shape", ())

-            # (H, W, C) — bits-per-pixel is a per-spatial-pixel metric,
-            # so we exclude the channel dimension from the count.
-            if len(shape) == 3:
-                pixels = shape[0] * shape[1]
-                camera_pixels.append(pixels)
-            else:
-                raise ValueError(f"Unexpected video feature shape: {shape}")
+            # Assuming shape could be (C, H, W) or (T, C, H, W)
+            # We want to extract the spatial dimensions.
+            if len(shape) >= 3:
+                h, w = shape[-2], shape[-1]
+                pixels = h * w
+                if pixels > 0:
+                    camera_pixels.append(pixels)

    if not camera_pixels:
-        return 300.0
+        return 600.0

    # Use the smallest camera: it produces the lowest bitrate and therefore
    # takes the longest to reach the target — the conservative choice.
@@ -256,7 +226,7 @@ def estimate_max_episode_seconds(

    # Guard against division by zero just in case
    if bytes_per_second <= 0:
-        return 300.0
+        return 600.0

    return (target_size_mb * 1024 * 1024) / bytes_per_second

@@ -296,9 +266,7 @@ def send_next_action(
    if interp is None:
        return None

-    if len(interp) != len(ordered_keys):
-        raise ValueError(f"Interpolated tensor length ({len(interp)}) != action keys ({len(ordered_keys)})")
-    action_dict = {k: interp[i].item() for i, k in enumerate(ordered_keys)}
+    action_dict = {k: interp[i].item() for i, k in enumerate(ordered_keys) if i < len(interp)}
    processed = ctx.processors.robot_action_processor((action_dict, obs_raw))
    ctx.hardware.robot_wrapper.send_action(processed)
    return action_dict
@@ -24,21 +24,14 @@ the ``input_device`` config field.  Each device exposes three actions:
    1. **pause_resume** — Toggle policy execution (AUTONOMOUS <-> PAUSED).
    2. **correction**   — Toggle correction recording (PAUSED <-> CORRECTING).
    3. **upload**        — Push dataset to hub on demand (corrections-only mode).
-    ESC (keyboard only) — Stop session.
+    ESC (keyboard only)  — Stop session.

-Recording modes:
+Recording Modes:
    ``record_autonomous=True``:  Sentry-like continuous recording with
        time-based episode rotation.  Both autonomous and correction
        frames are recorded; corrections tagged ``intervention=True``.
    ``record_autonomous=False``: Only correction windows are recorded.
        Each correction (start to stop) becomes one episode.
-
-Teleoperator expectations:
-    The user is responsible for keeping the leader arm aligned with the
-    follower arm at the moment a correction begins.  Programmatic motor
-    handover (``enable_torque`` / ``disable_torque`` / ``write_goal_positions``)
-    is intentionally not invoked here — see the TODO in
-    :func:`DAggerStrategy._apply_transition` for the open design decision.
 """

 from __future__ import annotations
@@ -175,10 +168,8 @@ class DAggerEvents:
 # ---------------------------------------------------------------------------


-# TODO(Steven): re-enable programmatic teleop alignment once we decide whether
-# to enforce motor-control methods on every Teleoperator.  Until then the user
-# is responsible for moving the leader arm to the follower's pose at the moment
-# a correction begins.
+# TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+# user is responsible for moving the teleop to the same position as the robot when starting the correction.
 def _teleop_smooth_move_to(
    teleop: Teleoperator, target_pos: dict, duration_s: float = 2.0, fps: int = 50
 ) -> None:
@@ -380,10 +371,7 @@ class DAggerStrategy(RolloutStrategy):
                    logger.info("Dataset uploaded to hub")
                    log_say("Dataset uploaded to hub", play_sounds)

-        self._teardown_hardware(
-            ctx.hardware,
-            return_to_initial_position=ctx.runtime.cfg.return_to_initial_position,
-        )
+        self._teardown_hardware(ctx.hardware)
        logger.info("DAgger strategy teardown complete")

    # ------------------------------------------------------------------
@@ -415,8 +403,8 @@ class DAggerStrategy(RolloutStrategy):
        engine.reset()
        interpolator.reset()
        events.reset()
-        # TODO(Steven): re-enable once Teleoperator motor-control methods are
-        # standardised; until then the user pre-aligns the leader by hand.
+        # TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+        # user is responsible for moving the teleop to the same position as the robot when starting the correction.
        # teleop.disable_torque()
        engine.resume()

@@ -446,22 +434,19 @@ class DAggerStrategy(RolloutStrategy):

                    phase = events.phase
                    obs = robot.get_observation()
+                    obs_processed = ctx.processors.robot_observation_processor(obs)
+                    obs_frame = build_dataset_frame(features, obs_processed, prefix=OBS_STR)

                    # --- CORRECTING: human teleop control ---
-                    # TODO(Steven): teleop runs at the same FPS as the policy. To
-                    # decouple the two, sample teleop at its native rate and
-                    # interpolate to the control loop's tick rate.
                    if phase == DAggerPhase.CORRECTING:
-                        obs_processed = ctx.processors.robot_observation_processor(obs)
                        teleop_action = teleop.get_action()
                        processed_teleop = ctx.processors.teleop_action_processor((teleop_action, obs))
                        robot_action_to_send = ctx.processors.robot_action_processor((processed_teleop, obs))
                        robot.send_action(robot_action_to_send)
                        last_action = robot_action_to_send
                        self._log_telemetry(obs_processed, processed_teleop, ctx.runtime)
+                        action_frame = build_dataset_frame(features, processed_teleop, prefix=ACTION)
                        if record_tick % record_stride == 0:
-                            obs_frame = build_dataset_frame(features, obs_processed, prefix=OBS_STR)
-                            action_frame = build_dataset_frame(features, processed_teleop, prefix=ACTION)
                            frame = {
                                **obs_frame,
                                **action_frame,
@@ -478,7 +463,7 @@ class DAggerStrategy(RolloutStrategy):

                    # --- AUTONOMOUS: policy control ---
                    else:
-                        obs_processed = self._process_observation_and_notify(ctx.processors, obs)
+                        engine.notify_observation(obs_processed)

                        if self._handle_warmup(cfg.use_torch_compile, loop_start, control_interval):
                            continue
@@ -487,9 +472,8 @@ class DAggerStrategy(RolloutStrategy):
                        if action_dict is not None:
                            self._log_telemetry(obs_processed, action_dict, ctx.runtime)
                            last_action = ctx.processors.robot_action_processor((action_dict, obs))
+                            action_frame = build_dataset_frame(features, action_dict, prefix=ACTION)
                            if record_tick % record_stride == 0:
-                                obs_frame = build_dataset_frame(features, obs_processed, prefix=OBS_STR)
-                                action_frame = build_dataset_frame(features, action_dict, prefix=ACTION)
                                frame = {
                                    **obs_frame,
                                    **action_frame,
@@ -499,9 +483,9 @@ class DAggerStrategy(RolloutStrategy):
                                dataset.add_frame(frame)
                            record_tick += 1

-                    # Episode rotation derived from the video file-size target.
-                    # Saving is deferred while a correction is ongoing so the
-                    # episode boundary lands on a clean autonomous frame.
+                    # Episode rotation derived from video file-size target.
+                    # Do NOT save mid-correction — wait for the correction
+                    # to finish so the episode boundary is clean.
                    elapsed = time.perf_counter() - episode_start
                    if elapsed >= episode_duration_s and phase != DAggerPhase.CORRECTING:
                        with self._episode_lock:
@@ -524,16 +508,12 @@ class DAggerStrategy(RolloutStrategy):
                    dt = time.perf_counter() - loop_start
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)
-                    else:
-                        logger.warning(
-                            f"Record loop is running slower ({1 / dt:.1f} Hz) than the target FPS ({cfg.fps} Hz). Dataset frames might be dropped and robot control might be unstable. Common causes are: 1) Camera FPS not keeping up 2) Policy inference taking too long 3) CPU starvation"
-                        )

            finally:
                logger.info("DAgger continuous control loop ended — pausing engine")
                engine.pause()
-                # TODO(Steven): re-enable once Teleoperator motor-control methods
-                # are standardised across all teleop implementations.
+                # TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+                # user is responsible for moving the teleop to the same position as the robot when starting the correction.
                # teleop.disable_torque()
                with contextlib.suppress(Exception):
                    with self._episode_lock:
@@ -570,13 +550,12 @@ class DAggerStrategy(RolloutStrategy):
        engine.reset()
        interpolator.reset()
        events.reset()
-        # TODO(Steven): re-enable once Teleoperator motor-control methods are
-        # standardised; until then the user pre-aligns the leader by hand.
+        # TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+        # user is responsible for moving the teleop to the same position as the robot when starting the correction.
        # teleop.disable_torque()
        engine.resume()

        last_action: dict[str, Any] | None = None
-        start_time = time.perf_counter()
        record_tick = 0
        recorded = 0
        logger.info(
@@ -592,10 +571,6 @@ class DAggerStrategy(RolloutStrategy):
                ):
                    loop_start = time.perf_counter()

-                    if cfg.duration > 0 and (time.perf_counter() - start_time) >= cfg.duration:
-                        logger.info("Duration limit reached (%.0fs)", cfg.duration)
-                        break
-
                    # Process transitions
                    transition = events.consume_transition()
                    if transition is not None:
@@ -624,13 +599,10 @@ class DAggerStrategy(RolloutStrategy):

                    phase = events.phase
                    obs = robot.get_observation()
+                    obs_processed = ctx.processors.robot_observation_processor(obs)

                    # --- CORRECTING: human teleop control + recording ---
-                    # TODO(Steven): teleop runs at the same FPS as the policy. To
-                    # decouple the two, sample teleop at its native rate and
-                    # interpolate to the control loop's tick rate.
                    if phase == DAggerPhase.CORRECTING:
-                        obs_processed = ctx.processors.robot_observation_processor(obs)
                        teleop_action = teleop.get_action()
                        processed_teleop = ctx.processors.teleop_action_processor((teleop_action, obs))
                        robot_action_to_send = ctx.processors.robot_action_processor((processed_teleop, obs))
@@ -638,9 +610,9 @@ class DAggerStrategy(RolloutStrategy):
                        last_action = robot_action_to_send
                        self._log_telemetry(obs_processed, processed_teleop, ctx.runtime)

+                        obs_frame = build_dataset_frame(features, obs_processed, prefix=OBS_STR)
+                        action_frame = build_dataset_frame(features, processed_teleop, prefix=ACTION)
                        if record_tick % record_stride == 0:
-                            obs_frame = build_dataset_frame(features, obs_processed, prefix=OBS_STR)
-                            action_frame = build_dataset_frame(features, processed_teleop, prefix=ACTION)
                            dataset.add_frame(
                                {
                                    **obs_frame,
@@ -658,7 +630,7 @@ class DAggerStrategy(RolloutStrategy):

                    # --- AUTONOMOUS: policy control (no recording) ---
                    else:
-                        obs_processed = self._process_observation_and_notify(ctx.processors, obs)
+                        engine.notify_observation(obs_processed)

                        if self._handle_warmup(cfg.use_torch_compile, loop_start, control_interval):
                            continue
@@ -671,16 +643,12 @@ class DAggerStrategy(RolloutStrategy):
                    dt = time.perf_counter() - loop_start
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)
-                    else:
-                        logger.warning(
-                            f"Record loop is running slower ({1 / dt:.1f} Hz) than the target FPS ({cfg.fps} Hz). Dataset frames might be dropped and robot control might be unstable. Common causes are: 1) Camera FPS not keeping up 2) Policy inference taking too long 3) CPU starvation"
-                        )

            finally:
                logger.info("DAgger corrections-only loop ended — pausing engine")
                engine.pause()
-                # TODO(Steven): re-enable once Teleoperator motor-control methods
-                # are standardised across all teleop implementations.
+                # TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+                # user is responsible for moving the teleop to the same position as the robot when starting the correction.
                # teleop.disable_torque()
                with contextlib.suppress(Exception):
                    with self._episode_lock:
@@ -710,16 +678,14 @@ class DAggerStrategy(RolloutStrategy):
            _robot_pos = {
                k: v for k, v in obs.items() if k.endswith(".pos") and k in robot.observation_features
            }
-            # TODO(Steven): once Teleoperator motor-control methods are
-            # standardised, drive the leader to the follower's pose here so the
-            # operator does not need to pre-align the arm by hand.  Until then
-            # the user is responsible for the alignment.
-            # _teleop_smooth_move_to(teleop, _robot_pos, duration_s=2.0, fps=50)
+            # TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+            # user is responsible for moving the teleop to the same position as the robot when starting the correction.
+            # _teleop_smooth_move_to(teleop, _robot_pos, duration_s=2.0, fps=50)

        elif new_phase == DAggerPhase.CORRECTING:
            logger.info("Entering correction mode — human teleop control")
-            # TODO(Steven): re-enable once Teleoperator motor-control methods
-            # are standardised across all teleop implementations.
+            # TODO(Steven): either enforce this (meaning all teleop must implement these methods) or
+            # user is responsible for moving the teleop to the same position as the robot when starting the correction.
            # teleop.disable_torque()

        elif new_phase == DAggerPhase.AUTONOMOUS:
@@ -25,7 +25,7 @@ from .highlight import HighlightStrategy
 from .sentry import SentryStrategy

 if TYPE_CHECKING:
-    from ..configs import RolloutStrategyConfig
+    from lerobot.rollout import RolloutStrategyConfig


 def create_strategy(config: RolloutStrategyConfig) -> RolloutStrategy:
@@ -22,7 +22,7 @@ import os
 import sys
 import time
 from concurrent.futures import Future, ThreadPoolExecutor
-from threading import Event as ThreadingEvent, Lock
+from threading import Event as ThreadingEvent

 from lerobot.common.control_utils import is_headless
 from lerobot.datasets import VideoEncodingManager
@@ -64,8 +64,8 @@ class HighlightStrategy(RolloutStrategy):
    3. The episode is saved and the ring buffer resumes capturing.

    Requires ``streaming_encoding=True`` (enforced in config validation)
-    so that ``dataset.add_frame`` is a non-blocking queue put — flushing
-    the entire ring buffer in one tick must not stall the control loop.
+    so that ``dataset.add_frame`` is a non-blocking queue put — draining
+    the ring buffer (e.g. 900 frames for 30 s at 30 fps) stays sub-ms per frame.
    """

    config: HighlightStrategyConfig
@@ -80,7 +80,6 @@ class HighlightStrategy(RolloutStrategy):
        self._push_requested = ThreadingEvent()
        self._push_executor: ThreadPoolExecutor | None = None
        self._pending_push: Future | None = None
-        self._episode_lock = Lock()

    def setup(self, ctx: RolloutContext) -> None:
        """Initialise the inference engine, ring buffer, and keyboard listener."""
@@ -135,7 +134,8 @@ class HighlightStrategy(RolloutStrategy):
                        break

                    obs = robot.get_observation()
-                    obs_processed = self._process_observation_and_notify(ctx.processors, obs)
+                    obs_processed = ctx.processors.robot_observation_processor(obs)
+                    engine.notify_observation(obs_processed)

                    if self._handle_warmup(cfg.use_torch_compile, loop_start, control_interval):
                        continue
@@ -151,7 +151,11 @@ class HighlightStrategy(RolloutStrategy):
                        # NOTE: ``is_set()`` then ``clear()`` is not atomic
                        # against the keyboard thread setting the flag again
                        # in between — but that is benign: we lose at most one
-                        # toggle, processed on the next iteration.
+                        # toggle, processed on the next iteration.  The
+                        # ``_recording_live`` branch below is reached in the
+                        # SAME iteration after ``clear()`` runs, so a frame
+                        # finalised by ``save_episode()`` is never re-added to
+                        # the next episode.
                        if self._save_requested.is_set():
                            self._save_requested.clear()
                            if not self._recording_live.is_set():
@@ -164,15 +168,13 @@ class HighlightStrategy(RolloutStrategy):
                                self._recording_live.set()
                            else:
                                dataset.add_frame(frame)
-                                with self._episode_lock:
-                                    dataset.save_episode()
+                                dataset.save_episode()
                                logger.info("Episode saved (total: %d)", dataset.num_episodes)
                                log_say(
                                    f"Episode {dataset.num_episodes} saved",
                                    play_sounds,
                                )
                                self._recording_live.clear()
-                                continue  # frame already consumed — skip ring.append

                        if self._push_requested.is_set():
                            self._push_requested.clear()
@@ -187,16 +189,12 @@ class HighlightStrategy(RolloutStrategy):
                    dt = time.perf_counter() - loop_start
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)
-                    else:
-                        logger.warning(
-                            f"Record loop is running slower ({1 / dt:.1f} Hz) than the target FPS ({cfg.fps} Hz). Dataset frames might be dropped and robot control might be unstable. Common causes are: 1) Camera FPS not keeping up 2) Policy inference taking too long 3) CPU starvation"
-                        )

            finally:
                logger.info("Highlight control loop ended")
                if self._recording_live.is_set():
                    logger.info("Saving in-progress live episode")
-                    with contextlib.suppress(Exception), self._episode_lock:
+                    with contextlib.suppress(Exception):
                        dataset.save_episode()

    def teardown(self, ctx: RolloutContext) -> None:
@@ -227,10 +225,7 @@ class HighlightStrategy(RolloutStrategy):
                    logger.info("Dataset uploaded to hub")
                    log_say("Dataset uploaded to hub", play_sounds)

-        self._teardown_hardware(
-            ctx.hardware,
-            return_to_initial_position=ctx.runtime.cfg.return_to_initial_position,
-        )
+        self._teardown_hardware(ctx.hardware)
        logger.info("Highlight strategy teardown complete")

    def _setup_keyboard(self, shutdown_event: ThreadingEvent) -> None:
@@ -269,13 +264,12 @@ class HighlightStrategy(RolloutStrategy):

        def _push():
            try:
-                with self._episode_lock:
-                    if safe_push_to_hub(
-                        dataset,
-                        tags=cfg.dataset.tags if cfg.dataset else None,
-                        private=cfg.dataset.private if cfg.dataset else False,
-                    ):
-                        logger.info("Background push to hub complete")
+                if safe_push_to_hub(
+                    dataset,
+                    tags=cfg.dataset.tags if cfg.dataset else None,
+                    private=cfg.dataset.private if cfg.dataset else False,
+                ):
+                    logger.info("Background push to hub complete")
            except Exception as e:
                logger.error("Background push failed: %s", e)

@@ -111,7 +111,8 @@ class SentryStrategy(RolloutStrategy):
                        break

                    obs = robot.get_observation()
-                    obs_processed = self._process_observation_and_notify(ctx.processors, obs)
+                    obs_processed = ctx.processors.robot_observation_processor(obs)
+                    engine.notify_observation(obs_processed)

                    if self._handle_warmup(cfg.use_torch_compile, loop_start, control_interval):
                        continue
@@ -159,10 +160,6 @@ class SentryStrategy(RolloutStrategy):
                    dt = time.perf_counter() - loop_start
                    if (sleep_t := control_interval - dt) > 0:
                        precise_sleep(sleep_t)
-                    else:
-                        logger.warning(
-                            f"Record loop is running slower ({1 / dt:.1f} Hz) than the target FPS ({cfg.fps} Hz). Dataset frames might be dropped and robot control might be unstable. Common causes are: 1) Camera FPS not keeping up 2) Policy inference taking too long 3) CPU starvation"
-                        )

            finally:
                logger.info("Sentry control loop ended — saving final episode")
@@ -196,10 +193,7 @@ class SentryStrategy(RolloutStrategy):
                    logger.info("Dataset uploaded to hub")
                    log_say("Dataset uploaded to hub", play_sounds)

-        self._teardown_hardware(
-            ctx.hardware,
-            return_to_initial_position=ctx.runtime.cfg.return_to_initial_position,
-        )
+        self._teardown_hardware(ctx.hardware)
        logger.info("Sentry strategy teardown complete")

    def _background_push(self, dataset, cfg) -> None:
@@ -70,7 +70,6 @@ from lerobot.datasets.io_utils import (
    get_parquet_file_size_in_mb,
    get_parquet_num_frames,
    load_info,
-    load_json,
    write_episodes,
    write_info,
    write_stats,
@@ -82,11 +81,9 @@ from lerobot.datasets.utils import (
    DEFAULT_DATA_PATH,
    DEFAULT_VIDEO_FILE_SIZE_IN_MB,
    DEFAULT_VIDEO_PATH,
-    INFO_PATH,
    LEGACY_EPISODES_PATH,
    LEGACY_EPISODES_STATS_PATH,
    LEGACY_TASKS_PATH,
-    DatasetInfo,
    update_chunk_file_indices,
 )
 from lerobot.datasets.video_utils import concatenate_video_files, get_video_duration_in_s
@@ -168,7 +165,7 @@ def legacy_load_tasks(local_dir: Path) -> tuple[dict, dict]:
 def validate_local_dataset_version(local_path: Path) -> None:
    """Validate that the local dataset has the expected v2.1 version."""
    info = load_info(local_path)
-    dataset_version = info.codebase_version or "unknown"
+    dataset_version = info.get("codebase_version", "unknown")
    if dataset_version != V21:
        raise ValueError(
            f"Local dataset has codebase version '{dataset_version}', expected '{V21}'. "
@@ -259,14 +256,14 @@ def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):

 def get_video_keys(root):
    info = load_info(root)
-    features = info.features
+    features = info["features"]
    video_keys = [key for key, ft in features.items() if ft["dtype"] == "video"]
    return video_keys


 def get_image_keys(root):
    info = load_info(root)
-    features = info.features
+    features = info["features"]
    image_keys = [key for key, ft in features.items() if ft["dtype"] == "image"]
    return image_keys

@@ -437,8 +434,7 @@ def convert_episodes_metadata(root, new_root, episodes_metadata, episodes_video_


 def convert_info(root, new_root, data_file_size_in_mb, video_file_size_in_mb):
-    # Load as raw dict to remove legacy v2.1 fields before constructing DatasetInfo.
-    info = load_json(root / INFO_PATH)
+    info = load_info(root)
    info["codebase_version"] = V30
    del info["total_chunks"]
    del info["total_videos"]
@@ -453,9 +449,7 @@ def convert_info(root, new_root, data_file_size_in_mb, video_file_size_in_mb):
            # already has fps in video_info
            continue
        info["features"][key]["fps"] = info["fps"]
-    # Convert raw dict to typed DatasetInfo before writing
-    dataset_info = DatasetInfo.from_dict(info)
-    write_info(dataset_info, new_root)
+    write_info(info, new_root)


 def convert_dataset(
@@ -150,24 +150,11 @@ Show dataset information without feature details:
        --operation.type info \
        --operation.show_features false

-Recompute dataset statistics (saves to lerobot/pusht_recomputed_stats by default):
+Recompute dataset statistics:
    lerobot-edit-dataset \
        --repo_id lerobot/pusht \
        --operation.type recompute_stats

-Recompute stats and save to a specific new repo_id:
-    lerobot-edit-dataset \
-        --repo_id lerobot/pusht \
-        --new_repo_id lerobot/pusht_new_stats \
-        --operation.type recompute_stats
-
-Recompute stats in-place (overwrites original dataset stats):
-    lerobot-edit-dataset \
-        --repo_id lerobot/pusht \
-        --new_repo_id lerobot/pusht \
-        --operation.type recompute_stats \
-        --operation.overwrite true
-
 Recompute stats for relative actions and push to hub:
    lerobot-edit-dataset \
        --repo_id lerobot/pusht \
@@ -269,7 +256,6 @@ class RecomputeStatsConfig(OperationConfig):
    relative_exclude_joints: list[str] | None = None
    chunk_size: int = 50
    num_workers: int = 0
-    overwrite: bool = False


@OperationConfig.register_subclass("info")
@@ -294,30 +280,16 @@ class EditDatasetConfig:
    push_to_hub: bool = False


-def _resolve_io_paths(
-    repo_id: str,
-    new_repo_id: str | None,
-    root: Path | str | None,
-    new_root: Path | str | None,
-    default_new_repo_id: str | None = None,
-) -> tuple[str, Path, Path]:
-    """Resolve input/output paths and repo_id for dataset operations.
-
-    Returns (output_repo_id, input_path, output_path) with resolved (symlink-safe) paths.
-    """
-    input_path = (Path(root) if root else HF_LEROBOT_HOME / repo_id).resolve()
-    output_repo_id = new_repo_id or default_new_repo_id or repo_id
-    output_path = (Path(new_root) if new_root else HF_LEROBOT_HOME / output_repo_id).resolve()
-    return output_repo_id, input_path, output_path
-
-
 def get_output_path(
    repo_id: str,
    new_repo_id: str | None,
    root: Path | str | None,
    new_root: Path | str | None,
 ) -> tuple[str, Path]:
-    output_repo_id, input_path, output_path = _resolve_io_paths(repo_id, new_repo_id, root, new_root)
+    input_path = Path(root) if root else HF_LEROBOT_HOME / repo_id
+
+    output_repo_id = new_repo_id if new_repo_id else repo_id
+    output_path = Path(new_root) if new_root else HF_LEROBOT_HOME / output_repo_id

    # In case of in-place modification, create a backup of the original dataset (if it exists)
    if output_path == input_path:
@@ -585,39 +557,7 @@ def handle_recompute_stats(cfg: EditDatasetConfig) -> None:
    if not isinstance(cfg.operation, RecomputeStatsConfig):
        raise ValueError("Operation config must be RecomputeStatsConfig")

-    # Determine whether this is an in-place operation
-    output_repo_id, input_root, output_root = _resolve_io_paths(
-        cfg.repo_id,
-        cfg.new_repo_id,
-        cfg.root,
-        cfg.new_root,
-        default_new_repo_id=f"{cfg.repo_id}_recomputed_stats",
-    )
-    in_place = output_root == input_root
-
-    if in_place and not cfg.operation.overwrite:
-        raise ValueError(
-            f"recompute_stats would overwrite the dataset in-place at {input_root}. "
-            "Pass --operation.overwrite true to allow in-place modification, "
-            "or use --new_repo_id / --new_root to write to a different location. "
-            f"Default output repo_id when neither is set: '{cfg.repo_id}_recomputed_stats'."
-        )
-
-    if in_place:
-        logging.warning(
-            f"Overwriting dataset stats in-place at {input_root}. The original stats will be lost."
-        )
-        dataset = LeRobotDataset(cfg.repo_id, root=input_root)
-    else:
-        logging.info(f"Copying dataset from {input_root} to {output_root}")
-        if output_root.exists():
-            backup_path = output_root.with_name(output_root.name + "_old")
-            logging.warning(f"Output directory {output_root} already exists. Moving to {backup_path}")
-            if backup_path.exists():
-                shutil.rmtree(backup_path)
-            shutil.move(output_root, backup_path)
-        shutil.copytree(input_root, output_root)
-        dataset = LeRobotDataset(output_repo_id, root=output_root)
+    dataset = LeRobotDataset(cfg.repo_id, root=cfg.root)

    logging.info(f"Recomputing stats for {cfg.repo_id}")
    if cfg.operation.relative_action:
@@ -638,7 +578,7 @@ def handle_recompute_stats(cfg: EditDatasetConfig) -> None:
    logging.info(f"Stats written to {dataset.root}")

    if cfg.push_to_hub:
-        logging.info(f"Pushing to hub as {dataset.repo_id}...")
+        logging.info(f"Pushing to hub as {dataset.meta.repo_id}...")
        dataset.push_to_hub()


@@ -389,13 +389,11 @@ def record(
            sanity_check_dataset_robot_compatibility(dataset, robot, cfg.dataset.fps, dataset_features)
        else:
            # Reject eval_ prefix — for policy evaluation use lerobot-rollout
-            repo_name = cfg.dataset.repo_id.split("/", 1)[-1]
-            if repo_name.startswith("eval_"):
+            if cfg.dataset.repo_id.startswith("eval_"):
                raise ValueError(
                    "Dataset names starting with 'eval_' are reserved for policy evaluation. "
                    "lerobot-record is for data collection only. Use lerobot-rollout for policy deployment."
                )
-            cfg.dataset.stamp_repo_id()
            dataset = LeRobotDataset.create(
                cfg.dataset.repo_id,
                cfg.dataset.fps,
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Steven Palma	8e21268c29	test: add dataset guard + fix imports	2026-04-20 00:36:02 +02:00
Steven Palma	4130d4a4a5	update docs + docstrings + examples + add minimal test	2026-04-19 23:53:53 +02:00
Steven Palma	47bb840a55	add context guards	2026-04-19 23:21:14 +02:00
Steven Palma	9519ff5e09	Merge branch 'main' into feat/decouple_record_script Signed-off-by: Steven Palma <imstevenpmwork@ieee.org>	2026-04-19 22:48:08 +02:00
Steven Palma	32a27cae8a	filesize default change + more logs + filesize mb based episode + go back to init pos + rerun log + date end of repo_id	2026-04-19 16:50:19 +02:00
Steven Palma	8cee56e2d6	fix pre-commit	2026-04-17 16:46:58 +02:00
Steven Palma	a76874f35e	test dagger	2026-04-17 16:46:38 +02:00
Steven Palma	35bb2c7459	simplify dagger	2026-04-17 15:55:03 +02:00
Steven Palma	051f6c6803	Merge branch 'main' into feat/decouple_record_script	2026-04-17 14:25:18 +02:00
Steven Palma	04ae0312a2	HW tests fixes	2026-04-16 17:29:22 +02:00
Steven Palma	cc634de9e7	add docstrings	2026-04-16 16:40:33 +02:00
Steven Palma	3eda5712d3	some more iterations	2026-04-16 15:52:23 +02:00
Steven Palma	783ec6e232	minor improvements	2026-04-16 14:34:22 +02:00
Steven Palma	4e3175ff15	address review	2026-04-15 19:31:53 +02:00
Steven Palma	edd7fc52a8	feat: introduce inference engine strategy	2026-04-15 17:51:44 +02:00
Steven Palma	0f0f8b8961	imports and comments	2026-04-15 16:28:56 +02:00
Steven Palma	79db54dc34	Merge branch 'main' into feat/decouple_record_script	2026-04-15 11:06:45 +02:00
Steven Palma	6ae07878f7	Merge branch 'main' into feat/decouple_record_script	2026-04-14 22:54:29 +02:00
Steven Palma	10d05e03bc	Merge branch 'main' into feat/decouple_record_script	2026-04-14 21:35:26 +02:00
Steven Palma	f2c29d78cf	more improvements and fixes	2026-04-14 17:51:03 +02:00
Steven Palma	8bc47e4318	target review	2026-04-14 17:14:09 +02:00
Steven Palma	49f32b9796	some more iterations	2026-04-14 16:34:52 +02:00
Steven Palma	f55782f9f7	pre-commit run	2026-04-14 15:42:19 +02:00
Steven Palma	05a2604d6e	first iteration	2026-04-14 15:42:04 +02:00
				`@@ -0,0 +1 @@`
				`../../../../docs/source/policy_sarm_README.md`