From 84bb0336311f5d619dae7e8e9a688ad63abf2679 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 17 Apr 2026 13:40:01 +0100 Subject: [PATCH] ci(robotwin): smoke-eval 10 tasks instead of 5 Broader coverage on the RoboTwin 2.0 benchmark CI job: bump the smoke eval from 5 tasks to 10 (one episode each). Added tasks are all drawn from ROBOTWIN_TASKS and mirror the shape/complexity of the existing set (simple single-object or single-fixture manipulations). Tasks now run: beat_block_hammer, click_bell, handover_block, open_laptop, stack_blocks_two, click_alarmclock, close_laptop, close_microwave, open_microwave, place_block. `parse_eval_metrics.py` reads `overall` for multi-task runs so no parser change is needed. Bumped the step name and the metrics label to reflect the 10-task layout. Made-with: Cursor --- .github/workflows/benchmark_tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/benchmark_tests.yml b/.github/workflows/benchmark_tests.yml index 37d6e3fd2..523a31175 100644 --- a/.github/workflows/benchmark_tests.yml +++ b/.github/workflows/benchmark_tests.yml @@ -354,7 +354,7 @@ jobs: cache-from: type=local,src=/tmp/.buildx-cache-robotwin cache-to: type=local,dest=/tmp/.buildx-cache-robotwin,mode=max - - name: Run RoboTwin 2.0 smoke eval (1 episode) + - name: Run RoboTwin 2.0 smoke eval (10 tasks, 1 episode each) if: env.HF_USER_TOKEN != '' run: | # Named container (no --rm) so we can docker cp artifacts out. @@ -368,7 +368,7 @@ jobs: cd /opt/robotwin && lerobot-eval \ --policy.path=lerobot/smolvla_robotwin \ --env.type=robotwin \ - --env.task=beat_block_hammer,click_bell,handover_block,open_laptop,stack_blocks_two \ + --env.task=beat_block_hammer,click_bell,handover_block,open_laptop,stack_blocks_two,click_alarmclock,close_laptop,close_microwave,open_microwave,place_block \ --eval.batch_size=1 \ --eval.n_episodes=1 \ --eval.use_async_envs=false \ @@ -377,7 +377,7 @@ jobs: --output_dir=/tmp/eval-artifacts python /lerobot/scripts/ci/extract_task_descriptions.py \ --env robotwin \ - --task beat_block_hammer,click_bell,handover_block,open_laptop,stack_blocks_two \ + --task beat_block_hammer,click_bell,handover_block,open_laptop,stack_blocks_two,click_alarmclock,close_laptop,close_microwave,open_microwave,place_block \ --output /tmp/eval-artifacts/task_descriptions.json " @@ -394,7 +394,7 @@ jobs: python3 scripts/ci/parse_eval_metrics.py \ --artifacts-dir /tmp/robotwin-artifacts \ --env robotwin \ - --task beat_block_hammer,click_bell,handover_block,open_laptop,stack_blocks_two \ + --task beat_block_hammer,click_bell,handover_block,open_laptop,stack_blocks_two,click_alarmclock,close_laptop,close_microwave,open_microwave,place_block \ --policy lerobot/smolvla_robotwin - name: Upload RoboTwin rollout video