fix(ci): run VLABench tasks 5-at-a-time in parallel

The eval script already supports running multiple tasks concurrently via a ThreadPoolExecutor (env.max_parallel_tasks). Set it to 5 so the 10 VLABench tasks finish in ~2 waves instead of running sequentially. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
fix(ci): cap VLABench smoke eval at 50 steps per task
2026-05-28 15:09:51 +00:00 · 2026-05-07 11:57:03 +02:00 · 2026-05-07 11:16:43 +02:00
15 changed files with 634 additions and 744 deletions
@@ -382,7 +382,6 @@ jobs:
                --policy.path=\"\$ROBOTWIN_POLICY\" \
                --env.type=robotwin \
                --env.task=\"\$ROBOTWIN_TASKS\" \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -483,7 +482,6 @@ jobs:
                --policy.path=lerobot/smolvla_robocasa \
                --env.type=robocasa \
                --env.task=CloseFridge,OpenCabinet,OpenDrawer,TurnOnMicrowave,TurnOffStove,CloseToasterOvenDoor,SlideDishwasherRack,TurnOnSinkFaucet,NavigateKitchen,TurnOnElectricKettle \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -695,7 +693,6 @@ jobs:
                --env.task=\"\$ROBOMME_TASKS\" \
                --env.dataset_split=test \
                --env.task_ids=[0] \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -803,7 +800,6 @@ jobs:
                --env.type=libero_plus \
                --env.task=\"\$LIBERO_PLUS_SUITE\" \
                --env.task_ids=\"\$LIBERO_PLUS_TASK_IDS\" \
-                --env.max_parallel_tasks=5 \
                --eval.batch_size=1 \
                --eval.n_episodes=1 \
                --eval.use_async_envs=false \
@@ -19,8 +19,8 @@ on:
  workflow_dispatch:

  # Runs at 02:00
-  # schedule:
-  #   - cron: "0 2 * * *"
+  schedule:
+    - cron: "0 2 * * *"

 env:
  CLOSE_ISSUE_MESSAGE: >
@@ -59,8 +59,8 @@ keywords = ["lerobot", "huggingface", "robotics",  "machine learning", "artifici

 dependencies = [
    # Core ML
-    "torch>=2.7,<2.13.0",
-    "torchvision>=0.22.0,<0.28.0",
+    "torch>=2.7,<2.11.0",
+    "torchvision>=0.22.0,<0.26.0",
    "numpy>=2.0.0,<2.3.0", # NOTE: Explicitly listing numpy helps the resolver converge faster. Upper bound imposed by opencv-python-headless.
    "opencv-python-headless>=4.9.0,<4.14.0",
    "Pillow>=10.0.0,<13.0.0",
@@ -99,7 +99,7 @@ dataset = [
    "pandas>=2.0.0,<3.0.0", # NOTE: Transitive dependency of datasets
    "pyarrow>=21.0.0,<30.0.0", # NOTE: Transitive dependency of datasets
    "lerobot[av-dep]",
-    "torchcodec>=0.3.0,<0.13.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10), 0.11 needs torch==2.11, 0.12 needs torch==2.12.
+    "torchcodec>=0.3.0,<0.11.0; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')) and (sys_platform != 'darwin' or platform_machine != 'x86_64')", # NOTE: Windows support starts at version 0.7 (needs torch==2.8), ffmpeg>=8 support starts at version 0.8.1 (needs torch==2.9), system-wide ffmpeg support starts at version 0.10 (needs torch==2.10).
    "jsonlines>=4.0.0,<5.0.0",
 ]
 training = [
@@ -256,9 +256,7 @@ class TrainPipelineConfig(HubMixin):
                ) from e

        cli_args = kwargs.pop("cli_args", [])
-        # Legacy RA-BC migration only applies to framework-saved checkpoints (always JSON).
-        # Hand-written YAML/TOML configs are expected to use the current sample_weighting schema.
-        if config_file is not None and config_file.endswith(".json"):
+        if config_file is not None:
            with open(config_file) as f:
                config = json.load(f)
            migrated_config = _migrate_legacy_rabc_fields(config)
@@ -282,11 +282,7 @@ class VideoDecoderCache:
        with self._lock:
            if video_path not in self._cache:
                file_handle = fsspec.open(video_path).__enter__()
-                try:
-                    decoder = VideoDecoder(file_handle, seek_mode="approximate")
-                except Exception:
-                    file_handle.close()
-                    raise
+                decoder = VideoDecoder(file_handle, seek_mode="approximate")
                self._cache[video_path] = (decoder, file_handle)

            return self._cache[video_path][0]
@@ -100,8 +100,8 @@ class DiffusionConfig(PreTrainedConfig):

    # Inputs / output structure.
    n_obs_steps: int = 2
-    horizon: int = 64
-    n_action_steps: int = 32
+    horizon: int = 16
+    n_action_steps: int = 8

    normalization_mapping: dict[str, NormalizationMode] = field(
        default_factory=lambda: {
@@ -122,10 +122,10 @@ class DiffusionConfig(PreTrainedConfig):
    crop_ratio: float = 1.0
    crop_shape: tuple[int, int] | None = None
    crop_is_random: bool = True
-    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
-    use_group_norm: bool = False
+    pretrained_backbone_weights: str | None = None
+    use_group_norm: bool = True
    spatial_softmax_num_keypoints: int = 32
-    use_separate_rgb_encoder_per_camera: bool = True
+    use_separate_rgb_encoder_per_camera: bool = False
    # Unet.
    down_dims: tuple[int, ...] = (512, 1024, 2048)
    kernel_size: int = 5
@@ -97,8 +97,8 @@ class VQBeTConfig(PreTrainedConfig):
    vision_backbone: str = "resnet18"
    crop_shape: tuple[int, int] | None = (84, 84)
    crop_is_random: bool = True
-    pretrained_backbone_weights: str | None = "ResNet18_Weights.IMAGENET1K_V1"
-    use_group_norm: bool = False
+    pretrained_backbone_weights: str | None = None
+    use_group_norm: bool = True
    spatial_softmax_num_keypoints: int = 32
    # VQ-VAE
    n_vqvae_training_steps: int = 20000
@@ -939,7 +939,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
        input_dtype = query_states.dtype
        if input_dtype == torch.float32:
            if torch.is_autocast_enabled():
-                target_dtype = torch.get_autocast_dtype(query_states.device.type)
+                target_dtype = torch.get_autocast_gpu_dtype()
            # Handle the case where the model is quantized
            elif hasattr(self.config, "_pre_quantization_dtype"):
                target_dtype = self.config._pre_quantization_dtype
@@ -985,7 +985,7 @@ class Florence2FlashAttention2(Florence2Attention):
        input_dtype = query_states.dtype
        if input_dtype == torch.float32:
            if torch.is_autocast_enabled():
-                target_dtype = torch.get_autocast_dtype(query_states.device.type)
+                target_dtype = torch.get_autocast_gpu_dtype()
            # Handle the case where the model is quantized
            elif hasattr(self.config, "_pre_quantization_dtype"):
                target_dtype = self.config._pre_quantization_dtype
@@ -46,7 +46,7 @@ class LeKiwiConfig(RobotConfig):
    cameras: dict[str, CameraConfig] = field(default_factory=lekiwi_cameras_config)

    # Set to `True` for backward compatibility with previous policies/dataset
-    use_degrees: bool = True
+    use_degrees: bool = False


@dataclass
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51effd76b73e972f10d31f5084ab906386134b600c87b2668767d30232a902bd
+oid sha256:54aecbc1af72a4cd5e9261492f5e7601890517516257aacdf2a0ffb3ce281f1b
 size 992
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4d7a16ca67f9adefac0e0620a7b2e9c822f2db42faaaced7a89fbad60e5ead4
-size 47680
+oid sha256:88a9c3775a2aa1e90a08850521970070a4fcf0f6b82aab43cd8ccc5cf77e0013
+size 47424
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:796c439ee8a64bf9901ff8325e7419bda8bd316360ee95e6304e8e1ae0f4c36c
+oid sha256:91a2635e05a75fe187a5081504c5f35ce3417378813fa2deaf9ca4e8200e1819
 size 68
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad33a8b47c39c2e1374567ff9da43cdb95e2dbe904c1b02a35051346d3043095
-size 47680
+oid sha256:645bff922ac7bea63ad018ebf77c303c0e4cd2c1c0dc5ef3192865281bef3dc6
+size 47424