From 10fa65a996f982083070fc51879cbb3279221e96 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Tue, 28 Apr 2026 12:05:00 +0200 Subject: [PATCH] fix(annotate): LEROBOT_DISABLE_CUDNN escape hatch for conv3d crash cuDNN 9.x + torch 2.8 has a regression where the conv3d kernel used in Qwen-VL vision tower patch embedders fails with CUDNN_STATUS_NOT_INITIALIZED. The crash is independent of model size and reproduces on both Qwen2.5-VL and Qwen3-VL because both use 3D conv for video patch embedding. Setting LEROBOT_DISABLE_CUDNN=1 falls back to native PyTorch conv3d kernels (slower but functional) so the pipeline can run while the torch/cuDNN stack is still on the broken combo. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../annotations/steerable_pipeline/vlm_client.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/lerobot/annotations/steerable_pipeline/vlm_client.py b/src/lerobot/annotations/steerable_pipeline/vlm_client.py index 9ac40ec4f..2406a2a2b 100644 --- a/src/lerobot/annotations/steerable_pipeline/vlm_client.py +++ b/src/lerobot/annotations/steerable_pipeline/vlm_client.py @@ -148,6 +148,16 @@ def _make_vllm_client(config: VlmConfig) -> VlmClient: raise ImportError( "vllm is required for backend='vllm'. Install with `pip install lerobot[annotations]`." ) from exc + # Workaround for cuDNN 9.x + torch 2.8 conv3d regression that surfaces + # as CUDNN_STATUS_NOT_INITIALIZED in Qwen-VL vision-tower patch + # embedders. Setting LEROBOT_DISABLE_CUDNN=1 forces native PyTorch + # convolution kernels — slower but functional. + import os as _os # noqa: PLC0415 + + if _os.environ.get("LEROBOT_DISABLE_CUDNN", "").lower() in {"1", "true", "yes"}: + import torch as _torch # noqa: PLC0415 + + _torch.backends.cudnn.enabled = False llm_kwargs: dict[str, Any] = { "model": config.model_id, "tensor_parallel_size": config.tensor_parallel_size,