From 0b326053e927c98731dbf7b5c01d16251343e27c Mon Sep 17 00:00:00 2001
From: Jade Choghari
Date: Thu, 27 Nov 2025 13:38:12 +0100
Subject: [PATCH] remove timm dep

---
 pyproject.toml                          |  4 ++--
 .../policies/xvla/modeling_florence2.py | 17 +++++++++++++++-
 src/lerobot/policies/xvla/utils.py      | 20 +++++++++++++++++++
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 72e0b50a2..c71bc45fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -129,7 +129,7 @@ groot = [
     "ninja>=1.11.1,<2.0.0",
     "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
 ]
-xlva = ["lerobot[transformers-dep]", "timm>=1.0.0,<1.1.0"]
+xvla = ["lerobot[transformers-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
 
 # Features
@@ -158,7 +158,7 @@ all = [
     "lerobot[pi]",
     "lerobot[smolvla]",
     # "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
-    # "lerobot[xvla]",
+    "lerobot[xvla]",
     "lerobot[hilserl]",
     "lerobot[async]",
     "lerobot[dev]",
diff --git a/src/lerobot/policies/xvla/modeling_florence2.py b/src/lerobot/policies/xvla/modeling_florence2.py
index e65e15967..49a5e9c84 100644
--- a/src/lerobot/policies/xvla/modeling_florence2.py
+++ b/src/lerobot/policies/xvla/modeling_florence2.py
@@ -23,7 +23,6 @@ import torch.nn.functional as functional
 import torch.utils.checkpoint
 import torch.utils.checkpoint as checkpoint
 from einops import rearrange
-from timm.layers import DropPath
 from torch import nn
 from torch.nn import CrossEntropyLoss
 from transformers.activations import ACT2FN
@@ -52,6 +51,7 @@ from transformers.utils import (
 )
 
 from .configuration_florence2 import Florence2Config, Florence2LanguageConfig, Florence2VisionConfig
+from .utils import drop_path
 
 if is_flash_attn_2_available():
     from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
@@ -61,6 +61,21 @@ logger = logging.get_logger(__name__)
 _CONFIG_FOR_DOC = "Florence2Config"
 
 
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
+
+    def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
+        super().__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
+
+    def extra_repr(self):
+        return f"drop_prob={round(self.drop_prob, 3):0.3f}"
+
+
 class LearnedAbsolutePositionEmbedding2D(nn.Module):
     """
     This module learns positional embeddings up to a fixed maximum size.
diff --git a/src/lerobot/policies/xvla/utils.py b/src/lerobot/policies/xvla/utils.py
index 73793981e..bf31ffd82 100644
--- a/src/lerobot/policies/xvla/utils.py
+++ b/src/lerobot/policies/xvla/utils.py
@@ -116,3 +116,23 @@ def mat_to_rotate6d(abs_action):
         return np.concatenate([abs_action[:, :3, 0], abs_action[:, :3, 1]], axis=-1)
     else:
         raise NotImplementedError
+
+
+def drop_path(x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+
+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.
+
+    """
+    if drop_prob == 0.0 or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
+    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+    if keep_prob > 0.0 and scale_by_keep:
+        random_tensor.div_(keep_prob)
+    return x * random_tensor
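
A quick sanity check of the vendored helper (a minimal sketch, not part of the patch; it assumes the patch is applied so that drop_path is importable from lerobot.policies.xvla.utils):

    import torch

    from lerobot.policies.xvla.utils import drop_path

    x = torch.ones(8, 4, 16)  # (batch, tokens, dim)

    # Outside training (or with drop_prob == 0) the tensor passes through unchanged.
    assert torch.equal(drop_path(x, drop_prob=0.2, training=False), x)

    # During training, each sample is either zeroed entirely or rescaled by
    # 1 / keep_prob, so the expected value of the output matches the input.
    out = drop_path(x, drop_prob=0.2, training=True)
    per_sample = out.flatten(1).amax(dim=1)  # one scalar per sample; 0 if dropped
    kept = per_sample > 0
    assert torch.all(per_sample[~kept] == 0)
    assert torch.allclose(per_sample[kept], torch.tensor(1 / 0.8))

The DropPath module added to modeling_florence2.py just wraps this function and picks up training from nn.Module, so it should behave as a drop-in replacement for timm.layers.DropPath, which the vendored code mirrors.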