remove timm dep

2026-07-24 18:26:11 +00:00 · 2025-11-27 13:38:12 +01:00
parent ca4b3d035b
commit 0b326053e9
3 changed files with 38 additions and 3 deletions
@@ -129,7 +129,7 @@ groot = [
    "ninja>=1.11.1,<2.0.0",
    "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
 ]
-xlva = ["lerobot[transformers-dep]", "timm>=1.0.0,<1.1.0"]
+xvla = ["lerobot[transformers-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
 # Features
@@ -158,7 +158,7 @@ all = [
    "lerobot[pi]",
    "lerobot[smolvla]",
    # "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
-    # "lerobot[xvla]",
+    "lerobot[xvla]",
    "lerobot[hilserl]",
    "lerobot[async]",
    "lerobot[dev]",
@@ -23,7 +23,6 @@ import torch.nn.functional as functional
 import torch.utils.checkpoint
 import torch.utils.checkpoint as checkpoint
 from einops import rearrange
 from timm.layers import DropPath
 from torch import nn
 from torch.nn import CrossEntropyLoss
 from transformers.activations import ACT2FN
@@ -52,6 +51,7 @@ from transformers.utils import (
 )
 from .configuration_florence2 import Florence2Config, Florence2LanguageConfig, Florence2VisionConfig
 from .utils import drop_path
 if is_flash_attn_2_available():
    from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
@@ -61,6 +61,21 @@ logger = logging.get_logger(__name__)
 _CONFIG_FOR_DOC = "Florence2Config"
 class DropPath(nn.Module):
    """Module wrapper that applies ``drop_path`` (stochastic depth) to its input.
    The module's own ``training`` flag is forwarded to ``drop_path`` on every
    call, together with the two settings stored at construction time.
    Args:
        drop_prob: Value handed to ``drop_path`` as ``drop_prob``.
        scale_by_keep: Value handed to ``drop_path`` as ``scale_by_keep``.
    """
    def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
        super().__init__()
        self.scale_by_keep = scale_by_keep
        self.drop_prob = drop_prob
    def forward(self, x):
        # Delegate entirely to the functional form; no state is kept here.
        return drop_path(
            x,
            drop_prob=self.drop_prob,
            training=self.training,
            scale_by_keep=self.scale_by_keep,
        )
    def extra_repr(self):
        # Shown inside the module's repr, always with three decimals.
        rounded = round(self.drop_prob, 3)
        return "drop_prob={:0.3f}".format(rounded)
 class LearnedAbsolutePositionEmbedding2D(nn.Module):
    """
    This module learns positional embeddings up to a fixed maximum size.
@@ -116,3 +116,23 @@ def mat_to_rotate6d(abs_action):
        return np.concatenate([abs_action[:, :3, 0], abs_action[:, :3, 1]], axis=-1)
    else:
        raise NotImplementedError
 def drop_path(x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True):
    """Stochastic depth: randomly zero whole samples of ``x`` along dim 0.
    A Bernoulli keep/drop decision is drawn once per index of the first
    dimension and broadcast over all remaining dimensions, so each sample is
    either kept in full or zeroed in full.
    Args:
        x: Input tensor; its first dimension indexes the samples.
        drop_prob: Probability of dropping a sample.
        training: The drop is only applied when this is true.
        scale_by_keep: When true (and the keep probability is positive), kept
            samples are divided by the keep probability ``1 - drop_prob``.
    Returns:
        ``x`` itself when ``drop_prob == 0.0`` or ``training`` is false,
        otherwise a new tensor equal to ``x`` times the per-sample mask.
    """
    if drop_prob == 0.0 or not training:
        return x
    survival = 1 - drop_prob
    # One mask entry per sample, with singleton trailing dims so it broadcasts
    # against tensors of any rank (not only 4D conv feature maps).
    trailing_ones = (1,) * (x.ndim - 1)
    mask = x.new_empty((x.shape[0], *trailing_ones)).bernoulli_(survival)
    if survival > 0.0:
        if scale_by_keep:
            # Rescale in place so kept samples are divided by the keep probability.
            mask.div_(survival)
    return x * mask