remove timm dep

commit 0b326053e9 (parent ca4b3d035b)
Author: Jade Choghari
Date:   2025-11-27 13:38:12 +01:00

3 changed files with 38 additions and 3 deletions
pyproject.toml (+2 -2)
@@ -129,7 +129,7 @@ groot = [
     "ninja>=1.11.1,<2.0.0",
     "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
 ]
-xlva = ["lerobot[transformers-dep]", "timm>=1.0.0,<1.1.0"]
+xlva = ["lerobot[transformers-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
 
 # Features
@@ -158,7 +158,7 @@ all = [
     "lerobot[pi]",
     "lerobot[smolvla]",
     # "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
-    # "lerobot[xvla]",
+    "lerobot[xvla]",
     "lerobot[hilserl]",
     "lerobot[async]",
     "lerobot[dev]",
modeling_florence2.py (+16 -1)

@@ -23,7 +23,6 @@ import torch.nn.functional as functional
 import torch.utils.checkpoint
 import torch.utils.checkpoint as checkpoint
 from einops import rearrange
-from timm.layers import DropPath
 from torch import nn
 from torch.nn import CrossEntropyLoss
 from transformers.activations import ACT2FN

@@ -52,6 +51,7 @@ from transformers.utils import (
 )
 
 from .configuration_florence2 import Florence2Config, Florence2LanguageConfig, Florence2VisionConfig
+from .utils import drop_path
 
 if is_flash_attn_2_available():
     from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
@@ -61,6 +61,21 @@ logger = logging.get_logger(__name__)
 
 _CONFIG_FOR_DOC = "Florence2Config"
 
+
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
+
+    def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
+        super().__init__()
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
+
+    def extra_repr(self):
+        return f"drop_prob={round(self.drop_prob, 3):0.3f}"
+
+
 class LearnedAbsolutePositionEmbedding2D(nn.Module):
     """
     This module learns positional embeddings up to a fixed maximum size.
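
The vendored DropPath above is meant as a drop-in replacement for the removed timm.layers.DropPath. A minimal behavior check, a sketch assuming DropPath (and the drop_path helper added in utils below) are in scope:

    import torch

    dp = DropPath(drop_prob=0.5)
    x = torch.ones(8, 16)

    dp.train()
    y = dp(x)
    # each sample's row is either zeroed or rescaled by 1/keep_prob = 2.0
    assert set(y[:, 0].tolist()) <= {0.0, 2.0}

    dp.eval()
    assert torch.equal(dp(x), x)  # identity at inference
    print(dp)  # DropPath(drop_prob=0.500)
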
utils.py (+20)
@@ -116,3 +116,23 @@ def mat_to_rotate6d(abs_action):
         return np.concatenate([abs_action[:, :3, 0], abs_action[:, :3, 1]], axis=-1)
     else:
         raise NotImplementedError
+
+
+def drop_path(x, drop_prob: float = 0.0, training: bool = False, scale_by_keep: bool = True):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+
+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.
+
+    """
+    if drop_prob == 0.0 or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
+    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+    if keep_prob > 0.0 and scale_by_keep:
+        random_tensor.div_(keep_prob)
+    return x * random_tensor
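
Because survivors are rescaled by 1/keep_prob, drop_path keeps the expected activation magnitude unchanged during training. A small smoke test, a sketch assuming the function above is in scope (e.g. imported from this utils module):

    import torch

    torch.manual_seed(0)
    x = torch.ones(10_000, 3)

    y = drop_path(x, drop_prob=0.2, training=True)
    kept = (y[:, 0] != 0).float().mean().item()
    print(f"kept fraction: {kept:.3f}  (expect ~0.800)")
    print(f"mean activation: {y.mean().item():.3f}  (expect ~1.000)")

    # inference and drop_prob=0.0 are both no-ops
    assert torch.equal(drop_path(x, drop_prob=0.2, training=False), x)
    assert torch.equal(drop_path(x, drop_prob=0.0, training=True), x)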