mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 20:19:43 +00:00
xvla log fix
This commit is contained in:
@@ -13,12 +13,9 @@
|
|||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from transformers.configuration_utils import PretrainedConfig
|
from transformers.configuration_utils import PretrainedConfig
|
||||||
from transformers.utils import logging
|
|
||||||
|
|
||||||
""" Florence-2 configuration"""
|
""" Florence-2 configuration"""
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class Florence2VisionConfig(PretrainedConfig):
|
class Florence2VisionConfig(PretrainedConfig):
|
||||||
r"""
|
r"""
|
||||||
|
|||||||
@@ -46,7 +46,6 @@ from transformers.utils import (
|
|||||||
add_start_docstrings_to_model_forward,
|
add_start_docstrings_to_model_forward,
|
||||||
is_flash_attn_2_available,
|
is_flash_attn_2_available,
|
||||||
is_flash_attn_greater_or_equal_2_10,
|
is_flash_attn_greater_or_equal_2_10,
|
||||||
logging,
|
|
||||||
replace_return_docstrings,
|
replace_return_docstrings,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -57,8 +56,6 @@ if is_flash_attn_2_available():
|
|||||||
from flash_attn import flash_attn_func, flash_attn_varlen_func
|
from flash_attn import flash_attn_func, flash_attn_varlen_func
|
||||||
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
|
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
|
||||||
|
|
||||||
_CONFIG_FOR_DOC = "Florence2Config"
|
_CONFIG_FOR_DOC = "Florence2Config"
|
||||||
|
|
||||||
|
|
||||||
@@ -992,12 +989,6 @@ class Florence2FlashAttention2(Florence2Attention):
|
|||||||
else:
|
else:
|
||||||
target_dtype = self.q_proj.weight.dtype
|
target_dtype = self.q_proj.weight.dtype
|
||||||
|
|
||||||
logger.warning_once(
|
|
||||||
f"The input hidden states seems to be silently casted in float32, this might be related to"
|
|
||||||
f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in"
|
|
||||||
f" {target_dtype}."
|
|
||||||
)
|
|
||||||
|
|
||||||
query_states = query_states.to(target_dtype)
|
query_states = query_states.to(target_dtype)
|
||||||
key_states = key_states.to(target_dtype)
|
key_states = key_states.to(target_dtype)
|
||||||
value_states = value_states.to(target_dtype)
|
value_states = value_states.to(target_dtype)
|
||||||
@@ -1135,11 +1126,6 @@ class Florence2SdpaAttention(Florence2Attention):
|
|||||||
) -> tuple[torch.Tensor, torch.Tensor | None, tuple[torch.Tensor] | None]:
|
) -> tuple[torch.Tensor, torch.Tensor | None, tuple[torch.Tensor] | None]:
|
||||||
"""Input shape: Batch x Time x Channel"""
|
"""Input shape: Batch x Time x Channel"""
|
||||||
if output_attentions or layer_head_mask is not None:
|
if output_attentions or layer_head_mask is not None:
|
||||||
# TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented.
|
|
||||||
logger.warning_once(
|
|
||||||
"Florence2Model is using Florence2SdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention"
|
|
||||||
' implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
|
|
||||||
)
|
|
||||||
return super().forward(
|
return super().forward(
|
||||||
hidden_states,
|
hidden_states,
|
||||||
key_value_states=key_value_states,
|
key_value_states=key_value_states,
|
||||||
@@ -1860,9 +1846,6 @@ class Florence2Decoder(Florence2LanguagePreTrainedModel):
|
|||||||
hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
|
hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
|
||||||
|
|
||||||
if self.gradient_checkpointing and self.training and use_cache:
|
if self.gradient_checkpointing and self.training and use_cache:
|
||||||
logger.warning_once(
|
|
||||||
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
|
|
||||||
)
|
|
||||||
use_cache = False
|
use_cache = False
|
||||||
|
|
||||||
# decoder layers
|
# decoder layers
|
||||||
@@ -2160,8 +2143,6 @@ class Florence2LanguageForConditionalGeneration(Florence2LanguagePreTrainedModel
|
|||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
if labels is not None:
|
if labels is not None:
|
||||||
if use_cache:
|
|
||||||
logger.warning("The `use_cache` argument is changed to `False` since `labels` is provided.")
|
|
||||||
use_cache = False
|
use_cache = False
|
||||||
if decoder_input_ids is None and decoder_inputs_embeds is None:
|
if decoder_input_ids is None and decoder_inputs_embeds is None:
|
||||||
decoder_input_ids = shift_tokens_right(
|
decoder_input_ids = shift_tokens_right(
|
||||||
|
|||||||
Reference in New Issue
Block a user