From 6260ffa159bb9edc40059deffb50246fbac45f1c Mon Sep 17 00:00:00 2001
From: Maxime Ellerbach <maxime.ellerbach@huggingface.co>
Date: Fri, 27 Mar 2026 16:13:14 +0100
Subject: [PATCH] Fix flash_attn version check ("2.10" -> "2.1.0") in src/lerobot/policies/xvla/modeling_florence2.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Maxime Ellerbach <maxime@ellerbach.net>
---
 src/lerobot/policies/xvla/modeling_florence2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lerobot/policies/xvla/modeling_florence2.py b/src/lerobot/policies/xvla/modeling_florence2.py
index 1cdeed781..81f9c8234 100644
--- a/src/lerobot/policies/xvla/modeling_florence2.py
+++ b/src/lerobot/policies/xvla/modeling_florence2.py
@@ -909,7 +909,7 @@ class Florence2FlashAttention2(Florence2Attention):
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
         # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
-        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.10")
+        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0")
 
     def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
         return tensor.view(bsz, seq_len, self.num_heads, self.head_dim)