diff --git a/eval.sh b/eval.sh
index 45acb100c..37b67ad3b 100644
--- a/eval.sh
+++ b/eval.sh
@@ -6,4 +6,3 @@ lerobot-eval \
   --eval.batch_size=1 \
   --eval.n_episodes=1 \
   --seed=142
-
diff --git a/examples/tutorial/xvla/inference.py b/examples/tutorial/xvla/inference.py
index 00511d72c..8f59adaeb 100644
--- a/examples/tutorial/xvla/inference.py
+++ b/examples/tutorial/xvla/inference.py
@@ -1,15 +1,13 @@
-from transformers import AutoModel, AutoProcessor
-import json_numpy
 import numpy as np
 import torch
 from PIL import Image
+from transformers import AutoModel, AutoProcessor
 
-model = AutoModel.from_pretrained(
-    "2toINF/X-VLA-WidowX",
-    trust_remote_code=True
-)
+model = AutoModel.from_pretrained("2toINF/X-VLA-WidowX", trust_remote_code=True)
 
 processor = AutoProcessor.from_pretrained("2toINF/X-VLA-WidowX", trust_remote_code=True)
+
+
 # append 3 random image to a list
 def make_random_pil_images(num_images=3, H=480, W=640):
     images = []
@@ -20,6 +18,7 @@ def make_random_pil_images(num_images=3, H=480, W=640):
         images.append(img)
     return images
 
+
 # Example:
 images = make_random_pil_images()
 language_instruction = "This is a random image"
@@ -29,23 +28,27 @@ if not {"input_ids", "image_input", "image_mask"}.issubset(inputs):
     raise ValueError("Processor did not return the expected keys.")
 
 proprio = torch.randn(1, 20)
-domain_id = torch.tensor([int(0)], dtype=torch.long)
+domain_id = torch.tensor([0], dtype=torch.long)
 
 # Align to model's device/dtype
 device = model.device
 dtype = next(model.parameters()).dtype
 
+
 def to_model(t: torch.Tensor) -> torch.Tensor:
     if not isinstance(t, torch.Tensor):
         t = torch.as_tensor(t)
     # cast floats to model dtype, keep integral/bool as-is
     return t.to(device=device, dtype=dtype) if t.is_floating_point() else t.to(device=device)
 
+
 inputs = {k: to_model(v) for k, v in inputs.items()}
-inputs.update({
-    "proprio": to_model(proprio),
-    "domain_id": domain_id.to(device),
-})
+inputs.update(
+    {
+        "proprio": to_model(proprio),
+        "domain_id": domain_id.to(device),
+    }
+)
 
 # Inference
 
diff --git a/examples/tutorial/xvla/inference_pipe.py b/examples/tutorial/xvla/inference_pipe.py
index c60a4e305..93bff30ca 100644
--- a/examples/tutorial/xvla/inference_pipe.py
+++ b/examples/tutorial/xvla/inference_pipe.py
@@ -1,11 +1,12 @@
-from lerobot.policies.factory import make_policy, make_pre_post_processors
+import numpy as np
+import torch
+
 # from lerobot.policies.xvla.configuration_xvla import XVLAConfig
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.envs.factory import make_env_config
-from lerobot.policies.xvla.utils import Rotate6D_to_AxisAngle
+from lerobot.policies.factory import make_policy, make_pre_post_processors
+from lerobot.policies.xvla.utils import rotate6d_to_axis_angle
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
-import torch
-import numpy as np
 
 observation_height: int = 360
 observation_width: int = 360
@@ -13,18 +14,22 @@ observation_width: int = 360
 OBS = {
     f"{OBS_IMAGES}.image1": torch.randn(1, 3, observation_height, observation_width),
     f"{OBS_IMAGES}.image2": torch.randn(1, 3, observation_height, observation_width),
-    OBS_STATE: torch.randn(1, 9),                  # ONLY if OBS_STATE is already a string
+    OBS_STATE: torch.randn(1, 9),  # ONLY if OBS_STATE is already a string
     "task": "put the object in the box",
 }
+
+
 def fake_rgb(H, W):
     img = torch.randint(0, 255, (H, W, 3), dtype=torch.uint8).numpy()
     return img
 
+
 OBS[f"{OBS_IMAGES}.image1"] = fake_rgb(observation_height, observation_width)
 OBS[f"{OBS_IMAGES}.image2"] = fake_rgb(observation_height, observation_width)
 
 # observation = preprocessor(OBS)
 from transformers import AutoProcessor
+
 processor = AutoProcessor.from_pretrained("2toINF/X-VLA-WidowX", num_views=2, trust_remote_code=True)
 inputs = processor([OBS[f"{OBS_IMAGES}.image1"], OBS[f"{OBS_IMAGES}.image2"]], OBS["task"])
 breakpoint()
@@ -40,19 +45,19 @@ policy = make_policy(
 policy.eval()
 
 preprocessor_overrides = {
-        "device_processor": {"device": str(cfg.device)},
+    "device_processor": {"device": str(cfg.device)},
 }
 
 preprocessor, postprocessor = make_pre_post_processors(
-        policy_cfg=cfg,
-        pretrained_path=cfg.pretrained_path,
-        preprocessor_overrides=preprocessor_overrides,
+    policy_cfg=cfg,
+    pretrained_path=cfg.pretrained_path,
+    preprocessor_overrides=preprocessor_overrides,
 )
 
 observation = preprocessor(OBS)
 action = policy.select_action(observation)
 
 target_eef = action[:, :3].to("cpu").numpy()
-target_axis = Rotate6D_to_AxisAngle(action[:, 3:9].to("cpu").numpy())
+target_axis = rotate6d_to_axis_angle(action[:, 3:9].to("cpu").numpy())
 target_act = action[:, 9:10].to("cpu").numpy()
 final_action = np.concatenate([target_eef, target_axis, target_act], axis=-1)
diff --git a/examples/tutorial/xvla/test_xvla.py b/examples/tutorial/xvla/test_xvla.py
index cc6e03df7..d001b9877 100644
--- a/examples/tutorial/xvla/test_xvla.py
+++ b/examples/tutorial/xvla/test_xvla.py
@@ -1,6 +1,8 @@
+import os
+
 from lerobot.datasets.lerobot_dataset import LeRobotDatasetMetadata
 from lerobot.policies.factory import make_policy, make_policy_config
-import os
+
 cfg = make_policy_config("xvla")
 
 dataset_id = "lerobot/svla_so101_pickplace"
@@ -16,7 +18,9 @@ for name, param in policy.state_dict().items():
 import safetensors.torch
 from huggingface_hub import snapshot_download
 
-cache_dir = snapshot_download(repo_id="2toINF/X-VLA-Libero", repo_type="model", cache_dir="/fsx/jade_choghari/.cache/huggingface/model")
+cache_dir = snapshot_download(
+    repo_id="2toINF/X-VLA-Libero", repo_type="model", cache_dir="/fsx/jade_choghari/.cache/huggingface/model"
+)
 state_dict = safetensors.torch.load_file(os.path.join(cache_dir, "model.safetensors"))
 # policy.load_state_dict(state_dict)
 # 3. Add "model." prefix to every key
@@ -36,34 +40,38 @@ print()
 print("unexpected keys:", unexpected)
 
 
+import random
+
+import numpy as np
+import torch
+from xvla.models.modeling_xvla import XVLA
 
-from lerobot.policies.factory import make_policy, make_pre_post_processors
 # from lerobot.policies.xvla.configuration_xvla import XVLAConfig
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.envs.factory import make_env_config
+from lerobot.policies.factory import make_policy, make_pre_post_processors
 from lerobot.utils.constants import OBS_IMAGES, OBS_STATE
-from xvla.models.modeling_xvla import XVLA
-import torch
-import numpy as np
-import random
+
 torch.manual_seed(42)
 random.seed(42)
 np.random.seed(42)
 observation_height: int = 224
-observation_width: int = 224 # todo: jadechoghari, image size is different for the two models
+observation_width: int = 224  # todo: jadechoghari, image size is different for the two models
 # create an observation dict
 OBS = {
     f"{OBS_IMAGES}.image": torch.randn(1, 3, observation_height, observation_width),
     f"{OBS_IMAGES}.image2": torch.randn(1, 3, observation_height, observation_width),
-    OBS_STATE: torch.randn(1, 20),                  # ONLY if OBS_STATE is already a string
+    OBS_STATE: torch.randn(1, 20),  # ONLY if OBS_STATE is already a string
     "task": "put the object in the box",
 }
 
-IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1,3,1,1)
-IMAGENET_STD  = torch.tensor([0.229, 0.224, 0.225]).view(1,3,1,1)
+IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
+IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
+
+
 def fake_rgb(H, W):
     arr = np.random.randint(0, 255, (H, W, 3), dtype=np.uint8)
-    t = torch.from_numpy(arr).permute(2, 0, 1)      # CHW
+    t = torch.from_numpy(arr).permute(2, 0, 1)  # CHW
     t = t.unsqueeze(0).float()
     # normalize pixel to imagenet
     return t
@@ -83,13 +91,13 @@ policy = make_policy(
 policy.eval()
 
 preprocessor_overrides = {
-        "device_processor": {"device": str(cfg.device)},
+    "device_processor": {"device": str(cfg.device)},
 }
 
 preprocessor, postprocessor = make_pre_post_processors(
-        policy_cfg=cfg,
-        pretrained_path=cfg.pretrained_path,
-        preprocessor_overrides=preprocessor_overrides,
+    policy_cfg=cfg,
+    pretrained_path=cfg.pretrained_path,
+    preprocessor_overrides=preprocessor_overrides,
 )
 
 observation = preprocessor(OBS)
@@ -101,14 +109,16 @@ from xvla.models.processing_xvla import XVLAProcessor
 
 processor = XVLAProcessor.from_pretrained("/raid/jade/models/xvla-libero", num_views=2)
 inputs_1 = processor([OBS[f"{OBS_IMAGES}.image"], OBS[f"{OBS_IMAGES}.image2"]], OBS["task"])
-domain_id = torch.tensor([int(3)], dtype=torch.long)
-inputs.update({
-    "proprio": OBS[OBS_STATE].to("cuda"),
-    "domain_id": domain_id.to("cuda"),
-})
+domain_id = torch.tensor([3], dtype=torch.long)
+inputs.update(
+    {
+        "proprio": OBS[OBS_STATE].to("cuda"),
+        "domain_id": domain_id.to("cuda"),
+    }
+)
 
 
-for k in inputs.keys() & inputs_1.keys():   # intersection of keys
+for k in inputs.keys() & inputs_1.keys():  # intersection of keys
     a = inputs[k]
     b = inputs_1[k].to("cuda")
 
@@ -139,22 +149,25 @@ action = model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().num
 # (Pdb) [0, 0, :, :4, 0]
 action_1 = policy.model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().numpy()
 
-#np all close
+# np all close
 print(np.allclose(action, action_1, atol=1e-2, rtol=1e-2))
 print("max diff:", np.max(np.abs(action - action_1)))
 print("mean diff:", np.mean(np.abs(action - action_1)))
 
 
-from xvla.models.processor_xvla import XVLAProcessor
-from xvla.models.modeling_xvla import XVLA
-from xvla.models.configuration_xvla import XVLAConfig
-import torch
 import random
+
 import numpy as np
+import torch
 from PIL import Image
-from lerobot.policies.factory import make_policy
+from xvla.models.configuration_xvla import XVLAConfig
+from xvla.models.modeling_xvla import XVLA
+from xvla.models.processor_xvla import XVLAProcessor
+
 from lerobot.configs.policies import PreTrainedConfig
 from lerobot.envs.factory import make_env_config
+from lerobot.policies.factory import make_policy
+
 cfg = XVLAConfig.from_pretrained("/raid/jade/models/xvla-libero")
 model = XVLA.from_pretrained("/raid/jade/models/xvla-libero")
 model.eval()
@@ -166,6 +179,7 @@ torch.manual_seed(42)
 random.seed(42)
 np.random.seed(42)
 
+
 def make_random_pil_images(num_images=3, H=480, W=640):
     images = []
     for _ in range(num_images):
@@ -175,6 +189,7 @@ def make_random_pil_images(num_images=3, H=480, W=640):
         images.append(img)
     return images
 
+
 # Example:
 images = make_random_pil_images()
 language_instruction = "This is a random image"
@@ -184,23 +199,27 @@ if not {"input_ids", "image_input", "image_mask"}.issubset(inputs):
     raise ValueError("Processor did not return the expected keys.")
 
 proprio = torch.randn(1, 20)
-domain_id = torch.tensor([int(0)], dtype=torch.long)
+domain_id = torch.tensor([0], dtype=torch.long)
 
 # Align to model's device/dtype
 device = model.device
 dtype = next(model.parameters()).dtype
 
+
 def to_model(t: torch.Tensor) -> torch.Tensor:
     if not isinstance(t, torch.Tensor):
         t = torch.as_tensor(t)
     # cast floats to model dtype, keep integral/bool as-is
     return t.to(device=device, dtype=dtype) if t.is_floating_point() else t.to(device=device)
 
+
 inputs = {k: to_model(v) for k, v in inputs.items()}
-inputs.update({
-    "proprio": to_model(proprio),
-    "domain_id": domain_id.to(device),
-})
+inputs.update(
+    {
+        "proprio": to_model(proprio),
+        "domain_id": domain_id.to(device),
+    }
+)
 
 # Inference
 action = model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().numpy()
diff --git a/src/lerobot/envs/libero.py b/src/lerobot/envs/libero.py
index f0ee22503..58f4e051a 100644
--- a/src/lerobot/envs/libero.py
+++ b/src/lerobot/envs/libero.py
@@ -29,7 +29,9 @@ from gymnasium import spaces
 from libero.libero import benchmark, get_libero_path
 from libero.libero.envs import OffScreenRenderEnv
 from robosuite.utils.transform_utils import quat2axisangle
-from lerobot.policies.xvla.utils import Mat_to_Rotate6D
+
+from lerobot.policies.xvla.utils import mat_to_rotate6d
+
 
 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
     """Normalize camera_name into a non-empty list of strings."""
@@ -215,7 +217,7 @@ class LiberoEnv(gym.Env):
             if camera_name == "agentview_image":
                 image = image[::-1, ::-1]  # rotate 180 degrees
             images[self.camera_name_mapping[camera_name]] = image
-        
+
         if self.action_type == "rel":
             state = np.concatenate(
                 (
@@ -227,13 +229,15 @@ class LiberoEnv(gym.Env):
         # TODO: jadechoghari, this is an ugly quick workaround for XVLA states.
         # we will open a new PR to handle this in a preprocessor.
         elif self.action_type == "abs":
-            robo_ori = Mat_to_Rotate6D(self._env.robots[0].controller.ee_ori_mat)
+            robo_ori = mat_to_rotate6d(self._env.robots[0].controller.ee_ori_mat)
             robo_pos = self._env.robots[0].controller.ee_pos
             proprio = np.concatenate([robo_pos, robo_ori, np.array([0.0])], axis=-1)
             state = np.concatenate([proprio, np.zeros_like(proprio)], axis=-1)
         else:
-            raise NotImplementedError(f"The action type '{self.action_type}' is not supported in LiberoEnv. "
-                                      "Please switch to an action type (e.g. 'rel', 'abs').")
+            raise NotImplementedError(
+                f"The action type '{self.action_type}' is not supported in LiberoEnv. "
+                "Please switch to an action type (e.g. 'rel', 'abs')."
+            )
         agent_pos = state
         if self.obs_type == "pixels":
             return {"pixels": images.copy()}
@@ -252,7 +256,7 @@ class LiberoEnv(gym.Env):
         self._env.seed(seed)
         if self.init_states and self._init_states is not None:
             self._env.set_init_state(self._init_states[self._init_state_id])
-        
+
         raw_obs = self._env.reset()
 
         # After reset, objects may be unstable (slightly floating, intersecting, etc.).
@@ -261,7 +265,7 @@ class LiberoEnv(gym.Env):
         for _ in range(self.num_steps_wait):
             action = np.array(get_libero_dummy_action())
             raw_obs, _, _, _ = self._env.step(action)
-        
+
         observation = self._format_raw_obs(raw_obs)
         for robot in self._env.robots:
             robot.controller.use_delta = False
diff --git a/src/lerobot/policies/xvla/__init__.py b/src/lerobot/policies/xvla/__init__.py
index 84fecda43..822a4797e 100644
--- a/src/lerobot/policies/xvla/__init__.py
+++ b/src/lerobot/policies/xvla/__init__.py
@@ -1,6 +1,6 @@
 from lerobot.policies.xvla.processor_xvla import (
-            make_xvla_pre_post_processors,
-            XVLAImageScaleProcessorStep,
-            XVLAAddDomainIdProcessorStep,
-            XVLARotation6DToAxisAngleProcessorStep,
-        )
\ No newline at end of file
+    XVLAAddDomainIdProcessorStep,
+    XVLAImageScaleProcessorStep,
+    XVLARotation6DToAxisAngleProcessorStep,
+    make_xvla_pre_post_processors,
+)
diff --git a/src/lerobot/policies/xvla/configuration_florence2.py b/src/lerobot/policies/xvla/configuration_florence2.py
index 7dec9d68e..1a66cf3e2 100644
--- a/src/lerobot/policies/xvla/configuration_florence2.py
+++ b/src/lerobot/policies/xvla/configuration_florence2.py
@@ -187,7 +187,7 @@ class Florence2LanguageConfig(PretrainedConfig):
     >>> configuration = Florence2LanguageConfig()
 
     >>> # Initializing a model (with random weights)
-    >>> model = Florence2LangaugeModel(configuration)
+    >>> model = Florence2LanguageModel(configuration)
 
     >>> # Accessing the model configuration
     >>> configuration = model.config
diff --git a/src/lerobot/policies/xvla/configuration_xvla.py b/src/lerobot/policies/xvla/configuration_xvla.py
index 865896277..c6c3f1f25 100644
--- a/src/lerobot/policies/xvla/configuration_xvla.py
+++ b/src/lerobot/policies/xvla/configuration_xvla.py
@@ -28,8 +28,6 @@ from lerobot.optim.schedulers import CosineDecayWithWarmupSchedulerConfig
 from lerobot.utils.constants import OBS_IMAGES
 
 from .configuration_florence2 import Florence2Config
-from .configuration_florence2 import Florence2VisionConfig
-from .configuration_florence2 import Florence2LanguageConfig
 
 
 @PreTrainedConfig.register_subclass("xvla")
@@ -72,7 +70,7 @@ class XVLAConfig(PreTrainedConfig):
     num_domains: int = 30
     len_soft_prompts: int = 32
     dim_time: int = 32
-    max_len_seq: int = 512 #TODO: jadechoghari: change to 512 1024
+    max_len_seq: int = 512  # TODO: jadechoghari: change to 512 1024
     use_hetero_proj: bool = False
 
     # Action & proprioception
diff --git a/src/lerobot/policies/xvla/modeling_florence2.py b/src/lerobot/policies/xvla/modeling_florence2.py
index 84f075807..e61689568 100644
--- a/src/lerobot/policies/xvla/modeling_florence2.py
+++ b/src/lerobot/policies/xvla/modeling_florence2.py
@@ -496,8 +496,8 @@ class DaViT(nn.Module):
         drop_path_rate (float): Stochastic depth rate. Default: 0.1.
         norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
         enable_checkpoint (bool): If True, enable checkpointing. Default: False.
-        conv_at_attn (bool): If True, performe depthwise convolution before attention layer. Default: True.
-        conv_at_ffn (bool): If True, performe depthwise convolution before ffn layer. Default: True.
+        conv_at_attn (bool): If True, perform depthwise convolution before attention layer. Default: True.
+        conv_at_ffn (bool): If True, perform depthwise convolution before ffn layer. Default: True.
     """
 
     def __init__(
@@ -892,7 +892,7 @@ class Florence2FlashAttention2(Florence2Attention):
         super().__init__(*args, **kwargs)
 
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
-        # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
+        # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
         self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
 
diff --git a/src/lerobot/policies/xvla/modeling_xvla.py b/src/lerobot/policies/xvla/modeling_xvla.py
index 8eadffffa..a8ee2eae0 100644
--- a/src/lerobot/policies/xvla/modeling_xvla.py
+++ b/src/lerobot/policies/xvla/modeling_xvla.py
@@ -18,6 +18,7 @@
 
 from __future__ import annotations
 
+import os
 from collections import deque
 
 import torch
@@ -34,7 +35,6 @@ from .configuration_xvla import XVLAConfig
 from .modeling_florence2 import Florence2ForConditionalGeneration
 from .transformer import SoftPromptedTransformer
 
-import os
 
 class XVLAModel(nn.Module):
     """
@@ -94,11 +94,10 @@ class XVLAModel(nn.Module):
         batch_size, num_views = pixel_values.shape[:2]
         flat_mask = image_mask.view(-1).to(dtype=torch.bool)
         flat_images = pixel_values.flatten(0, 1)
-        #TODO: jadechoghari: remove this resizing logic, and provide a way in training to do this
+        # TODO: jadechoghari: remove this resizing logic, and provide a way in training to do this
         # target_size = (224, 224)
         # flat_images = F.interpolate(flat_images, size=target_size, mode="bilinear", align_corners=False)
 
-
         num_valid = int(flat_mask.sum().item())
         if num_valid == 0:
             raise ValueError("At least one image view must be valid per batch.")
@@ -341,13 +340,13 @@ class XVLAPolicy(PreTrainedPolicy):
             self._queues[ACTION].extend(actions.transpose(0, 1)[: self.config.n_action_steps])
 
         return self._queues[ACTION].popleft()
-    
+
     @classmethod
     def from_pretrained(
         cls,
         pretrained_name_or_path: str | Path,
         *,
-        config: "PreTrainedConfig" | None = None,
+        config: PreTrainedConfig | None = None,
         force_download: bool = False,
         resume_download: bool | None = None,
         proxies: dict | None = None,
@@ -364,6 +363,7 @@ class XVLAPolicy(PreTrainedPolicy):
         - skip list for layers that should remain randomly initialized
         """
         import safetensors.torch
+
         # --- Step 1: Load config ---
         if config is None:
             config = PreTrainedConfig.from_pretrained(
@@ -386,6 +386,9 @@ class XVLAPolicy(PreTrainedPolicy):
             model_file = os.path.join(model_id, "model.safetensors")
         else:
             try:
+                from huggingface_hub import hf_hub_download
+                from huggingface_hub.utils import HfHubHTTPError
+
                 model_file = hf_hub_download(
                     repo_id=model_id,
                     filename="model.safetensors",
@@ -398,9 +401,7 @@ class XVLAPolicy(PreTrainedPolicy):
                     local_files_only=local_files_only,
                 )
             except HfHubHTTPError as e:
-                raise FileNotFoundError(
-                    f"model.safetensors not found on the Hub at {model_id}"
-                ) from e
+                raise FileNotFoundError(f"model.safetensors not found on the Hub at {model_id}") from e
 
         print(f"Loading checkpoint from {model_file}")
         state_dict = safetensors.torch.load_file(model_file)
@@ -421,7 +422,7 @@ class XVLAPolicy(PreTrainedPolicy):
         # }
         # # ---- ADD THIS: Fix shared embeddings ----
         encoder_key = "model.vlm.language_model.model.encoder.embed_tokens.weight"
-        shared_key  = "model.vlm.language_model.model.shared.weight"
+        shared_key = "model.vlm.language_model.model.shared.weight"
         if encoder_key in state_dict:
             state_dict[shared_key] = state_dict[encoder_key]
         # step 5: load into instance
@@ -431,7 +432,7 @@ class XVLAPolicy(PreTrainedPolicy):
             print(f"Missing keys: {missing}")
         if unexpected:
             print(f"Unexpected keys: {unexpected}")
-    
+
         # step 6: finalize
         instance.to(config.device)
         instance.eval()
diff --git a/src/lerobot/policies/xvla/processor_xvla.py b/src/lerobot/policies/xvla/processor_xvla.py
index e5fc96704..5296f2131 100644
--- a/src/lerobot/policies/xvla/processor_xvla.py
+++ b/src/lerobot/policies/xvla/processor_xvla.py
@@ -21,7 +21,7 @@ import numpy as np
 import torch
 
 from lerobot.policies.xvla.configuration_xvla import XVLAConfig
-from lerobot.policies.xvla.utils import Rotate6D_to_AxisAngle
+from lerobot.policies.xvla.utils import rotate6d_to_axis_angle
 from lerobot.processor import (
     AddBatchDimensionProcessorStep,
     DeviceProcessorStep,
@@ -38,6 +38,7 @@ from lerobot.processor.converters import policy_action_to_transition, transition
 from lerobot.processor.core import EnvTransition, TransitionKey
 from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME
 
+
 def make_xvla_pre_post_processors(
     config: XVLAConfig,
     dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
@@ -94,45 +95,45 @@ def make_xvla_pre_post_processors(
 @ProcessorStepRegistry.register(name="xvla_image_scale")
 class XVLAImageScaleProcessorStep(ProcessorStep):
     """Scale image observations by 255 to convert from [0, 1] to [0, 255] range.
-    
+
     This processor step multiplies all image observations by 255, which is required
     for XVLA models that expect images in uint8-like range.
-    
+
     Args:
         image_keys: List of observation keys that contain images to scale.
                    If None, will automatically detect keys starting with "observation.images."
     """
-    
+
     image_keys: list[str] | None = None
-    
+
     def __call__(self, transition: EnvTransition) -> EnvTransition:
         """Scale image observations by 255."""
         new_transition = transition.copy()
         obs = new_transition.get(TransitionKey.OBSERVATION, {})
         if obs is None:
             return new_transition
-        
+
         # Make a copy of observations to avoid modifying the original
         obs = obs.copy()
-        
+
         # Determine which keys to scale
         keys_to_scale = self.image_keys
         if keys_to_scale is None:
             # Auto-detect image keys
             keys_to_scale = [k for k in obs.keys() if k.startswith("observation.images.")]
-        
+
         # Scale each image
         for key in keys_to_scale:
             if key in obs and isinstance(obs[key], torch.Tensor):
                 obs[key] = obs[key] * 255
-        
+
         new_transition[TransitionKey.OBSERVATION] = obs
         return new_transition
-    
+
     def transform_features(self, features):
         """Image scaling doesn't change feature structure."""
         return features
-    
+
     def get_config(self) -> dict[str, Any]:
         """Return serializable configuration."""
         return {
@@ -144,18 +145,18 @@ class XVLAImageScaleProcessorStep(ProcessorStep):
 @ProcessorStepRegistry.register(name="xvla_add_domain_id")
 class XVLAAddDomainIdProcessorStep(ProcessorStep):
     """Add domain_id to complementary data.
-    
+
     This processor step adds a domain_id tensor to the complementary data,
     which is used by XVLA to identify different robot embodiments or task domains.
-    
+
     Args:
         domain_id: The domain ID to add (default: 3)
         device: Device to place the domain_id tensor on (default: "cuda")
     """
-    
+
     domain_id: int = 3
     device: str = "cuda"
-    
+
     def __call__(self, transition: EnvTransition) -> EnvTransition:
         """Add domain_id to complementary data."""
         new_transition = transition.copy()
@@ -164,7 +165,7 @@ class XVLAAddDomainIdProcessorStep(ProcessorStep):
             comp = {}
         else:
             comp = comp.copy()
-        
+
         # Infer batch size from observation tensors
         obs = new_transition.get(TransitionKey.OBSERVATION, {})
         batch_size = 1
@@ -173,17 +174,17 @@ class XVLAAddDomainIdProcessorStep(ProcessorStep):
                 if isinstance(v, torch.Tensor):
                     batch_size = v.shape[0]
                     break
-        
+
         # Add domain_id tensor
         comp["domain_id"] = torch.tensor([int(self.domain_id)] * batch_size, dtype=torch.long).to(self.device)
-        
+
         new_transition[TransitionKey.COMPLEMENTARY_DATA] = comp
         return new_transition
-    
+
     def transform_features(self, features):
         """Domain ID addition doesn't change feature structure."""
         return features
-    
+
     def get_config(self) -> dict[str, Any]:
         """Return serializable configuration."""
         return {
@@ -196,61 +197,61 @@ class XVLAAddDomainIdProcessorStep(ProcessorStep):
 @ProcessorStepRegistry.register(name="xvla_rotation_6d_to_axis_angle")
 class XVLARotation6DToAxisAngleProcessorStep(ProcessorStep):
     """Convert 6D rotation representation to axis-angle and reorganize action dimensions.
-    
+
     This processor step takes actions with 6D rotation representation and converts them to
     axis-angle representation, reorganizing the action dimensions as:
     - action[:, :3] -> target_eef (end-effector position)
     - action[:, 3:9] -> 6D rotation (converted to axis-angle, 3D)
     - action[:, 9:10] -> gripper action
-    
+
     Final output: [target_eef (3), axis_angle (3), gripper (1)] = 7D action
-    
+
     Args:
         expected_action_dim: Expected input action dimension (default: 10, supports 6D rotation + extras)
     """
-    
+
     expected_action_dim: int = 10
-    
+
     def __call__(self, transition: EnvTransition) -> EnvTransition:
         """Convert 6D rotation to axis-angle in action."""
         new_transition = transition.copy()
         action = new_transition.get(TransitionKey.ACTION)
-        
+
         if action is None or not isinstance(action, torch.Tensor):
             return new_transition
-        
+
         # Convert to numpy for processing
         device = action.device
         dtype = action.dtype
         action_np = action.cpu().numpy()
-        
+
         # Extract components
         # action shape: (B, D) where D >= 10
         target_eef = action_np[:, :3]  # (B, 3)
         rotation_6d = action_np[:, 3:9]  # (B, 6)
         target_act = action_np[:, 9:10]  # (B, 1)
-        
+
         # Convert 6D rotation to axis-angle
-        target_axis = Rotate6D_to_AxisAngle(rotation_6d)  # (B, 3)
-        
+        target_axis = rotate6d_to_axis_angle(rotation_6d)  # (B, 3)
+
         # Concatenate: [eef (3), axis_angle (3), gripper (1)] = 7D
         action_np = np.concatenate([target_eef, target_axis, target_act], axis=-1)
 
         # Convert gripper action to -1 or 1
         action_np[:, -1] = np.where(action_np[:, -1] > 0.5, 1.0, -1.0)
-        
+
         # Convert back to tensor
         action = torch.from_numpy(action_np).to(device=device, dtype=dtype)
-        
+
         new_transition[TransitionKey.ACTION] = action
         return new_transition
-    
+
     def transform_features(self, features):
         """Rotation conversion changes action dimension from 10 to 7."""
         # Note: This is a simplified version. In practice, you might want to
         # update the action feature shape in the features dict.
         return features
-    
+
     def get_config(self) -> dict[str, Any]:
         """Return serializable configuration."""
         return {
diff --git a/src/lerobot/policies/xvla/utils.py b/src/lerobot/policies/xvla/utils.py
index 0603777ac..abd02edcc 100644
--- a/src/lerobot/policies/xvla/utils.py
+++ b/src/lerobot/policies/xvla/utils.py
@@ -1,48 +1,51 @@
 import numpy as np
 import robosuite.utils.transform_utils as T
-def Rotate6D_to_AxisAngle(r6d):
-        """
-        r6d: np.ndarray, shape (N, 6)
-        return: np.ndarray, shape (N, 3), axis-angle vectors
-        """
-        flag = 0
-        if len(r6d.shape) == 1:
-            r6d = r6d[None, ...]
-            flag = 1
-            
-        a1 = r6d[:, 0:3]
-        a2 = r6d[:, 3:6]
-        
-        # b1
-        b1 = a1 / (np.linalg.norm(a1, axis=-1, keepdims=True) + 1e-6)
-        
-        # b2
-        dot_prod = np.sum(b1 * a2, axis=-1, keepdims=True)
-        b2_orth = a2 - dot_prod * b1
-        b2 = b2_orth / (np.linalg.norm(b2_orth, axis=-1, keepdims=True) + 1e-6)
-        
-        # b3
-        b3 = np.cross(b1, b2, axis=-1)
-        
-        R = np.stack([b1, b2, b3], axis=-1)  # shape: (N, 3, 3)
-        
-        axis_angle_list = []
-        for i in range(R.shape[0]):
-            quat = T.mat2quat(R[i])
-            axis_angle = T.quat2axisangle(quat)
-            axis_angle_list.append(axis_angle)
-        
-        axis_angle_array = np.stack(axis_angle_list, axis=0)  # shape: (N, 3)
-        
-        if flag == 1:   
-            axis_angle_array = axis_angle_array[0]
-                    
-        return axis_angle_array
-        
-def Mat_to_Rotate6D(abs_action):
-        if len(abs_action.shape) == 2:
-            return np.concatenate([abs_action[:3, 0], abs_action[:3, 1]], axis=-1)
-        elif len(abs_action.shape) == 3:
-            return np.concatenate([abs_action[:, :3, 0], abs_action[:, :3, 1]], axis=-1)
-        else:
-            raise NotImplementedError
\ No newline at end of file
+
+
+def rotate6d_to_axis_angle(r6d):
+    """
+    r6d: np.ndarray, shape (N, 6)
+    return: np.ndarray, shape (N, 3), axis-angle vectors
+    """
+    flag = 0
+    if len(r6d.shape) == 1:
+        r6d = r6d[None, ...]
+        flag = 1
+
+    a1 = r6d[:, 0:3]
+    a2 = r6d[:, 3:6]
+
+    # b1
+    b1 = a1 / (np.linalg.norm(a1, axis=-1, keepdims=True) + 1e-6)
+
+    # b2
+    dot_prod = np.sum(b1 * a2, axis=-1, keepdims=True)
+    b2_orth = a2 - dot_prod * b1
+    b2 = b2_orth / (np.linalg.norm(b2_orth, axis=-1, keepdims=True) + 1e-6)
+
+    # b3
+    b3 = np.cross(b1, b2, axis=-1)
+
+    R = np.stack([b1, b2, b3], axis=-1)  # shape: (N, 3, 3)
+
+    axis_angle_list = []
+    for i in range(R.shape[0]):
+        quat = T.mat2quat(R[i])
+        axis_angle = T.quat2axisangle(quat)
+        axis_angle_list.append(axis_angle)
+
+    axis_angle_array = np.stack(axis_angle_list, axis=0)  # shape: (N, 3)
+
+    if flag == 1:
+        axis_angle_array = axis_angle_array[0]
+
+    return axis_angle_array
+
+
+def mat_to_rotate6d(abs_action):
+    if len(abs_action.shape) == 2:
+        return np.concatenate([abs_action[:3, 0], abs_action[:3, 1]], axis=-1)
+    elif len(abs_action.shape) == 3:
+        return np.concatenate([abs_action[:, :3, 0], abs_action[:, :3, 1]], axis=-1)
+    else:
+        raise NotImplementedError
diff --git a/src/lerobot/processor/migrate_policy_normalization.py b/src/lerobot/processor/migrate_policy_normalization.py
index 0b614c7c0..e602bb5d0 100644
--- a/src/lerobot/processor/migrate_policy_normalization.py
+++ b/src/lerobot/processor/migrate_policy_normalization.py
@@ -325,7 +325,9 @@ def load_state_dict_with_missing_key_handling(
     Returns:
         List of problematic missing keys that weren't in the whitelist.
     """
-    state_dict['model.vlm.language_model.model.encoder.embed_tokens.weight'] = state_dict['model.vlm.language_model.model.shared.weight'].clone()
+    state_dict["model.vlm.language_model.model.encoder.embed_tokens.weight"] = state_dict[
+        "model.vlm.language_model.model.shared.weight"
+    ].clone()
     # Load the cleaned state dict with strict=False to capture missing/unexpected keys
     load_result = policy.load_state_dict(state_dict, strict=False)
 
diff --git a/src/lerobot/processor/normalize_processor.py b/src/lerobot/processor/normalize_processor.py
index 4ab09beaf..87d8ae267 100644
--- a/src/lerobot/processor/normalize_processor.py
+++ b/src/lerobot/processor/normalize_processor.py
@@ -25,9 +25,10 @@ import torch
 from torch import Tensor
 
 from lerobot.configs.types import FeatureType, NormalizationMode, PipelineFeatureType, PolicyFeature
+from lerobot.datasets.factory import IMAGENET_STATS
 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.utils.constants import ACTION
-from lerobot.datasets.factory import IMAGENET_STATS
+
 from .converters import from_tensor_to_numpy, to_tensor
 from .core import EnvTransition, PolicyAction, TransitionKey
 from .pipeline import PolicyProcessorPipeline, ProcessorStep, ProcessorStepRegistry
@@ -303,7 +304,11 @@ class _NormalizationMixin:
             ValueError: If an unsupported normalization mode is encountered.
         """
         norm_mode = self.norm_map.get(feature_type, NormalizationMode.IDENTITY)
-        if norm_mode == NormalizationMode.IDENTITY or key not in self._tensor_stats and norm_mode != NormalizationMode.IMAGENET:
+        if (
+            norm_mode == NormalizationMode.IDENTITY
+            or key not in self._tensor_stats
+            and norm_mode != NormalizationMode.IMAGENET
+        ):
             return tensor
         if norm_mode not in (
             NormalizationMode.MEAN_STD,
diff --git a/src/lerobot/processor/pipeline.py b/src/lerobot/processor/pipeline.py
index 9dcfbe806..e14d8b0b9 100644
--- a/src/lerobot/processor/pipeline.py
+++ b/src/lerobot/processor/pipeline.py
@@ -55,6 +55,7 @@ from .core import EnvAction, EnvTransition, PolicyAction, RobotAction, Transitio
 TInput = TypeVar("TInput")
 TOutput = TypeVar("TOutput")
 
+
 class ProcessorStepRegistry:
     """A registry for ProcessorStep classes to allow instantiation from a string name.
 
diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py
index 15952e16e..0d66fa1aa 100644
--- a/src/lerobot/scripts/lerobot_eval.py
+++ b/src/lerobot/scripts/lerobot_eval.py
@@ -45,6 +45,7 @@ Note that in both examples, the repo/folder should contain at least `config.json
 
 You can learn about the CLI options for this script in the `EvalPipelineConfig` in lerobot/configs/eval.py
 """
+
 import concurrent.futures as cf
 import json
 import logging
@@ -89,6 +90,7 @@ from lerobot.utils.utils import (
     inside_slurm,
 )
 
+
 def rollout(
     env: gym.vector.VectorEnv,
     policy: PreTrainedPolicy,
@@ -153,7 +155,6 @@ def rollout(
         disable=inside_slurm(),  # we dont want progress bar when we use slurm, since it clutters the logs
         leave=False,
     )
-
     check_env_attributes_and_types(env)
     while not np.all(done) and step < max_steps:
         # Numpy array to tensor and changing dictionary keys to LeRobot policy format.
@@ -164,17 +165,13 @@ def rollout(
         # Infer "task" from attributes of environments.
         # TODO: works with SyncVectorEnv but not AsyncVectorEnv
         observation = add_envs_task(env, observation)
-        
-        # Preprocess observation (includes image scaling and domain_id addition)
         observation = preprocessor(observation)
-        # Policy inference
         with torch.inference_mode():
             action = policy.select_action(observation)
-        
-        # Postprocess action (includes rotation conversion and device transfer to CPU)
         action = postprocessor(action)
-        # Convert to numpy
-        action_numpy: np.ndarray = action.numpy()
+
+        # Convert to CPU / numpy.
+        action_numpy: np.ndarray = action.to("cpu").numpy()
         assert action_numpy.ndim == 2, "Action dimensions should be (batch, action_dim)"
 
         # Apply the next action.
@@ -500,6 +497,7 @@ def eval_main(cfg: EvalPipelineConfig):
     envs = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs)
 
     logging.info("Making policy.")
+
     policy = make_policy(
         cfg=cfg.policy,
         env_cfg=cfg.env,