From 863ae89ff2f1e44c8d07c9cf1cf8b4363060e213 Mon Sep 17 00:00:00 2001
From: Jade Choghari
Date: Wed, 26 Nov 2025 15:34:45 +0100
Subject: [PATCH] fix styling

---
 docs/source/xvla.mdx | 51 +++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/docs/source/xvla.mdx b/docs/source/xvla.mdx
index 94f29f33a..1509ac46d 100644
--- a/docs/source/xvla.mdx
+++ b/docs/source/xvla.mdx
@@ -130,12 +130,12 @@ lerobot-train \
 ### Training Parameters Explained
 
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| `freeze_vision_encoder` | `True` | Freeze the VLM vision encoder weights |
-| `freeze_language_encoder` | `True` | Freeze the VLM language encoder weights |
-| `train_policy_transformer` | `True` | Allow policy transformer layers to train |
-| `train_soft_prompts` | `True` | Allow soft prompts to train |
+| Parameter                  | Default | Description                              |
+| -------------------------- | ------- | ---------------------------------------- |
+| `freeze_vision_encoder`    | `True`  | Freeze the VLM vision encoder weights    |
+| `freeze_language_encoder`  | `True`  | Freeze the VLM language encoder weights  |
+| `train_policy_transformer` | `True`  | Allow policy transformer layers to train |
+| `train_soft_prompts`       | `True`  | Allow soft prompts to train              |
 
 **💡 Best Practice**: For Phase II adaptation to new embodiments, freeze the VLM encoders and only train the policy transformer and soft prompts. This provides excellent sample efficiency with minimal compute.
 
@@ -167,13 +167,13 @@ X-VLA uses an **Action Registry** system to handle different action spaces and e
 #### Available Action Modes
 
-| Action Mode | Action Dim | Description | Use Case |
-|-------------|------------|-------------|----------|
-| `ee6d` | 20 | End-effector with xyz, 6D rotation, gripper | Dual-arm setups with spatial control |
-| `joint` | 14 | Joint-space with gripper | Direct joint control robots |
-| `agibot_ee6d` | 20 | AGI-bot variant with MSE loss | AGI-bot platforms |
-| `franka_joint7` | 7 | Franka Panda 7-joint control | Franka robots without gripper |
-| `so101_bimanual` | 20 (model), 12 (real) | SO101 bimanual robot | Bimanual manipulation tasks |
+| Action Mode      | Action Dim            | Description                                 | Use Case                             |
+| ---------------- | --------------------- | ------------------------------------------- | ------------------------------------ |
+| `ee6d`           | 20                    | End-effector with xyz, 6D rotation, gripper | Dual-arm setups with spatial control |
+| `joint`          | 14                    | Joint-space with gripper                    | Direct joint control robots          |
+| `agibot_ee6d`    | 20                    | AGI-bot variant with MSE loss               | AGI-bot platforms                    |
+| `franka_joint7`  | 7                     | Franka Panda 7-joint control                | Franka robots without gripper        |
+| `so101_bimanual` | 20 (model), 12 (real) | SO101 bimanual robot                        | Bimanual manipulation tasks          |
 
 #### Why Action Modes Matter
@@ -289,27 +289,27 @@ import torch.nn as nn
 @register_action("my_custom_robot")
 class MyCustomActionSpace(BaseActionSpace):
     """Custom action space for my robot."""
-    
+
     dim_action = 15  # Your robot's action dimension
     gripper_idx = (7, 14)  # Gripper channel indices
-    
+
     def __init__(self):
         super().__init__()
         self.mse = nn.MSELoss()
         self.bce = nn.BCEWithLogitsLoss()
-    
+
     def compute_loss(self, pred, target):
         """Define your loss computation."""
         # Example: MSE for joints, BCE for grippers
         joints_loss = self.mse(pred[:, :, :7], target[:, :, :7])
         gripper_loss = self.bce(pred[:, :, self.gripper_idx],
                                 target[:, :, self.gripper_idx])
-        
+
         return {
             "joints_loss": joints_loss,
             "gripper_loss": gripper_loss,
         }
-    
+
     def preprocess(self, proprio, action, mode="train"):
         """Preprocess actions before training."""
         # Example: Zero out grippers in proprioception
@@ -319,7 +319,7 @@ class MyCustomActionSpace(BaseActionSpace):
         if action_m is not None:
             action_m[..., self.gripper_idx] = 0.0
         return proprio_m, action_m
-    
+
     def postprocess(self, action):
         """Post-process predictions for deployment."""
         # Example: Apply sigmoid to gripper logits
@@ -381,6 +381,7 @@ preprocessor = PolicyProcessorPipeline(
 When your dataset has fewer action dimensions than the pretrained model:
 
 **Option 1**: Use padding (automatic in most action modes)
+
 ```python
 # Model expects 20D, dataset has 12D
 # Action mode handles padding internally
@@ -388,12 +389,13 @@ action_mode = "so101_bimanual"  # Pads 12 → 20
 ```
 
 **Option 2**: Create a custom action mode that maps dimensions explicitly
+
 ```python
 @register_action("my_mapped_action")
 class MappedActionSpace(BaseActionSpace):
     dim_action = 20
     REAL_DIM = 12
-    
+
     def _pad_to_model_dim(self, x):
         # Custom padding logic
         ...
@@ -406,22 +408,27 @@
 ### Common Issues
 
 **Issue**: "Action dimension mismatch"
+
 - **Solution**: Check that your `action_mode` matches your robot's action space. Create a custom action mode if needed.
 
 **Issue**: "Image values outside [0, 1] range"
+
 - **Solution**: Ensure images are preprocessed with `XVLAImageToFloatProcessorStep` before normalization.
 
 **Issue**: "Domain ID not found"
+
 - **Solution**: Make sure `XVLAAddDomainIdProcessorStep` is in your preprocessing pipeline with the correct domain_id.
 
 **Issue**: "Low success rate on new embodiment"
-- **Solution**: 
+
+- **Solution**:
   1. Verify your action_mode is correct
   2. Check that soft prompts are being trained (`train_soft_prompts=True`)
   3. Ensure proper preprocessing (ImageNet normalization, domain_id)
   4. Consider increasing training steps
 
 **Issue**: "Out of memory during training"
+
 - **Solution**:
   1. Reduce `chunk_size` (e.g., from 32 to 16)
   2. Enable gradient checkpointing
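Note on the `@register_action` pattern that the patched examples rely on: the decorator maps an `action_mode` string to an action-space class, and an unregistered string is a common source of the "Action dimension mismatch" errors listed under Common Issues. Below is a minimal, self-contained sketch of how such a decorator-based registry typically works; it is illustrative only, and `ACTION_REGISTRY`, `make_action_space`, and `ToyActionSpace` are hypothetical names, not the library's actual internals.

```python
# Minimal sketch of a decorator-based action registry (hypothetical names;
# the real library's internals may differ).
from typing import Callable, Dict

ACTION_REGISTRY: Dict[str, type] = {}


def register_action(name: str) -> Callable[[type], type]:
    """Class decorator that records an action-space class under `name`."""

    def decorator(cls: type) -> type:
        if name in ACTION_REGISTRY:
            raise ValueError(f"Action mode {name!r} is already registered")
        ACTION_REGISTRY[name] = cls
        return cls

    return decorator


def make_action_space(action_mode: str):
    """Instantiate the action space registered under `action_mode`."""
    try:
        return ACTION_REGISTRY[action_mode]()
    except KeyError:
        # Listing the known modes makes "unknown action_mode" errors actionable.
        raise KeyError(
            f"Unknown action_mode {action_mode!r}; available: {sorted(ACTION_REGISTRY)}"
        ) from None


@register_action("my_custom_robot")
class ToyActionSpace:
    dim_action = 15  # Toy value; a real action space would also define losses, etc.


space = make_action_space("my_custom_robot")
assert space.dim_action == 15
```

Failing fast with the list of registered modes is what makes a registry preferable to a chain of if/elif branches: supporting a new embodiment only requires defining and decorating a class, never touching the lookup code.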