diff --git a/src/lerobot/policies/pi052/fit_fast_tokenizer.py b/src/lerobot/policies/pi052/fit_fast_tokenizer.py
index 14e4217ca..513553c00 100644
--- a/src/lerobot/policies/pi052/fit_fast_tokenizer.py
+++ b/src/lerobot/policies/pi052/fit_fast_tokenizer.py
@@ -189,12 +189,30 @@ def fit_fast_tokenizer(
             "lengths."
         )
 
-    actions = np.stack(actions_buf, axis=0)  # (N, H, D)
+    actions = np.stack(actions_buf, axis=0).astype(np.float32)  # (N, H, D)
     logger.info(
         "FAST fit: collected %d chunks of shape %s from %d episodes",
         actions.shape[0], actions.shape[1:], eps_visited,
     )
 
+    # Quantile-normalise per dimension before fitting.
+    #
+    # The FAST tokenizer DCT-transforms actions, scales by ``scale`` and
+    # rounds to integer tokens; the integer *range* must fit the
+    # codebook (vocab_size, default 1024). Raw motor units (e.g. encoder
+    # ticks) blow that range up — hence "Vocab size 1024 is too small".
+    # More importantly, at training time ``ActionTokenizerProcessorStep``
+    # runs *after* the QUANTILES ``NormalizerProcessorStep``, so it
+    # encodes normalised actions. Fitting on raw actions would mismatch
+    # that space. We replicate QUANTILES normalisation here (per-dim
+    # [q01, q99] → [-1, 1], clipped) so the fit and the training-time
+    # encode see the same distribution.
+    flat = actions.reshape(-1, actions.shape[-1])
+    q01 = np.quantile(flat, 0.01, axis=0)
+    q99 = np.quantile(flat, 0.99, axis=0)
+    span = np.where((q99 - q01) > 1e-6, q99 - q01, 1.0)
+    actions = np.clip((actions - q01) / span * 2.0 - 1.0, -1.0, 1.0).astype(np.float32)
+
     base = AutoProcessor.from_pretrained(base_tokenizer_name, trust_remote_code=True)
     if not hasattr(base, "fit"):
         raise ImportError(