From 7124d471c1ae7c8130a900e0fa55d1f4993fbe42 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 Aug 2025 11:31:35 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 docs/source/introduction_processors.mdx | 94 ++++++++++++++-----------
 1 file changed, 51 insertions(+), 43 deletions(-)

diff --git a/docs/source/introduction_processors.mdx b/docs/source/introduction_processors.mdx
index f6a58f780..7f1540c6a 100644
--- a/docs/source/introduction_processors.mdx
+++ b/docs/source/introduction_processors.mdx
@@ -3,16 +3,19 @@
 In robotics, there's a fundamental mismatch between the data that robots and humans produce and what machine learning models expect. This creates several translation challenges:
 
 **Raw Robot Data → Model Input:**
+
 - Robots output raw sensor data (camera images, joint positions, force readings) that need normalization, batching, and device placement before models can process them
 - Language instructions from humans ("pick up the red cube") must be tokenized into numerical representations
 - Different robots use different coordinate systems and units that need standardization
 
 **Model Output → Robot Commands:**
+
 - Models might output end-effector positions, but robots need joint-space commands
 - Teleoperators (like gamepads) produce relative movements (delta positions), but robots expect absolute commands
 - Model predictions are often normalized and need to be converted back to real-world scales
 
 **Cross-Domain Translation:**
+
 - Training data from one robot setup needs adaptation for deployment on different hardware
 - Models trained with specific camera configurations must work with new camera arrangements
 - Datasets with different naming conventions need harmonization
@@ -24,6 +27,7 @@ Processors are the data transformation backbone of LeRobot. They handle all the
 ## What are Processors?
 
 In robotics, data comes in many forms - images from cameras, joint positions from sensors, text instructions from users, and more. Each type of data requires specific transformations before a model can use it effectively. Models need this data to be:
+
 - **Normalized**: Scaled to appropriate ranges for neural network processing
 - **Batched**: Organized with proper dimensions for batch processing
 - **Tokenized**: Text converted to numerical representations
@@ -63,6 +67,7 @@ transition: EnvTransition = {
 ```
 
 Each key in the transition has a specific purpose:
+
 - **OBSERVATION**: All sensor data (images, states, proprioception)
 - **ACTION**: The action to execute or that was executed
 - **REWARD**: Reinforcement learning signal
@@ -82,27 +87,27 @@ import torch
 
 class MyProcessorStep:
     """Example processor step interface - all methods must be implemented."""
-    
+
     def __call__(self, transition: EnvTransition) -> EnvTransition:
         """Transform the transition - this is the main processing logic."""
         raise NotImplementedError
-    
+
     def feature_contract(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
         """Declare how this step transforms feature shapes/types."""
         raise NotImplementedError
-    
+
     def get_config(self) -> dict[str, Any]:
         """Return JSON-serializable configuration for saving/loading."""
         raise NotImplementedError
-    
+
     def state_dict(self) -> dict[str, torch.Tensor]:
         """Return any learnable parameters (tensors only)."""
         raise NotImplementedError
-    
+
     def load_state_dict(self, state: dict[str, torch.Tensor]) -> None:
         """Load learnable parameters from saved state."""
         raise NotImplementedError
-    
+
     def reset(self) -> None:
         """Reset any internal state between episodes."""
         raise NotImplementedError
@@ -123,7 +128,7 @@ processor = RobotProcessor(
         step3   # Third transformation
     ],
     name="my_preprocessing_pipeline",
-    
+
     # Optional: Custom converters for input/output formats
     to_transition=custom_batch_to_transition,  # How to convert batch dict → EnvTransition
     to_output=custom_transition_to_batch       # How to convert EnvTransition → output format
@@ -146,7 +151,6 @@ output = processor(transition)  # Stays as EnvTransition throughout
 The `to_transition` and `to_output` converters enable seamless integration with existing codebases.
 By default, they handle the standard LeRobot batch format, but you can customize them for different data structures.
 
-
 ### Data Format Conversion
 
 Different data sources have different formats, but processors need a unified `EnvTransition` structure internally.
@@ -191,7 +195,7 @@ processor = RobotProcessor(
 
 ## Common Processor Steps
 
-LeRobot provides a rich set of pre-built processor steps for common transformations. 
+LeRobot provides a rich set of pre-built processor steps for common transformations.
 Let's explore each in detail:
 
 ### Data Normalization
@@ -351,6 +355,7 @@ Different datasets and models may use different naming conventions.
 The `RenameProcessor` solves this mismatch:
 
 **Why is this useful?**
+
 - When loading a model trained on a different dataset with different key names
 - When using foundation models that expect specific key naming conventions
 - When standardizing datasets from different sources
@@ -414,17 +419,17 @@ preprocessor = RobotProcessor(
                 "observation.images.wrist": "observation.images.camera1"
             }
         ),
-        
+
         # 2. Add batch dimensions for model
         ToBatchProcessor(),
-        
+
         # 3. Tokenize language instructions if present
         TokenizerProcessor(
             tokenizer_name="google/paligemma-3b-pt-224",
             max_length=64,
             task_key="task"
         ),
-        
+
         # 4. Normalize numerical data
         NormalizerProcessor(
             features=policy_features,
@@ -435,7 +440,7 @@ preprocessor = RobotProcessor(
             },
             stats=dataset.meta.stats
         ),
-        
+
         # 5. Move to GPU and convert to half precision
         DeviceProcessor(
             device="cuda:0",
@@ -450,7 +455,7 @@ postprocessor = RobotProcessor(
     steps=[
         # 1. Move back to CPU for robot hardware
         DeviceProcessor(device="cpu"),
-        
+
         # 2. Denormalize actions to original scale
         UnnormalizerProcessor(
             features=policy_features,
@@ -489,15 +494,15 @@ for epoch in range(num_epochs):
     for batch in dataloader:
         # Preprocess batch
         processed_batch = preprocessor(batch)
-        
+
         # Forward pass - returns loss and optional metrics
         loss, metrics = model.forward(processed_batch)
-        
+
         # Backward pass
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-        
+
         # Log metrics if available
         if metrics:
             wandb.log(metrics)
@@ -515,7 +520,7 @@ preprocessor = RobotProcessor.from_pretrained(
     config_filename="robot_preprocessor.json"
 )
 postprocessor = RobotProcessor.from_pretrained(
-    "path/to/model", 
+    "path/to/model",
     config_filename="robot_postprocessor.json"
 )
 
@@ -534,34 +539,34 @@ with torch.no_grad():
     while not done:
         # Get observation from robot
         observation = robot.get_observation()
-        
+
         # Build dataset-compatible frame
         observation_frame = build_dataset_frame(
-            dataset.features, 
-            observation, 
+            dataset.features,
+            observation,
             prefix="observation"
         )
-        
+
         # Add task instruction to complementary data
         observation_frame["task"] = "pick up the red cube"
-        
+
         # Preprocess for model
         model_input = preprocessor(observation_frame)
-        
+
         # Run policy
         raw_action = policy.select_action(model_input)
-        
+
         # Postprocess action
         action_transition = {TransitionKey.ACTION: raw_action}
         processed = postprocessor(action_transition)
         action = processed[TransitionKey.ACTION]
-        
+
         # Convert to robot action format
         robot_action = {
-            key: action[i].item() 
+            key: action[i].item()
             for i, key in enumerate(robot.action_features)
         }
-        
+
         # Execute on robot
         robot.send_action(robot_action)
 ```
@@ -632,10 +637,10 @@ processor = RobotProcessor.from_pretrained(
     overrides={
         # Change device for different hardware
         "device_processor": {"device": "cuda:1"},
-        
+
         # Update statistics for new dataset
         "normalizer_processor": {"stats": new_dataset.meta.stats},
-        
+
         # Provide non-serializable objects (like tokenizers)
         "tokenizer_processor": {"tokenizer": custom_tokenizer}
     }
@@ -669,16 +674,16 @@ from lerobot.processor.pipeline import ProcessorStepRegistry, ObservationProcess
 @ProcessorStepRegistry.register("my_company/gaussian_noise")
 class GaussianNoiseProcessor(ObservationProcessor):
     """Add Gaussian noise to observations for robustness training."""
-    
+
     noise_std: float = 0.01
     training_only: bool = True
     is_training: bool = True
-    
+
     def observation(self, observation):
         """Add noise to observation tensors."""
         if not self.is_training and self.training_only:
             return observation
-        
+
         noisy_obs = {}
         for key, value in observation.items():
             if isinstance(value, torch.Tensor) and "image" not in key:
@@ -687,9 +692,9 @@ class GaussianNoiseProcessor(ObservationProcessor):
                 noisy_obs[key] = value + noise
             else:
                 noisy_obs[key] = value
-        
+
         return noisy_obs
-    
+
     def get_config(self):
         return {
             "noise_std": self.noise_std,
@@ -716,15 +721,15 @@ from lerobot.processor import ActionProcessor
 @ProcessorStepRegistry.register("my_company/action_clipper")
 class ActionClipProcessor(ActionProcessor):
     """Clip actions to safe ranges."""
-    
+
     min_value: float = -1.0
     max_value: float = 1.0
-    
+
     def action(self, action):
         """Process only the action component."""
         # No need to handle transition dict - base class does it
         return torch.clamp(action, self.min_value, self.max_value)
-    
+
     def get_config(self):
         return {"min_value": self.min_value, "max_value": self.max_value}
 ```
@@ -776,7 +781,7 @@ Use `step_through()` for detailed debugging of the transformation pipeline:
 # Inspect data at each transformation stage
 for i, intermediate in enumerate(processor.step_through(data)):
     print(f"\n=== After step {i} ===")
-    
+
     # Check observation shapes
     obs = intermediate.get(TransitionKey.OBSERVATION)
     if obs:
@@ -786,7 +791,7 @@ for i, intermediate in enumerate(processor.step_through(data)):
                       f"dtype={value.dtype}, "
                       f"device={value.device}, "
                       f"range=[{value.min():.3f}, {value.max():.3f}]")
-    
+
     # Check action if present
     action = intermediate.get(TransitionKey.ACTION)
     if action is not None and isinstance(action, torch.Tensor):
@@ -818,6 +823,7 @@ variant_processor = RobotProcessor(
 ### 1. Order Matters
 
 The sequence of processors is crucial. Follow this general order:
+
 ```python
 # Preprocessing: Raw → Model-ready
 1. Rename (standardize keys)
@@ -851,6 +857,7 @@ print(ProcessorStepRegistry.list())  # See all registered processors
 ### 3. Common Pitfalls and Solutions
 
 **Tensor Device Mismatch:**
+
 ```python
 # Problem: RuntimeError: Expected all tensors on same device
 # Solution: Ensure DeviceProcessor is in pipeline
@@ -863,6 +870,7 @@ preprocessor = RobotProcessor(
 ```
 
 **Missing Statistics:**
+
 ```python
 # Problem: NormalizerProcessor has no stats
 # Solution 1: Compute stats from dataset
@@ -897,6 +905,6 @@ Processors are the unsung heroes of robotics pipelines, handling the critical tr
 - Create custom transformations for specialized tasks
 
 Remember: good preprocessing is often the difference between a model that works in theory
-and one that works in practice! 
-The modular pipeline approach ensures your transformations are testable, reproducible, 
-and portable across different robots and environments.
\ No newline at end of file
+and one that works in practice!
+The modular pipeline approach ensures your transformations are testable, reproducible,
+and portable across different robots and environments.