From ef6b3b5b0f891cee150250d5a0b1ea80cb8bae2e Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Tue, 28 Apr 2026 11:11:02 +0200 Subject: [PATCH] refactor: simplify docstrings for clarity and conciseness across multiple files --- .../gaussian_actor/modeling_gaussian_actor.py | 9 -------- src/lerobot/rl/actor.py | 9 +------- src/lerobot/rl/algorithms/factory.py | 2 +- src/lerobot/rl/data_sources/data_mixer.py | 22 +++---------------- src/lerobot/rl/train_rl.py | 7 +----- src/lerobot/rl/trainer.py | 6 +---- tests/rl/test_actor_learner.py | 5 ----- 7 files changed, 7 insertions(+), 53 deletions(-) diff --git a/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py b/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py index cabeae3f4..21828ba5d 100644 --- a/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py +++ b/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py @@ -36,15 +36,6 @@ DISCRETE_DIMENSION_INDEX = -1 # Gripper is always the last dimension class GaussianActorPolicy( PreTrainedPolicy, ): - """Gaussian actor + observation encoder. - - Policy-side ``nn.Module`` used by SAC and related maximum-entropy continuous - control algorithms. It owns the actor network (``Policy``) and the observation - encoder (``GaussianActorObservationEncoder``); the critics, temperature, and - Bellman-update logic live on the algorithm side - (see ``lerobot.rl.algorithms.sac``). - """ - config_class = GaussianActorConfig name = "gaussian_actor" diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index a3cc0478e..116970cc8 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -291,11 +291,7 @@ def act_with_policy( with policy_timer: normalized_observation = preprocessor.process_observation(observation) action = policy.select_action(batch=normalized_observation) - # Unnormalize only the continuous part. When `num_discrete_actions` is set, - # `select_action` concatenates an argmax index in env space at the last dim; - # action stats cover the continuous dims only, so feeding the full vector to - # the unnormalizer would shape-mismatch and would also corrupt the discrete - # index by treating it as a normalized value. + # Unnormalize only the continuous part. if cfg.policy.num_discrete_actions is not None: continuous_action = postprocessor.process_action(action[..., :-1]) discrete_action = action[..., -1:].to( @@ -346,9 +342,6 @@ def act_with_policy( "discrete_penalty": torch.tensor( [new_transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)] ), - # Forward the intervention flag so the learner can route this transition - # into the offline replay buffer (see `process_transitions` in learner.py). - # Use the plain string key so the payload survives torch.load(weights_only=True). TeleopEvents.IS_INTERVENTION.value: is_intervention, } # Create transition for learner (convert to old format) diff --git a/src/lerobot/rl/algorithms/factory.py b/src/lerobot/rl/algorithms/factory.py index 9e71473ec..ea89e598c 100644 --- a/src/lerobot/rl/algorithms/factory.py +++ b/src/lerobot/rl/algorithms/factory.py @@ -21,7 +21,7 @@ from lerobot.rl.algorithms.configs import RLAlgorithmConfig def make_algorithm_config(algorithm_type: str, **kwargs) -> RLAlgorithmConfig: - """Instantiate an :class:`RLAlgorithmConfig` from its registered type name. + """Instantiate an `RLAlgorithmConfig` from its registered type name. Args: algorithm_type: Registry key of the algorithm (e.g. ``"sac"``). 
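For reference, a minimal usage sketch of the factory touched above (an illustrative addition, not part of the patch); it assumes the module path shown in this diff and mirrors the call that TrainRLServerPipelineConfig.validate() makes later in this commit:

    # Hedged sketch: assumes lerobot.rl.algorithms.factory is importable as laid
    # out in this diff; only the call shown in train_rl.py below is relied upon.
    from lerobot.rl.algorithms.factory import make_algorithm_config

    algorithm = make_algorithm_config("sac")  # "sac" is the registry key

    # The training pipeline (see train_rl.py further down) then injects the
    # top-level policy config so the algorithm can introspect the policy
    # architecture (e.g. num_discrete_actions):
    #     algorithm.policy_config = cfg.policy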
diff --git a/src/lerobot/rl/data_sources/data_mixer.py b/src/lerobot/rl/data_sources/data_mixer.py index d80b64021..7546bde0c 100644 --- a/src/lerobot/rl/data_sources/data_mixer.py +++ b/src/lerobot/rl/data_sources/data_mixer.py @@ -21,11 +21,7 @@ from lerobot.rl.buffer import ReplayBuffer, concatenate_batch_transitions class DataMixer(abc.ABC): - """Abstract interface for all data mixing strategies. - - Subclasses must implement ``sample(batch_size)`` and may override - ``get_iterator`` for specialised iteration. - """ + """Abstract interface for all data mixing strategies.""" @abc.abstractmethod def sample(self, batch_size: int) -> BatchType: @@ -38,25 +34,13 @@ class DataMixer(abc.ABC): async_prefetch: bool = True, queue_size: int = 2, ): - """Infinite iterator that yields batches. - - The default implementation repeatedly calls ``self.sample()``. - Subclasses with underlying buffer iterators (async prefetch) - should override this for better throughput. - """ + """Infinite iterator that yields batches.""" while True: yield self.sample(batch_size) class OnlineOfflineMixer(DataMixer): - """Mixes transitions from an online and an optional offline replay buffer. - - When both buffers are present, each batch is constructed by sampling - ``ceil(batch_size * online_ratio)`` from the online buffer and the - remainder from the offline buffer, then concatenating. - - This mixer assumes both online and offline buffers are present. - """ + """Mixes transitions from an online and an offline replay buffer.""" def __init__( self, diff --git a/src/lerobot/rl/train_rl.py b/src/lerobot/rl/train_rl.py index 21ee3afb9..fe8078858 100644 --- a/src/lerobot/rl/train_rl.py +++ b/src/lerobot/rl/train_rl.py @@ -31,10 +31,7 @@ class TrainRLServerPipelineConfig(TrainPipelineConfig): # TODO: Make `TrainPipelineConfig.dataset` optional dataset: DatasetConfig | None = None # type: ignore[assignment] # because the parent class has made it's type non-optional - # Algorithm config (a `draccus.ChoiceRegistry` subclass selected by `type`, - # e.g. ``"type": "sac"``). When omitted, defaults to a SAC config with - # default hyperparameters. The top-level `policy` is injected into - # ``algorithm.policy_config`` at validation time. + # Algorithm config. algorithm: RLAlgorithmConfig | None = None # Data mixer strategy name. Currently supports "online_offline". @@ -48,7 +45,5 @@ class TrainRLServerPipelineConfig(TrainPipelineConfig): if self.algorithm is None: self.algorithm = make_algorithm_config("sac") - # The pipeline owns the policy config; inject it so the algorithm can - # introspect policy architecture (e.g. ``num_discrete_actions``). if getattr(self.algorithm, "policy_config", None) is None: self.algorithm.policy_config = self.policy diff --git a/src/lerobot/rl/trainer.py b/src/lerobot/rl/trainer.py index 948ea9d5e..9332ccd15 100644 --- a/src/lerobot/rl/trainer.py +++ b/src/lerobot/rl/trainer.py @@ -73,11 +73,7 @@ class RLTrainer: def preprocess_rl_batch(preprocessor: Any, batch: BatchType) -> BatchType: - """Apply policy preprocessing to RL observations only. - - This mirrors the pre-refactor SAC learner behavior where actions are left - unchanged and only state/next_state observations are normalized. 
- """ + """Apply policy preprocessing to RL observations only.""" observations = batch["state"] next_observations = batch["next_state"] batch["state"] = preprocessor.process_observation(observations) diff --git a/tests/rl/test_actor_learner.py b/tests/rl/test_actor_learner.py index 33c150997..e0df14e62 100644 --- a/tests/rl/test_actor_learner.py +++ b/tests/rl/test_actor_learner.py @@ -303,11 +303,6 @@ def test_end_to_end_parameters_flow(cfg, data_size): assert torch.allclose(received_params[key], input_params[key]) -# --------------------------------------------------------------------------- -# Regression test: learner algorithm integration (no gRPC required) -# --------------------------------------------------------------------------- - - def test_learner_algorithm_wiring(): """Verify that make_algorithm constructs an SACAlgorithm from config, make_optimizers_and_scheduler() creates the right optimizers, update() works, and