From ef6b3b5b0f891cee150250d5a0b1ea80cb8bae2e Mon Sep 17 00:00:00 2001 From: Khalil Meftah Date: Tue, 28 Apr 2026 11:11:02 +0200 Subject: [PATCH] refactor: simplify docstrings for clarity and conciseness across multiple files --- .../gaussian_actor/modeling_gaussian_actor.py | 9 -------- src/lerobot/rl/actor.py | 9 +------- src/lerobot/rl/algorithms/factory.py | 2 +- src/lerobot/rl/data_sources/data_mixer.py | 22 +++---------------- src/lerobot/rl/train_rl.py | 7 +----- src/lerobot/rl/trainer.py | 6 +---- tests/rl/test_actor_learner.py | 5 ----- 7 files changed, 7 insertions(+), 53 deletions(-) diff --git a/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py b/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py index cabeae3f4..21828ba5d 100644 --- a/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py +++ b/src/lerobot/policies/gaussian_actor/modeling_gaussian_actor.py @@ -36,15 +36,6 @@ DISCRETE_DIMENSION_INDEX = -1 # Gripper is always the last dimension class GaussianActorPolicy( PreTrainedPolicy, ): - """Gaussian actor + observation encoder. - - Policy-side ``nn.Module`` used by SAC and related maximum-entropy continuous - control algorithms. It owns the actor network (``Policy``) and the observation - encoder (``GaussianActorObservationEncoder``); the critics, temperature, and - Bellman-update logic live on the algorithm side - (see ``lerobot.rl.algorithms.sac``). - """ - config_class = GaussianActorConfig name = "gaussian_actor" diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index a3cc0478e..116970cc8 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -291,11 +291,7 @@ def act_with_policy( with policy_timer: normalized_observation = preprocessor.process_observation(observation) action = policy.select_action(batch=normalized_observation) - # Unnormalize only the continuous part. When `num_discrete_actions` is set, - # `select_action` concatenates an argmax index in env space at the last dim; - # action stats cover the continuous dims only, so feeding the full vector to - # the unnormalizer would shape-mismatch and would also corrupt the discrete - # index by treating it as a normalized value. + # Unnormalize only the continuous part. if cfg.policy.num_discrete_actions is not None: continuous_action = postprocessor.process_action(action[..., :-1]) discrete_action = action[..., -1:].to( @@ -346,9 +342,6 @@ def act_with_policy( "discrete_penalty": torch.tensor( [new_transition[TransitionKey.COMPLEMENTARY_DATA].get("discrete_penalty", 0.0)] ), - # Forward the intervention flag so the learner can route this transition - # into the offline replay buffer (see `process_transitions` in learner.py). - # Use the plain string key so the payload survives torch.load(weights_only=True). TeleopEvents.IS_INTERVENTION.value: is_intervention, } # Create transition for learner (convert to old format) diff --git a/src/lerobot/rl/algorithms/factory.py b/src/lerobot/rl/algorithms/factory.py index 9e71473ec..ea89e598c 100644 --- a/src/lerobot/rl/algorithms/factory.py +++ b/src/lerobot/rl/algorithms/factory.py @@ -21,7 +21,7 @@ from lerobot.rl.algorithms.configs import RLAlgorithmConfig def make_algorithm_config(algorithm_type: str, **kwargs) -> RLAlgorithmConfig: - """Instantiate an :class:`RLAlgorithmConfig` from its registered type name. + """Instantiate an `RLAlgorithmConfig` from its registered type name. Args: algorithm_type: Registry key of the algorithm (e.g. ``"sac"``). 
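For reference, a minimal usage sketch of the factory touched above (an illustrative addition, not part of the patch); it assumes the module path shown in this diff and mirrors the call that TrainRLServerPipelineConfig.validate() makes later in this commit:

    # Hedged sketch: assumes lerobot.rl.algorithms.factory is importable as laid
    # out in this diff; only the call shown in train_rl.py below is relied upon.
    from lerobot.rl.algorithms.factory import make_algorithm_config

    algorithm = make_algorithm_config("sac")  # "sac" is the registry key

    # The training pipeline (see train_rl.py further down) then injects the
    # top-level policy config so the algorithm can introspect the policy
    # architecture (e.g. num_discrete_actions):
    #     algorithm.policy_config = cfg.policy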
diff --git a/src/lerobot/rl/data_sources/data_mixer.py b/src/lerobot/rl/data_sources/data_mixer.py index d80b64021..7546bde0c 100644 --- a/src/lerobot/rl/data_sources/data_mixer.py +++ b/src/lerobot/rl/data_sources/data_mixer.py @@ -21,11 +21,7 @@ from lerobot.rl.buffer import ReplayBuffer, concatenate_batch_transitions class DataMixer(abc.ABC): - """Abstract interface for all data mixing strategies. - - Subclasses must implement ``sample(batch_size)`` and may override - ``get_iterator`` for specialised iteration. - """ + """Abstract interface for all data mixing strategies.""" @abc.abstractmethod def sample(self, batch_size: int) -> BatchType: @@ -38,25 +34,13 @@ class DataMixer(abc.ABC): async_prefetch: bool = True, queue_size: int = 2, ): - """Infinite iterator that yields batches. - - The default implementation repeatedly calls ``self.sample()``. - Subclasses with underlying buffer iterators (async prefetch) - should override this for better throughput. - """ + """Infinite iterator that yields batches.""" while True: yield self.sample(batch_size) class OnlineOfflineMixer(DataMixer): - """Mixes transitions from an online and an optional offline replay buffer. - - When both buffers are present, each batch is constructed by sampling - ``ceil(batch_size * online_ratio)`` from the online buffer and the - remainder from the offline buffer, then concatenating. - - This mixer assumes both online and offline buffers are present. - """ + """Mixes transitions from an online and an offline replay buffer.""" def __init__( self, diff --git a/src/lerobot/rl/train_rl.py b/src/lerobot/rl/train_rl.py index 21ee3afb9..fe8078858 100644 --- a/src/lerobot/rl/train_rl.py +++ b/src/lerobot/rl/train_rl.py @@ -31,10 +31,7 @@ class TrainRLServerPipelineConfig(TrainPipelineConfig): # TODO: Make `TrainPipelineConfig.dataset` optional dataset: DatasetConfig | None = None # type: ignore[assignment] # because the parent class has made it's type non-optional - # Algorithm config (a `draccus.ChoiceRegistry` subclass selected by `type`, - # e.g. ``"type": "sac"``). When omitted, defaults to a SAC config with - # default hyperparameters. The top-level `policy` is injected into - # ``algorithm.policy_config`` at validation time. + # Algorithm config. algorithm: RLAlgorithmConfig | None = None # Data mixer strategy name. Currently supports "online_offline". @@ -48,7 +45,5 @@ class TrainRLServerPipelineConfig(TrainPipelineConfig): if self.algorithm is None: self.algorithm = make_algorithm_config("sac") - # The pipeline owns the policy config; inject it so the algorithm can - # introspect policy architecture (e.g. ``num_discrete_actions``). if getattr(self.algorithm, "policy_config", None) is None: self.algorithm.policy_config = self.policy diff --git a/src/lerobot/rl/trainer.py b/src/lerobot/rl/trainer.py index 948ea9d5e..9332ccd15 100644 --- a/src/lerobot/rl/trainer.py +++ b/src/lerobot/rl/trainer.py @@ -73,11 +73,7 @@ class RLTrainer: def preprocess_rl_batch(preprocessor: Any, batch: BatchType) -> BatchType: - """Apply policy preprocessing to RL observations only. - - This mirrors the pre-refactor SAC learner behavior where actions are left - unchanged and only state/next_state observations are normalized. 
- """ + """Apply policy preprocessing to RL observations only.""" observations = batch["state"] next_observations = batch["next_state"] batch["state"] = preprocessor.process_observation(observations) diff --git a/tests/rl/test_actor_learner.py b/tests/rl/test_actor_learner.py index 33c150997..e0df14e62 100644 --- a/tests/rl/test_actor_learner.py +++ b/tests/rl/test_actor_learner.py @@ -303,11 +303,6 @@ def test_end_to_end_parameters_flow(cfg, data_size): assert torch.allclose(received_params[key], input_params[key]) -# --------------------------------------------------------------------------- -# Regression test: learner algorithm integration (no gRPC required) -# --------------------------------------------------------------------------- - - def test_learner_algorithm_wiring(): """Verify that make_algorithm constructs an SACAlgorithm from config, make_optimizers_and_scheduler() creates the right optimizers, update() works, and