Added a server directory in lerobot/scripts that contains the scripts and the protobuf message types needed to split training into two processes: acting and learning. The actor rolls out the policy and collects interaction data, while the learner receives the data, trains the policy, and sends the updated parameters back to the actor. The two scripts are run simultaneously.

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
2026-05-26 22:20:06 +00:00 · 2025-01-28 15:52:03 +00:00
parent d75b44f89f
commit 322a78a378
6 changed files with 759 additions and 3 deletions
@@ -76,7 +76,11 @@ def get_policy_and_config_classes(name: str) -> tuple[Policy, object]:


 def make_policy(
-    hydra_cfg: DictConfig, pretrained_policy_name_or_path: str | None = None, dataset_stats=None
+    hydra_cfg: DictConfig,
+    pretrained_policy_name_or_path: str | None = None,
+    dataset_stats=None,
+    *args,
+    **kwargs,
 ) -> Policy:
    """Make an instance of a policy class.

@@ -100,7 +104,9 @@ def make_policy(
    policy_cfg = _policy_cfg_from_hydra_cfg(policy_cfg_class, hydra_cfg)
    if pretrained_policy_name_or_path is None:
        # Make a fresh policy.
-        policy = policy_cls(policy_cfg, dataset_stats)
+        # HACK: We pass *args and **kwargs to the policy constructor to allow for additional arguments
+        # for example device for the sac policy.
+        policy = policy_cls(*args, **kwargs, config=policy_cfg, dataset_stats=dataset_stats)
    else:
        # Load a pretrained policy and override the config if needed (for example, if there are inference-time
        # hyperparameters that we want to vary).