From 3b797bb1184503419c38b015c011a204a5f70a47 Mon Sep 17 00:00:00 2001
From: Pepijn
Date: Wed, 29 Apr 2026 00:28:38 +0200
Subject: [PATCH] feat(annotate): --push_to_hub uploads the annotated dataset

After the pipeline completes, optionally create/locate a dataset repo
and upload the dataset root (excluding .annotate_staging/). Add
push_private and push_commit_message knobs.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../annotations/steerable_pipeline/config.py |  9 +++
 src/lerobot/scripts/lerobot_annotate.py      | 56 +++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 59965e970..356eed290 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -201,5 +201,14 @@ class AnnotationPipelineConfig:
     skip_validation: bool = False
     only_episodes: tuple[int, ...] | None = None
 
+    push_to_hub: str | None = None
+    """If set, after the pipeline completes, upload the annotated dataset
+    root to the Hugging Face Hub as a dataset repo with this id (e.g.
+    ``pepijn/super_poulain_steerable``). Creates the repo if missing."""
+    push_private: bool = False
+    """When ``push_to_hub`` is set, create the repo as private."""
+    push_commit_message: str | None = None
+    """Override the commit message used for the hub upload."""
+
     def resolved_staging_dir(self, root: Path) -> Path:
         return self.staging_dir if self.staging_dir is not None else root / ".annotate_staging"
diff --git a/src/lerobot/scripts/lerobot_annotate.py b/src/lerobot/scripts/lerobot_annotate.py
index 08c52fd82..6c2ccd72b 100644
--- a/src/lerobot/scripts/lerobot_annotate.py
+++ b/src/lerobot/scripts/lerobot_annotate.py
@@ -95,6 +95,62 @@ def annotate(cfg: AnnotationPipelineConfig) -> None:
     for w in summary.validation_report.warnings:
         logger.warning(w)
 
+    if cfg.push_to_hub:
+        _push_to_hub(root, cfg)
+
+
+def _push_to_hub(root: Path, cfg: AnnotationPipelineConfig) -> None:
+    """Upload the annotated dataset directory to the Hugging Face Hub.
+
+    Creates the dataset repo named by ``cfg.push_to_hub`` when it does not
+    exist yet (created as private when ``cfg.push_private`` is true), then
+    uploads everything under ``root`` except the annotation staging
+    directory and ``.DS_Store`` files.
+
+    Args:
+        root: Local dataset root to upload.
+        cfg: Pipeline config supplying ``push_to_hub``, ``push_private``,
+            ``push_commit_message`` and the staging directory location.
+    """
+    from huggingface_hub import HfApi  # noqa: PLC0415
+
+    requested_id = cfg.push_to_hub
+    commit_message = cfg.push_commit_message or "Add steerable annotations (lerobot-annotate)"
+    api = HfApi()
+    print(f"[lerobot-annotate] creating/locating dataset repo {requested_id}...", flush=True)
+    # Use the id returned by the Hub: it carries the namespace even when the
+    # user passed a bare repo name, so the upload and the printed URL both
+    # point at the repo that was actually created or found.
+    repo_id = api.create_repo(
+        repo_id=requested_id,
+        repo_type="dataset",
+        private=cfg.push_private,
+        exist_ok=True,
+    ).repo_id
+
+    # The staging directory is configurable, so also exclude wherever it
+    # really lives inside ``root``, not only the default location.
+    ignore_patterns = [".annotate_staging/**", "**/.DS_Store"]
+    root_abs = Path(root).resolve()
+    try:
+        staging_rel = Path(cfg.resolved_staging_dir(root)).resolve().relative_to(root_abs)
+    except ValueError:
+        staging_rel = None  # staging lives outside ``root``; nothing to exclude
+    if staging_rel is not None and staging_rel.parts:
+        staging_pattern = f"{staging_rel.as_posix()}/**"
+        if staging_pattern not in ignore_patterns:
+            ignore_patterns.append(staging_pattern)
+
+    print(f"[lerobot-annotate] uploading {root} -> {repo_id}...", flush=True)
+    api.upload_folder(
+        folder_path=str(root),
+        repo_id=repo_id,
+        repo_type="dataset",
+        commit_message=commit_message,
+        ignore_patterns=ignore_patterns,
+    )
+    print(f"[lerobot-annotate] uploaded to https://huggingface.co/datasets/{repo_id}", flush=True)
+
 
 def main() -> None:
     annotate()