diff --git a/examples/annotations/run_hf_job.py b/examples/annotations/run_hf_job.py index f881d168d..8dd354c7d 100644 --- a/examples/annotations/run_hf_job.py +++ b/examples/annotations/run_hf_job.py @@ -20,10 +20,13 @@ Spawns one ``h200x2`` job that: 1. installs this branch of ``lerobot`` plus the annotation extras, 2. boots two vllm servers (one per GPU) with Qwen3.6-35B-A3B-FP8, 3. runs the plan / interjections / vqa modules across the dataset, - 4. uploads the annotated dataset back to ``--repo_id``. + 4. uploads the annotated dataset back to ``--repo_id`` (or to + ``--dest_repo_id`` when set). -``--repo_id`` is both the download source and, with ``--push_to_hub=true``, -the upload destination — the job annotates the dataset in place. +``--repo_id`` is the download source and, with ``--push_to_hub=true``, also +the default upload destination — the job annotates the dataset in place. +Pass ``--dest_repo_id`` to push the result to a separate repo instead and +leave the source untouched. Usage: @@ -53,9 +56,11 @@ CMD = ( "export VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 && " "export VLLM_VIDEO_BACKEND=pyav && " "lerobot-annotate " - # The dataset to annotate; also the push destination (annotate in place). + # The dataset to annotate. By default it is also the push destination + # (annotate in place); pass --dest_repo_id to push to a separate repo. "--repo_id=/ " "--push_to_hub=true " + # "--dest_repo_id=/ " "--vlm.backend=openai " "--vlm.model_id=Qwen/Qwen3.6-35B-A3B-FP8 " "--vlm.parallel_servers=2 " diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py index 1cf59cced..8ff8fce78 100644 --- a/src/lerobot/annotations/steerable_pipeline/config.py +++ b/src/lerobot/annotations/steerable_pipeline/config.py @@ -163,8 +163,15 @@ class AnnotationPipelineConfig: """ # Hub dataset id. Used as the download source when ``root`` is unset, - # and as the destination repo when ``push_to_hub`` is enabled. 
+ # and as the destination repo when ``push_to_hub`` is enabled and + # ``dest_repo_id`` is unset. repo_id: str | None = None + + # Optional separate Hub dataset id to push the annotated result to. When + # unset, ``push_to_hub`` uploads back to ``repo_id`` (annotate in place); + # when set, the source ``repo_id`` is left untouched. + dest_repo_id: str | None = None + root: Path | None = None # Defaults to ``/.annotate_staging/`` when unset. @@ -182,8 +189,9 @@ class AnnotationPipelineConfig: skip_validation: bool = False only_episodes: tuple[int, ...] | None = None - # When True, upload the annotated dataset back to ``repo_id`` on the - # Hugging Face Hub. ``repo_id`` must be set for this to take effect. + # When True, upload the annotated dataset to the Hugging Face Hub: + # to ``dest_repo_id`` if set, otherwise back to ``repo_id``. One of + # the two must be set, or ``lerobot-annotate`` raises ``ValueError``. push_to_hub: bool = False push_private: bool = False push_commit_message: str | None = None diff --git a/src/lerobot/scripts/lerobot_annotate.py b/src/lerobot/scripts/lerobot_annotate.py index 99edf9990..5bc91a242 100644 --- a/src/lerobot/scripts/lerobot_annotate.py +++ b/src/lerobot/scripts/lerobot_annotate.py @@ -113,16 +113,21 @@ def annotate(cfg: AnnotationPipelineConfig) -> None: logger.warning(w) if cfg.push_to_hub: - if cfg.repo_id is None: - raise ValueError("--push_to_hub requires --repo_id (the dataset repo to push to).") + if cfg.repo_id is None and cfg.dest_repo_id is None: + raise ValueError( + "--push_to_hub requires --repo_id or --dest_repo_id (the dataset repo to push to)." + ) _push_to_hub(root, cfg) def _push_to_hub(root: Path, cfg: AnnotationPipelineConfig) -> None: - """Upload the annotated dataset directory back to ``cfg.repo_id`` on the Hub.""" + """Upload the annotated dataset directory to the Hub. + + Pushes to ``cfg.dest_repo_id`` when set, otherwise back to ``cfg.repo_id``. 
+ """ from huggingface_hub import HfApi # noqa: PLC0415 - repo_id = cfg.repo_id + repo_id = cfg.dest_repo_id or cfg.repo_id commit_message = cfg.push_commit_message or "Add steerable annotations (lerobot-annotate)" api = HfApi() print(f"[lerobot-annotate] creating/locating dataset repo {repo_id}...", flush=True)