mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 20:50:02 +00:00
feat(annotate): --vlm.push_to_hub uploads the annotated dataset
After the pipeline completes, optionally create/locate a dataset repo and upload the dataset root (excluding .annotate_staging/). Add push_private and push_commit_message knobs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -201,5 +201,14 @@ class AnnotationPipelineConfig:
|
|||||||
skip_validation: bool = False
|
skip_validation: bool = False
|
||||||
only_episodes: tuple[int, ...] | None = None
|
only_episodes: tuple[int, ...] | None = None
|
||||||
|
|
||||||
|
push_to_hub: str | None = None
|
||||||
|
"""If set, after the pipeline completes, upload the annotated dataset
|
||||||
|
root to the Hugging Face Hub as a dataset repo with this id (e.g.
|
||||||
|
``pepijn/super_poulain_steerable``). Creates the repo if missing."""
|
||||||
|
push_private: bool = False
|
||||||
|
"""When ``push_to_hub`` is set, create the repo as private."""
|
||||||
|
push_commit_message: str | None = None
|
||||||
|
"""Override the commit message used for the hub upload."""
|
||||||
|
|
||||||
def resolved_staging_dir(self, root: Path) -> Path:
    """Return the staging directory to use for the dataset at ``root``.

    An explicitly configured ``staging_dir`` takes precedence; otherwise the
    ``.annotate_staging`` directory directly under ``root`` is used.
    """
    configured = self.staging_dir
    if configured is None:
        return root / ".annotate_staging"
    return configured
|
||||||
|
|||||||
@@ -95,6 +95,34 @@ def annotate(cfg: AnnotationPipelineConfig) -> None:
|
|||||||
for w in summary.validation_report.warnings:
|
for w in summary.validation_report.warnings:
|
||||||
logger.warning(w)
|
logger.warning(w)
|
||||||
|
|
||||||
|
if cfg.push_to_hub:
|
||||||
|
_push_to_hub(root, cfg)
|
||||||
|
|
||||||
|
|
||||||
|
def _push_to_hub(root: Path, cfg: AnnotationPipelineConfig) -> None:
    """Upload the annotated dataset directory to the Hugging Face Hub.

    Creates the dataset repo named by ``cfg.push_to_hub`` when it does not
    exist yet (private when ``cfg.push_private`` is set), then uploads the
    contents of ``root`` to it, leaving out the annotation staging directory
    and macOS ``.DS_Store`` files.

    Args:
        root: Local dataset root to upload.
        cfg: Pipeline config. Reads ``push_to_hub``, ``push_private``,
            ``push_commit_message`` and ``resolved_staging_dir``.
    """
    from huggingface_hub import HfApi  # noqa: PLC0415

    commit_message = cfg.push_commit_message or "Add steerable annotations (lerobot-annotate)"

    # Patterns are matched fnmatch-style against paths relative to ``root``.
    # "**/.DS_Store" requires a "/" before the file name, so the bare
    # ".DS_Store" entry is needed to also skip one sitting directly in ``root``.
    ignore_patterns = [".annotate_staging/**", ".DS_Store", "**/.DS_Store"]

    # The staging directory is configurable; when a custom one lives inside
    # ``root`` it must be excluded too, not just the default location.
    staging_dir = Path(cfg.resolved_staging_dir(root))
    try:
        staging_rel = staging_dir.resolve().relative_to(Path(root).resolve())
    except ValueError:
        # Staging lives outside ``root``: nothing extra to exclude.
        staging_rel = None
    if staging_rel is not None and staging_rel.parts:
        staging_pattern = f"{staging_rel.as_posix()}/**"
        if staging_pattern not in ignore_patterns:
            ignore_patterns.append(staging_pattern)

    api = HfApi()
    print(f"[lerobot-annotate] creating/locating dataset repo {cfg.push_to_hub}...", flush=True)
    # Use the id reported back by the Hub so that a repo id given without a
    # namespace still yields a correct upload target and final URL.
    repo_id = api.create_repo(
        repo_id=cfg.push_to_hub,
        repo_type="dataset",
        private=cfg.push_private,
        exist_ok=True,
    ).repo_id
    print(f"[lerobot-annotate] uploading {root} -> {repo_id}...", flush=True)
    api.upload_folder(
        folder_path=str(root),
        repo_id=repo_id,
        repo_type="dataset",
        commit_message=commit_message,
        ignore_patterns=ignore_patterns,
    )
    print(f"[lerobot-annotate] uploaded to https://huggingface.co/datasets/{repo_id}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
annotate()
|
annotate()
|
||||||
|
|||||||
Reference in New Issue
Block a user