diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py index 8cbf89fa8..d296554d6 100644 --- a/src/lerobot/datasets/utils.py +++ b/src/lerobot/datasets/utils.py @@ -238,24 +238,23 @@ def get_safe_version(repo_id: str, version: str | packaging.version.Version) -> if not hub_versions: msg = ( - f"Repo {repo_id!r} has no codebase-version tags. " - f"Either the dataset doesn't exist on the Hub yet, or it was " - f"pushed without a version tag. To tag an existing dataset:\n" - f"```python\n" - f"from huggingface_hub import HfApi\n" - f"HfApi().create_tag({repo_id!r}, tag='_version_', repo_type='dataset')\n" - f"```" + f"Repo {repo_id!r} has no codebase-version tags. The dataset " + f"either doesn't exist on the Hub yet, or it was uploaded " + f"without a ``v3.x``-style tag. To tag an existing dataset run:\n" + f" from huggingface_hub import HfApi\n" + f" HfApi().create_tag({repo_id!r}, tag='v3.0', repo_type='dataset', exist_ok=True)" ) - # ``RevisionNotFoundError`` extends ``HfHubHTTPError`` which on - # newer ``huggingface_hub`` versions makes ``response`` a required - # keyword arg. Pass ``response=None`` explicitly so this raises - # with a clean message instead of an upstream - # ``TypeError: __init__() missing 1 required keyword-only argument: 'response'``. - try: - raise RevisionNotFoundError(msg, response=None) - except TypeError: - # Older ``huggingface_hub`` (no ``response`` kwarg). - raise RevisionNotFoundError(msg) # noqa: B904 + # ``RevisionNotFoundError`` extends ``HfHubHTTPError`` whose + # ``__init__`` indexes ``response.headers`` unconditionally on + # current ``huggingface_hub`` versions. Constructing it without + # a real ``Response`` object crashes with either + # ``TypeError: missing 1 required keyword-only argument`` (old + # builds) or ``AttributeError: 'NoneType' object has no attribute + # 'headers'`` (new builds). 
Skip that path entirely — this isn't + # really an HTTP error, it's a configuration issue — and raise a + # plain ``RuntimeError`` so the message actually reaches the + # caller. + raise RuntimeError(msg) if target_version in hub_versions: return f"v{target_version}" diff --git a/src/lerobot/scripts/lerobot_annotate.py b/src/lerobot/scripts/lerobot_annotate.py index 61148b1a4..b58ea26a2 100644 --- a/src/lerobot/scripts/lerobot_annotate.py +++ b/src/lerobot/scripts/lerobot_annotate.py @@ -141,6 +141,43 @@ def _push_to_hub(root: Path, cfg: AnnotationPipelineConfig) -> None: ) print(f"[lerobot-annotate] uploaded to https://huggingface.co/datasets/{repo_id}", flush=True) + # Tag the upload with the codebase version. ``LeRobotDatasetMetadata`` + # resolves the dataset revision via ``get_safe_version`` which scans + # for tags like ``v3.0``; without a tag it raises + # ``RuntimeError``. Read the version straight from the + # dataset's own ``meta/info.json`` so we tag whatever the writer + # actually wrote (no accidental drift if the codebase floor moves). + from lerobot.datasets.dataset_metadata import CODEBASE_VERSION # noqa: PLC0415 + + info_path = root / "meta" / "info.json" + version_tag = CODEBASE_VERSION + if info_path.exists(): + try: + from lerobot.utils.io_utils import load_json # noqa: PLC0415 + + info = load_json(info_path) + ds_version = info.get("codebase_version") + if isinstance(ds_version, str) and ds_version.startswith("v"): + version_tag = ds_version + except Exception as exc: # noqa: BLE001 + print(f"[lerobot-annotate] could not read codebase_version from info.json ({exc}); falling back to {version_tag}", flush=True) + try: + api.create_tag( + repo_id=repo_id, + tag=version_tag, + repo_type="dataset", + exist_ok=True, + ) + print(f"[lerobot-annotate] tagged {repo_id} as {version_tag}", flush=True) + except Exception as exc: # noqa: BLE001 + print( + f"[lerobot-annotate] WARNING: could not create tag {version_tag!r} on {repo_id}: {exc}. 
" + "Dataset is uploaded but ``LeRobotDataset`` won't be able to load it until it's tagged. " + "Run: from huggingface_hub import HfApi; " + f"HfApi().create_tag({repo_id!r}, tag={version_tag!r}, repo_type='dataset', exist_ok=True)", + flush=True, + ) + def main() -> None: annotate()