mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 12:40:08 +00:00
fix(datasets,annotate): tag pushed dataset + clean revision error
Two bugs combined to make the brand-new ``_tool3`` dataset unloadable:

1. ``lerobot_annotate.py:_push_to_hub`` uploads the annotated dataset folder but never creates a codebase-version tag, so ``api/datasets/<repo>/refs`` returns ``"tags": []``. Then ``LeRobotDatasetMetadata`` → ``get_safe_version`` → ``get_repo_versions`` returns empty and the loader raises ``RevisionNotFoundError``.

2. ``RevisionNotFoundError`` itself was unconstructible: the ``HfHubHTTPError.__init__`` it inherits indexes ``response.headers`` unconditionally on current ``huggingface_hub`` versions, so constructing it without a real ``Response`` blew up with ``AttributeError: 'NoneType' object has no attribute 'headers'``, masking the real "no tag" message.

Fix #1: after upload, read ``meta/info.json["codebase_version"]`` and call ``HfApi.create_tag(..., tag=<v3.x>, repo_type='dataset', exist_ok=True)`` so the dataset is loadable straight from the Hub on the next ``LeRobotDataset(repo_id)`` call. Falls back to the in-tree ``CODEBASE_VERSION`` if info.json is missing/malformed; on tag creation failure, prints the manual one-liner the user needs.

Fix #2: stop trying to instantiate ``RevisionNotFoundError`` (which inherits ``HfHubHTTPError``) for what is really a config issue, not an HTTP failure. Raise a plain ``RuntimeError`` with the same message — the caller actually sees what's wrong instead of an upstream attribute error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -238,24 +238,23 @@ def get_safe_version(repo_id: str, version: str | packaging.version.Version) ->
|
|||||||
|
|
||||||
if not hub_versions:
|
if not hub_versions:
|
||||||
msg = (
|
msg = (
|
||||||
f"Repo {repo_id!r} has no codebase-version tags. "
|
f"Repo {repo_id!r} has no codebase-version tags. The dataset "
|
||||||
f"Either the dataset doesn't exist on the Hub yet, or it was "
|
f"either doesn't exist on the Hub yet, or it was uploaded "
|
||||||
f"pushed without a version tag. To tag an existing dataset:\n"
|
f"without a ``v3.x``-style tag. To tag an existing dataset run:\n"
|
||||||
f"```python\n"
|
f" from huggingface_hub import HfApi\n"
|
||||||
f"from huggingface_hub import HfApi\n"
|
f" HfApi().create_tag({repo_id!r}, tag='v3.0', repo_type='dataset', exist_ok=True)"
|
||||||
f"HfApi().create_tag({repo_id!r}, tag='_version_', repo_type='dataset')\n"
|
|
||||||
f"```"
|
|
||||||
)
|
)
|
||||||
# ``RevisionNotFoundError`` extends ``HfHubHTTPError`` which on
|
# ``RevisionNotFoundError`` extends ``HfHubHTTPError`` whose
|
||||||
# newer ``huggingface_hub`` versions makes ``response`` a required
|
# ``__init__`` indexes ``response.headers`` unconditionally on
|
||||||
# keyword arg. Pass ``response=None`` explicitly so this raises
|
# current ``huggingface_hub`` versions. Constructing it without
|
||||||
# with a clean message instead of an upstream
|
# a real ``Response`` object crashes with either
|
||||||
# ``TypeError: __init__() missing 1 required keyword-only argument: 'response'``.
|
# ``TypeError: missing 1 required keyword-only argument`` (old
|
||||||
try:
|
# builds) or ``AttributeError: 'NoneType' object has no attribute
|
||||||
raise RevisionNotFoundError(msg, response=None)
|
# 'headers'`` (new builds). Skip that path entirely — this isn't
|
||||||
except TypeError:
|
# really an HTTP error, it's a configuration issue — and raise a
|
||||||
# Older ``huggingface_hub`` (no ``response`` kwarg).
|
# plain ``RuntimeError`` so the message actually reaches the
|
||||||
raise RevisionNotFoundError(msg) # noqa: B904
|
# caller.
|
||||||
|
raise RuntimeError(msg)
|
||||||
|
|
||||||
if target_version in hub_versions:
|
if target_version in hub_versions:
|
||||||
return f"v{target_version}"
|
return f"v{target_version}"
|
||||||
|
|||||||
@@ -141,6 +141,43 @@ def _push_to_hub(root: Path, cfg: AnnotationPipelineConfig) -> None:
|
|||||||
)
|
)
|
||||||
print(f"[lerobot-annotate] uploaded to https://huggingface.co/datasets/{repo_id}", flush=True)
|
print(f"[lerobot-annotate] uploaded to https://huggingface.co/datasets/{repo_id}", flush=True)
|
||||||
|
|
||||||
|
# Tag the upload with the codebase version. ``LeRobotDatasetMetadata``
|
||||||
|
# resolves the dataset revision via ``get_safe_version`` which scans
|
||||||
|
# for tags like ``v3.0``; without a tag it raises
|
||||||
|
# ``RuntimeError`` (previously ``RevisionNotFoundError``). Read the version straight from the
|
||||||
|
# dataset's own ``meta/info.json`` so we tag whatever the writer
|
||||||
|
# actually wrote (no accidental drift if the codebase floor moves).
|
||||||
|
from lerobot.datasets.dataset_metadata import CODEBASE_VERSION # noqa: PLC0415
|
||||||
|
|
||||||
|
info_path = root / "meta" / "info.json"
|
||||||
|
version_tag = CODEBASE_VERSION
|
||||||
|
if info_path.exists():
|
||||||
|
try:
|
||||||
|
from lerobot.utils.io_utils import load_json # noqa: PLC0415
|
||||||
|
|
||||||
|
info = load_json(info_path)
|
||||||
|
ds_version = info.get("codebase_version")
|
||||||
|
if isinstance(ds_version, str) and ds_version.startswith("v"):
|
||||||
|
version_tag = ds_version
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
print(f"[lerobot-annotate] could not read codebase_version from info.json ({exc}); falling back to {version_tag}", flush=True)
|
||||||
|
try:
|
||||||
|
api.create_tag(
|
||||||
|
repo_id=repo_id,
|
||||||
|
tag=version_tag,
|
||||||
|
repo_type="dataset",
|
||||||
|
exist_ok=True,
|
||||||
|
)
|
||||||
|
print(f"[lerobot-annotate] tagged {repo_id} as {version_tag}", flush=True)
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
print(
|
||||||
|
f"[lerobot-annotate] WARNING: could not create tag {version_tag!r} on {repo_id}: {exc}. "
|
||||||
|
"Dataset is uploaded but ``LeRobotDataset`` won't be able to load it until it's tagged. "
|
||||||
|
"Run: from huggingface_hub import HfApi; "
|
||||||
|
f"HfApi().create_tag({repo_id!r}, tag={version_tag!r}, repo_type='dataset', exist_ok=True)",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
annotate()
|
annotate()
|
||||||
|
|||||||
Reference in New Issue
Block a user