mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-30 06:37:15 +00:00
refactor(jobs): check dataset presence with api.repo_exists
Replace the try/except around api.dataset_info(repo_id), which caught RepositoryNotFoundError to detect a missing dataset, with a direct api.repo_exists(repo_id, repo_type="dataset") check, and drop the httpx/RepositoryNotFoundError scaffolding from the tests.
This commit is contained in:
@@ -24,8 +24,6 @@ from __future__ import annotations
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub.errors import RepositoryNotFoundError
|
||||
|
||||
|
||||
def ensure_dataset_available(repo_id: str, *, api, tags: list[str] | None = None) -> None:
|
||||
"""Ensure repo_id resolves on the Hub, pushing a local-only dataset privately first.
|
||||
@@ -34,11 +32,8 @@ def ensure_dataset_available(repo_id: str, *, api, tags: list[str] | None = None
|
||||
dataset is left untouched). Raises RuntimeError if the dataset is neither on
|
||||
the Hub nor in the local cache.
|
||||
"""
|
||||
try:
|
||||
api.dataset_info(repo_id)
|
||||
if api.repo_exists(repo_id, repo_type="dataset"):
|
||||
return
|
||||
except RepositoryNotFoundError:
|
||||
pass
|
||||
|
||||
cache_root = Path(os.environ.get("HF_LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser()
|
||||
local_present = (cache_root / repo_id / "meta" / "info.json").is_file()
|
||||
|
||||
@@ -15,25 +15,14 @@
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from huggingface_hub.errors import RepositoryNotFoundError
|
||||
|
||||
from lerobot.jobs.dataset import ensure_dataset_available
|
||||
|
||||
|
||||
def _repo_not_found() -> RepositoryNotFoundError:
|
||||
req = httpx.Request("GET", "https://huggingface.co/datasets/test")
|
||||
resp = httpx.Response(404, request=req)
|
||||
return RepositoryNotFoundError("nope", response=resp)
|
||||
|
||||
|
||||
def _api_with_dataset(exists: bool):
|
||||
api = MagicMock()
|
||||
if exists:
|
||||
api.dataset_info.return_value = object()
|
||||
else:
|
||||
api.dataset_info.side_effect = _repo_not_found()
|
||||
api.repo_exists.return_value = exists
|
||||
return api
|
||||
|
||||
|
||||
@@ -48,7 +37,7 @@ def _make_local_cache(tmp_path, repo_id: str) -> None:
|
||||
def test_dataset_already_on_hub_is_noop():
|
||||
api = _api_with_dataset(True)
|
||||
assert ensure_dataset_available("user/ds", api=api) is None
|
||||
api.dataset_info.assert_called_once_with("user/ds")
|
||||
api.repo_exists.assert_called_once_with("user/ds", repo_type="dataset")
|
||||
|
||||
|
||||
# Branch 2: not on Hub but present locally → always push privately.
|
||||
|
||||
Reference in New Issue
Block a user