mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-30 06:37:15 +00:00
refactor(jobs): use HF_LEROBOT_HOME constant for the local cache root
Resolve the local dataset cache via lerobot.utils.constants.HF_LEROBOT_HOME instead of re-reading the env var by hand, dropping the os/Path imports. Tests now patch the imported constant and assert on a stable message substring (the previous "neither" match only passed by accident, matching the test name embedded in the pytest tmp_path).
This commit is contained in:
@@ -21,10 +21,10 @@ local-only dataset is pushed to a PRIVATE repo first (never public).
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from lerobot.utils.constants import HF_LEROBOT_HOME
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
@@ -39,12 +39,12 @@ def ensure_dataset_available(repo_id: str, *, api: HfApi, tags: list[str] | None
|
||||
if api.repo_exists(repo_id, repo_type="dataset"):
|
||||
return
|
||||
|
||||
cache_root = Path(os.environ.get("HF_LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser()
|
||||
local_present = (cache_root / repo_id / "meta" / "info.json").is_file()
|
||||
local_present = (HF_LEROBOT_HOME / repo_id / "meta" / "info.json").is_file()
|
||||
if not local_present:
|
||||
raise RuntimeError(
|
||||
f"Dataset '{repo_id}' is neither on the Hub nor in the local cache "
|
||||
f"({cache_root}). Record or download it first."
|
||||
f"Dataset '{repo_id}' is not in the local cache ({HF_LEROBOT_HOME}) and could not be "
|
||||
f"reached on the Hub — it may not exist, or be private and inaccessible with your "
|
||||
f"token. Record or download it first, or run `hf auth login`."
|
||||
)
|
||||
|
||||
print(f"[dataset] '{repo_id}' is local-only; pushing to a PRIVATE Hub repo...")
|
||||
|
||||
@@ -42,7 +42,7 @@ def test_dataset_already_on_hub_is_noop():
|
||||
|
||||
# Branch 2: not on Hub but present locally → always push privately.
|
||||
def test_dataset_local_only_uploads_privately(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HF_LEROBOT_HOME", str(tmp_path))
|
||||
monkeypatch.setattr("lerobot.jobs.dataset.HF_LEROBOT_HOME", tmp_path)
|
||||
_make_local_cache(tmp_path, "user/ds")
|
||||
|
||||
api = _api_with_dataset(False)
|
||||
@@ -57,11 +57,11 @@ def test_dataset_local_only_uploads_privately(tmp_path, monkeypatch):
|
||||
mock_ds_cls.return_value.push_to_hub.assert_called_once_with(private=True, tags=["lerobot", "lelab"])
|
||||
|
||||
|
||||
# Branch 3: not on Hub, NOT in local cache → RuntimeError "neither".
|
||||
# Branch 3: not on Hub, NOT in local cache → RuntimeError.
|
||||
def test_dataset_neither_on_hub_nor_local_raises(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HF_LEROBOT_HOME", str(tmp_path))
|
||||
monkeypatch.setattr("lerobot.jobs.dataset.HF_LEROBOT_HOME", tmp_path)
|
||||
# tmp_path is empty — no local cache.
|
||||
|
||||
api = _api_with_dataset(False)
|
||||
with pytest.raises(RuntimeError, match="neither"):
|
||||
with pytest.raises(RuntimeError, match="not in the local cache"):
|
||||
ensure_dataset_available("user/ds", api=api)
|
||||
|
||||
Reference in New Issue
Block a user