refactor(jobs): use HF_LEROBOT_HOME constant for the local cache root

Resolve the local dataset cache via lerobot.utils.constants.HF_LEROBOT_HOME
instead of re-reading the env var by hand; this also drops the now-unused
os and pathlib.Path imports.
Tests now patch the imported constant and assert on a stable substring of
the error message (the previous match="neither" only passed by accident: it
matched the test name embedded in the pytest tmp_path).
This commit is contained in:
Nicolas Rabault
2026-06-24 11:09:58 +02:00
parent 7da560e5c3
commit 3a05512ca2
2 changed files with 10 additions and 10 deletions
+6 -6
View File
@@ -21,10 +21,10 @@ local-only dataset is pushed to a PRIVATE repo first (never public).
from __future__ import annotations
import os
from pathlib import Path
from typing import TYPE_CHECKING
from lerobot.utils.constants import HF_LEROBOT_HOME
if TYPE_CHECKING:
from huggingface_hub import HfApi
@@ -39,12 +39,12 @@ def ensure_dataset_available(repo_id: str, *, api: HfApi, tags: list[str] | None
if api.repo_exists(repo_id, repo_type="dataset"):
return
cache_root = Path(os.environ.get("HF_LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser()
local_present = (cache_root / repo_id / "meta" / "info.json").is_file()
local_present = (HF_LEROBOT_HOME / repo_id / "meta" / "info.json").is_file()
if not local_present:
raise RuntimeError(
f"Dataset '{repo_id}' is neither on the Hub nor in the local cache "
f"({cache_root}). Record or download it first."
f"Dataset '{repo_id}' is not in the local cache ({HF_LEROBOT_HOME}) and could not be "
f"reached on the Hub — it may not exist, or be private and inaccessible with your "
f"token. Record or download it first, or run `hf auth login`."
)
print(f"[dataset] '{repo_id}' is local-only; pushing to a PRIVATE Hub repo...")
+4 -4
View File
@@ -42,7 +42,7 @@ def test_dataset_already_on_hub_is_noop():
# Branch 2: not on Hub but present locally → always push privately.
def test_dataset_local_only_uploads_privately(tmp_path, monkeypatch):
monkeypatch.setenv("HF_LEROBOT_HOME", str(tmp_path))
monkeypatch.setattr("lerobot.jobs.dataset.HF_LEROBOT_HOME", tmp_path)
_make_local_cache(tmp_path, "user/ds")
api = _api_with_dataset(False)
@@ -57,11 +57,11 @@ def test_dataset_local_only_uploads_privately(tmp_path, monkeypatch):
mock_ds_cls.return_value.push_to_hub.assert_called_once_with(private=True, tags=["lerobot", "lelab"])
# Branch 3: not on Hub, NOT in local cache → RuntimeError "neither".
# Branch 3: not on Hub, NOT in local cache → RuntimeError.
def test_dataset_neither_on_hub_nor_local_raises(tmp_path, monkeypatch):
monkeypatch.setenv("HF_LEROBOT_HOME", str(tmp_path))
monkeypatch.setattr("lerobot.jobs.dataset.HF_LEROBOT_HOME", tmp_path)
# tmp_path is empty — no local cache.
api = _api_with_dataset(False)
with pytest.raises(RuntimeError, match="neither"):
with pytest.raises(RuntimeError, match="not in the local cache"):
ensure_dataset_available("user/ds", api=api)