From 3a05512ca25227906798419a42fcdd1531883fd7 Mon Sep 17 00:00:00 2001
From: Nicolas Rabault
Date: Wed, 24 Jun 2026 11:09:58 +0200
Subject: [PATCH] refactor(jobs): use HF_LEROBOT_HOME constant for the local
 cache root

Resolve the local dataset cache via lerobot.utils.constants.HF_LEROBOT_HOME
instead of re-reading the env var by hand, dropping the os/Path imports.
Tests now patch the imported constant and assert on a stable message
substring (the previous "neither" match only passed by accident, matching
the test name embedded in the pytest tmp_path).
---
 src/lerobot/jobs/dataset.py | 12 ++++++------
 tests/jobs/test_dataset.py  |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/lerobot/jobs/dataset.py b/src/lerobot/jobs/dataset.py
index cb124fec5..fcccfa3c1 100644
--- a/src/lerobot/jobs/dataset.py
+++ b/src/lerobot/jobs/dataset.py
@@ -21,10 +21,10 @@ local-only dataset is pushed to a PRIVATE repo first (never public).
 
 from __future__ import annotations
 
-import os
-from pathlib import Path
 from typing import TYPE_CHECKING
 
+from lerobot.utils.constants import HF_LEROBOT_HOME
+
 if TYPE_CHECKING:
     from huggingface_hub import HfApi
 
@@ -39,12 +39,12 @@ def ensure_dataset_available(repo_id: str, *, api: HfApi, tags: list[str] | None
     if api.repo_exists(repo_id, repo_type="dataset"):
         return
 
-    cache_root = Path(os.environ.get("HF_LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser()
-    local_present = (cache_root / repo_id / "meta" / "info.json").is_file()
+    local_present = (HF_LEROBOT_HOME / repo_id / "meta" / "info.json").is_file()
     if not local_present:
         raise RuntimeError(
-            f"Dataset '{repo_id}' is neither on the Hub nor in the local cache "
-            f"({cache_root}). Record or download it first."
+            f"Dataset '{repo_id}' is not in the local cache ({HF_LEROBOT_HOME}) and could not be "
+            f"reached on the Hub — it may not exist, or be private and inaccessible with your "
+            f"token. Record or download it first, or run `hf auth login`."
         )
 
     print(f"[dataset] '{repo_id}' is local-only; pushing to a PRIVATE Hub repo...")
diff --git a/tests/jobs/test_dataset.py b/tests/jobs/test_dataset.py
index e89cc1805..9d82caf99 100644
--- a/tests/jobs/test_dataset.py
+++ b/tests/jobs/test_dataset.py
@@ -42,7 +42,7 @@ def test_dataset_already_on_hub_is_noop():
 
 # Branch 2: not on Hub but present locally → always push privately.
 def test_dataset_local_only_uploads_privately(tmp_path, monkeypatch):
-    monkeypatch.setenv("HF_LEROBOT_HOME", str(tmp_path))
+    monkeypatch.setattr("lerobot.jobs.dataset.HF_LEROBOT_HOME", tmp_path)
     _make_local_cache(tmp_path, "user/ds")
     api = _api_with_dataset(False)
 
@@ -57,11 +57,11 @@ def test_dataset_local_only_uploads_privately(tmp_path, monkeypatch):
     mock_ds_cls.return_value.push_to_hub.assert_called_once_with(private=True, tags=["lerobot", "lelab"])
 
 
-# Branch 3: not on Hub, NOT in local cache → RuntimeError "neither".
+# Branch 3: not on Hub, NOT in local cache → RuntimeError.
 def test_dataset_neither_on_hub_nor_local_raises(tmp_path, monkeypatch):
-    monkeypatch.setenv("HF_LEROBOT_HOME", str(tmp_path))
+    monkeypatch.setattr("lerobot.jobs.dataset.HF_LEROBOT_HOME", tmp_path)
     # tmp_path is empty — no local cache.
     api = _api_with_dataset(False)
 
-    with pytest.raises(RuntimeError, match="neither"):
+    with pytest.raises(RuntimeError, match="not in the local cache"):
         ensure_dataset_available("user/ds", api=api)