mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 16:49:55 +00:00
feat(convert_dataset_v21_to_v3): use the more efficient Dataset.from_parquet and concatenate_datasets
This commit is contained in:
@@ -67,24 +67,6 @@ def save_dataset_to_safetensors(output_dir, repo_id="lerobot/pusht"):
|
||||
save_file(dataset[i - 2], repo_dir / f"frame_{i - 2}.safetensors")
|
||||
save_file(dataset[i - 1], repo_dir / f"frame_{i - 1}.safetensors")
|
||||
|
||||
# TODO(rcadene): Enable testing on second and last episode
|
||||
# We currently can't because our test dataset only contains the first episode
|
||||
|
||||
# # save 2 first frames of second episode
|
||||
# i = dataset.meta.episodes["dataset_from_index"][1].item()
|
||||
# save_file(dataset[i], repo_dir / f"frame_{i}.safetensors")
|
||||
# save_file(dataset[i + 1], repo_dir / f"frame_{i+1}.safetensors")
|
||||
|
||||
# # save 2 last frames of second episode
|
||||
# i = dataset.meta.episodes["dataset_to_index"][1].item()
|
||||
# save_file(dataset[i - 2], repo_dir / f"frame_{i-2}.safetensors")
|
||||
# save_file(dataset[i - 1], repo_dir / f"frame_{i-1}.safetensors")
|
||||
|
||||
# # save 2 last frames of last episode
|
||||
# i = dataset.meta.episodes["dataset_to_index"][-1].item()
|
||||
# save_file(dataset[i - 2], repo_dir / f"frame_{i-2}.safetensors")
|
||||
# save_file(dataset[i - 1], repo_dir / f"frame_{i-1}.safetensors")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
for dataset in [
|
||||
|
||||
@@ -527,24 +527,6 @@ def test_backward_compatibility(repo_id):
|
||||
load_and_compare(i - 2)
|
||||
load_and_compare(i - 1)
|
||||
|
||||
# TODO(rcadene): Enable testing on second and last episode
|
||||
# We currently can't because our test dataset only contains the first episode
|
||||
|
||||
# # test 2 first frames of second episode
|
||||
# i = dataset.meta.episodes["dataset_from_index"][1].item()
|
||||
# load_and_compare(i)
|
||||
# load_and_compare(i + 1)
|
||||
|
||||
# # test 2 last frames of second episode
|
||||
# i = dataset.meta.episodes["dataset_to_index"][1].item()
|
||||
# load_and_compare(i - 2)
|
||||
# load_and_compare(i - 1)
|
||||
|
||||
# # test 2 last frames of last episode
|
||||
# i = dataset.meta.episodes["dataset_to_index"][-1].item()
|
||||
# load_and_compare(i - 2)
|
||||
# load_and_compare(i - 1)
|
||||
|
||||
|
||||
@pytest.mark.skip("Requires internet access")
|
||||
def test_create_branch():
|
||||
|
||||
Vendored
-19
@@ -218,25 +218,6 @@ def stats_factory():
|
||||
return _create_stats
|
||||
|
||||
|
||||
# @pytest.fixture(scope="session")
|
||||
# def episodes_stats_factory(stats_factory):
|
||||
# def _create_episodes_stats(
|
||||
# features: dict[str],
|
||||
# total_episodes: int = 3,
|
||||
# ) -> dict:
|
||||
|
||||
# def _generator(total_episodes):
|
||||
# for ep_idx in range(total_episodes):
|
||||
# flat_ep_stats = flatten_dict(stats_factory(features))
|
||||
# flat_ep_stats["episode_index"] = ep_idx
|
||||
# yield flat_ep_stats
|
||||
|
||||
# # Simpler to rely on generator instead of from_dict
|
||||
# return Dataset.from_generator(lambda: _generator(total_episodes))
|
||||
|
||||
# return _create_episodes_stats
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def tasks_factory():
|
||||
def _create_tasks(total_tasks: int = 3) -> pd.DataFrame:
|
||||
|
||||
Reference in New Issue
Block a user