feat(convert_dataset_v21_to_v3) added the use of more efficient Dataset.from_parquet and concatenate_datasets

This commit is contained in:
Michel Aractingi
2025-07-22 17:27:41 +02:00
parent 670d7f485f
commit 218ebed3ef
4 changed files with 9 additions and 68 deletions
-19
View File
@@ -218,25 +218,6 @@ def stats_factory():
return _create_stats
# @pytest.fixture(scope="session")
# def episodes_stats_factory(stats_factory):
# def _create_episodes_stats(
# features: dict[str],
# total_episodes: int = 3,
# ) -> dict:
# def _generator(total_episodes):
# for ep_idx in range(total_episodes):
# flat_ep_stats = flatten_dict(stats_factory(features))
# flat_ep_stats["episode_index"] = ep_idx
# yield flat_ep_stats
# # Simpler to rely on generator instead of from_dict
# return Dataset.from_generator(lambda: _generator(total_episodes))
# return _create_episodes_stats
@pytest.fixture(scope="session")
def tasks_factory():
def _create_tasks(total_tasks: int = 3) -> pd.DataFrame: