fix: add frame idx

This commit is contained in:
Francesco Capuano
2025-11-06 16:36:19 +00:00
committed by fracapuano
parent 14743b896e
commit c1f5d8f48f
+16 -1
View File
@@ -32,6 +32,7 @@ from lerobot.datasets.utils import (
DEFAULT_CHUNK_SIZE,
DEFAULT_DATA_FILE_SIZE_IN_MB,
DEFAULT_DATA_PATH,
DEFAULT_FEATURES,
DEFAULT_VIDEO_FILE_SIZE_IN_MB,
DEFAULT_VIDEO_PATH,
LEGACY_EPISODES_PATH,
@@ -57,6 +58,14 @@ from lerobot.utils.utils import init_logging
# NOTE(review): hard-coded absolute path to a user-specific location; presumably
# the output root for the converted dataset — confirm against its callers and
# consider making it configurable.
NEW_ROOT = Path("/fsx/jade_choghari/tmp/bb")
def fix_episode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Ensure ``df`` has a per-episode ``frame_index`` column.

    A frame that already has ``frame_index`` is returned untouched. Otherwise
    the column is added in place: when an ``episode_index`` column is present
    the counter restarts at 0 for every episode (rows are numbered in their
    order of appearance within each episode); when it is absent the whole
    frame is treated as a single episode and numbered ``0..len(df) - 1``.

    Args:
        df: Frame-level data. Modified in place when ``frame_index`` is missing.

    Returns:
        The same DataFrame object, guaranteed to contain ``frame_index``.
    """
    if "frame_index" in df.columns:
        return df

    if "episode_index" in df.columns:
        # Restart the counter at each episode so a file holding several
        # episodes does not end up with one running 0..len(df)-1 index.
        # ``to_numpy()`` makes the assignment positional, so a non-unique
        # DataFrame index cannot break label alignment.
        df["frame_index"] = (
            df.groupby("episode_index", sort=False).cumcount().to_numpy()
        )
    else:
        # No episode information available: number the rows as one episode.
        df["frame_index"] = range(len(df))
    return df
def get_total_episodes_task(local_dir: Path, task_id: int, task_ranges: dict, step) -> int:
"""
Calculates the total number of episodes for a single, specified task.
@@ -88,6 +97,7 @@ def convert_info(
):
info = load_info(root)
info["codebase_version"] = "v3.0"
info["features"] = {**info["features"], **DEFAULT_FEATURES}
del info["total_videos"]
info["data_files_size_in_mb"] = data_file_size_in_mb
info["video_files_size_in_mb"] = video_file_size_in_mb
@@ -133,7 +143,12 @@ def convert_tasks(root, new_root, task_id: int):
def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
# TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets
dataframes = [pd.read_parquet(file) for file in paths_to_cat]
dataframes = []
for file in paths_to_cat:
df = pd.read_parquet(file)
df = fix_episode_dataframe(df)
dataframes.append(df)
# Concatenate all DataFrames along rows
concatenated_df = pd.concat(dataframes, ignore_index=True)