mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-20 19:19:56 +00:00
chore(docstrings): updating v2.1-v3.0 conversion script docstrings to match the new task label (#3077)
* chore(docstrings): updating v2.1-v3.0 conversion script docstrings to match the new task label * chore(task): renaming the default index label in the tasks DataFrame to task * Revert "chore(docstrings): updating v2.1-v3.0 conversion script docstrings to match the new task label" This reverts commit f55de3255278f23f18b5d955565f6768d094951d. * chore(docstrings): updating docstrings to match dataset v3.0 architecture * chore(format): formatting code
This commit is contained in:
@@ -289,7 +289,9 @@ def aggregate_datasets(
|
|||||||
|
|
||||||
logging.info("Find all tasks")
|
logging.info("Find all tasks")
|
||||||
unique_tasks = pd.concat([m.tasks for m in all_metadata]).index.unique()
|
unique_tasks = pd.concat([m.tasks for m in all_metadata]).index.unique()
|
||||||
dst_meta.tasks = pd.DataFrame({"task_index": range(len(unique_tasks))}, index=unique_tasks)
|
dst_meta.tasks = pd.DataFrame(
|
||||||
|
{"task_index": range(len(unique_tasks))}, index=pd.Index(unique_tasks, name="task")
|
||||||
|
)
|
||||||
|
|
||||||
meta_idx = {"chunk": 0, "file": 0}
|
meta_idx = {"chunk": 0, "file": 0}
|
||||||
data_idx = {"chunk": 0, "file": 0}
|
data_idx = {"chunk": 0, "file": 0}
|
||||||
|
|||||||
@@ -1475,7 +1475,9 @@ def modify_tasks(
|
|||||||
|
|
||||||
# Collect all unique tasks and create new task mapping
|
# Collect all unique tasks and create new task mapping
|
||||||
unique_tasks = sorted(set(episode_to_task.values()))
|
unique_tasks = sorted(set(episode_to_task.values()))
|
||||||
new_task_df = pd.DataFrame({"task_index": list(range(len(unique_tasks)))}, index=unique_tasks)
|
new_task_df = pd.DataFrame(
|
||||||
|
{"task_index": list(range(len(unique_tasks)))}, index=pd.Index(unique_tasks, name="task")
|
||||||
|
)
|
||||||
task_to_index = {task: idx for idx, task in enumerate(unique_tasks)}
|
task_to_index = {task: idx for idx, task in enumerate(unique_tasks)}
|
||||||
|
|
||||||
logging.info(f"Modifying tasks in {dataset.repo_id}")
|
logging.info(f"Modifying tasks in {dataset.repo_id}")
|
||||||
|
|||||||
@@ -314,7 +314,7 @@ class LeRobotDatasetMetadata:
|
|||||||
if self.tasks is None:
|
if self.tasks is None:
|
||||||
new_tasks = tasks
|
new_tasks = tasks
|
||||||
task_indices = range(len(tasks))
|
task_indices = range(len(tasks))
|
||||||
self.tasks = pd.DataFrame({"task_index": task_indices}, index=tasks)
|
self.tasks = pd.DataFrame({"task_index": task_indices}, index=pd.Index(tasks, name="task"))
|
||||||
else:
|
else:
|
||||||
new_tasks = [task for task in tasks if task not in self.tasks.index]
|
new_tasks = [task for task in tasks if task not in self.tasks.index]
|
||||||
new_task_indices = range(len(self.tasks), len(self.tasks) + len(new_tasks))
|
new_task_indices = range(len(self.tasks), len(self.tasks) + len(new_tasks))
|
||||||
|
|||||||
@@ -341,6 +341,7 @@ def write_tasks(tasks: pandas.DataFrame, local_dir: Path) -> None:
|
|||||||
|
|
||||||
def load_tasks(local_dir: Path) -> pandas.DataFrame:
|
def load_tasks(local_dir: Path) -> pandas.DataFrame:
|
||||||
tasks = pd.read_parquet(local_dir / DEFAULT_TASKS_PATH)
|
tasks = pd.read_parquet(local_dir / DEFAULT_TASKS_PATH)
|
||||||
|
tasks.index.name = "task"
|
||||||
return tasks
|
return tasks
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ episodes.jsonl
|
|||||||
{"episode_index": 1, "tasks": ["Put the blue block in the green bowl"], "length": 266}
|
{"episode_index": 1, "tasks": ["Put the blue block in the green bowl"], "length": 266}
|
||||||
|
|
||||||
NEW
|
NEW
|
||||||
meta/episodes/chunk-000/episodes_000.parquet
|
meta/episodes/chunk-000/file_000.parquet
|
||||||
episode_index | video_chunk_index | video_file_index | data_chunk_index | data_file_index | tasks | length
|
episode_index | video_chunk_index | video_file_index | data_chunk_index | data_file_index | tasks | length
|
||||||
-------------------------
|
-------------------------
|
||||||
OLD
|
OLD
|
||||||
@@ -116,15 +116,16 @@ tasks.jsonl
|
|||||||
{"task_index": 1, "task": "Put the blue block in the green bowl"}
|
{"task_index": 1, "task": "Put the blue block in the green bowl"}
|
||||||
|
|
||||||
NEW
|
NEW
|
||||||
meta/tasks/chunk-000/file_000.parquet
|
meta/tasks.parquet
|
||||||
task_index | task
|
task_index | task
|
||||||
-------------------------
|
-------------------------
|
||||||
OLD
|
OLD
|
||||||
episodes_stats.jsonl
|
episodes_stats.jsonl
|
||||||
|
{"episode_index": 1, "stats": {"feature_name": {"min": ..., "max": ..., "mean": ..., "std": ..., "count": ...}}}
|
||||||
|
|
||||||
NEW
|
NEW
|
||||||
meta/episodes_stats/chunk-000/file_000.parquet
|
meta/episodes/chunk-000/file_000.parquet
|
||||||
episode_index | mean | std | min | max
|
episode_index | feature_name/min | feature_name/max | feature_name/mean | feature_name/std | feature_name/count
|
||||||
-------------------------
|
-------------------------
|
||||||
UPDATE
|
UPDATE
|
||||||
meta/info.json
|
meta/info.json
|
||||||
@@ -173,7 +174,7 @@ def convert_tasks(root, new_root):
|
|||||||
tasks, _ = legacy_load_tasks(root)
|
tasks, _ = legacy_load_tasks(root)
|
||||||
task_indices = tasks.keys()
|
task_indices = tasks.keys()
|
||||||
task_strings = tasks.values()
|
task_strings = tasks.values()
|
||||||
df_tasks = pd.DataFrame({"task_index": task_indices}, index=task_strings)
|
df_tasks = pd.DataFrame({"task_index": task_indices}, index=pd.Index(task_strings, name="task"))
|
||||||
write_tasks(df_tasks, new_root)
|
write_tasks(df_tasks, new_root)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Vendored
+1
-1
@@ -222,7 +222,7 @@ def tasks_factory():
|
|||||||
def _create_tasks(total_tasks: int = 3) -> pd.DataFrame:
|
def _create_tasks(total_tasks: int = 3) -> pd.DataFrame:
|
||||||
ids = list(range(total_tasks))
|
ids = list(range(total_tasks))
|
||||||
tasks = [f"Perform action {i}." for i in ids]
|
tasks = [f"Perform action {i}." for i in ids]
|
||||||
df = pd.DataFrame({"task_index": ids}, index=tasks)
|
df = pd.DataFrame({"task_index": ids}, index=pd.Index(tasks, name="task"))
|
||||||
return df
|
return df
|
||||||
|
|
||||||
return _create_tasks
|
return _create_tasks
|
||||||
|
|||||||
Reference in New Issue
Block a user