mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 02:00:03 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 8008cb357d | |||
| ca5a4a7ae5 | |||
| b5dcd70d2c |
@@ -83,11 +83,11 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Remove Tags with Git dependencies
|
- name: Remove Tags with Git dependencies
|
||||||
# TODO(Steven): Temporary patch to remove pi from the PyPI 0.4.0 release due to its reliance on git dependencies.
|
# TODO(Steven): Temporary patch to remove libero and pi from the PyPI 0.4.0 release due to their reliance on git dependencies.
|
||||||
run: |
|
run: |
|
||||||
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
|
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
|
||||||
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
|
grep -E '@ git\+https|lerobot\[pi\]|lerobot\[libero\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
|
||||||
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml
|
sed -E -i '/@ git\+https|lerobot\[pi\]|lerobot\[libero\]/d' pyproject.toml
|
||||||
echo "::info:: Git dependencies removed. Proceeding with build."
|
echo "::info:: Git dependencies removed. Proceeding with build."
|
||||||
|
|
||||||
- name: Install build dependencies
|
- name: Install build dependencies
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ jobs:
|
|||||||
echo "Dependencies unbound:" && cat pyproject.toml
|
echo "Dependencies unbound:" && cat pyproject.toml
|
||||||
|
|
||||||
- name: Install lerobot with all extras
|
- name: Install lerobot with all extras
|
||||||
run: uv sync --all-extras --no-extra groot # TODO(Steven): Make flash-attn optional
|
run: uv sync --all-extras
|
||||||
|
|
||||||
- name: Run pytest (all extras)
|
- name: Run pytest (all extras)
|
||||||
run: uv run pytest tests -vv
|
run: uv run pytest tests -vv
|
||||||
|
|||||||
@@ -186,7 +186,7 @@ For a full list of optional dependencies, see:
|
|||||||
https://pypi.org/project/lerobot/
|
https://pypi.org/project/lerobot/
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> For lerobot 0.4.0, if you want to install pi tags, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`.
|
> For lerobot 0.4.0, if you want to install the libero or pi tags, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`.
|
||||||
>
|
>
|
||||||
> This will be solved in the next patch release
|
> This will be solved in the next patch release
|
||||||
|
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ For a full list of optional dependencies, see:
|
|||||||
https://pypi.org/project/lerobot/
|
https://pypi.org/project/lerobot/
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`
|
> For lerobot 0.4.0, if you want to install libero or pi, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`
|
||||||
|
|
||||||
### Troubleshooting
|
### Troubleshooting
|
||||||
|
|
||||||
|
|||||||
@@ -28,6 +28,11 @@ LIBERO is now part of our **multi-eval supported simulation**, meaning you can b
|
|||||||
To install LIBERO, after following the official LeRobot instructions, just do:
|
To install LIBERO, after following the official LeRobot instructions, just do:
|
||||||
`pip install -e ".[libero]"`
|
`pip install -e ".[libero]"`
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> For lerobot 0.4.0, if you want to install the libero tag, you will have to do: `pip install "lerobot[libero]@git+https://github.com/huggingface/lerobot.git"`.
|
||||||
|
>
|
||||||
|
> This will be solved in the next patch release
|
||||||
|
|
||||||
### Single-suite evaluation
|
### Single-suite evaluation
|
||||||
|
|
||||||
Evaluate a policy on one LIBERO suite:
|
Evaluate a policy on one LIBERO suite:
|
||||||
|
|||||||
@@ -940,26 +940,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
return query_timestamps
|
return query_timestamps
|
||||||
|
|
||||||
def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
|
def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
|
||||||
"""
|
return {
|
||||||
Query dataset for indices across keys, skipping video keys.
|
key: torch.stack(self.hf_dataset[q_idx][key])
|
||||||
|
for key, q_idx in query_indices.items()
|
||||||
Tries column-first [key][indices] for speed, falls back to row-first.
|
if key not in self.meta.video_keys
|
||||||
|
}
|
||||||
Args:
|
|
||||||
query_indices: Dict mapping keys to index lists to retrieve
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dict with stacked tensors of queried data (video keys excluded)
|
|
||||||
"""
|
|
||||||
result: dict = {}
|
|
||||||
for key, q_idx in query_indices.items():
|
|
||||||
if key in self.meta.video_keys:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
result[key] = torch.stack(self.hf_dataset[key][q_idx])
|
|
||||||
except (KeyError, TypeError, IndexError):
|
|
||||||
result[key] = torch.stack(self.hf_dataset[q_idx][key])
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
|
def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
|
||||||
"""Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
|
"""Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
|
||||||
|
|||||||
@@ -50,9 +50,9 @@ from typing import Any
|
|||||||
|
|
||||||
import jsonlines
|
import jsonlines
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pyarrow as pa
|
import pyarrow.parquet as pq
|
||||||
import tqdm
|
import tqdm
|
||||||
from datasets import Dataset, Features, Image
|
from datasets import Dataset, concatenate_datasets
|
||||||
from huggingface_hub import HfApi, snapshot_download
|
from huggingface_hub import HfApi, snapshot_download
|
||||||
from requests import HTTPError
|
from requests import HTTPError
|
||||||
|
|
||||||
@@ -68,6 +68,7 @@ from lerobot.datasets.utils import (
|
|||||||
LEGACY_EPISODES_STATS_PATH,
|
LEGACY_EPISODES_STATS_PATH,
|
||||||
LEGACY_TASKS_PATH,
|
LEGACY_TASKS_PATH,
|
||||||
cast_stats_to_numpy,
|
cast_stats_to_numpy,
|
||||||
|
embed_images,
|
||||||
flatten_dict,
|
flatten_dict,
|
||||||
get_file_size_in_mb,
|
get_file_size_in_mb,
|
||||||
get_parquet_file_size_in_mb,
|
get_parquet_file_size_in_mb,
|
||||||
@@ -174,25 +175,33 @@ def convert_tasks(root, new_root):
|
|||||||
write_tasks(df_tasks, new_root)
|
write_tasks(df_tasks, new_root)
|
||||||
|
|
||||||
|
|
||||||
def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys):
|
def concat_data_files(
|
||||||
# TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets
|
paths_to_cat: list[Path], new_root: Path, chunk_idx: int, file_idx: int, image_keys: list[str]
|
||||||
dataframes = [pd.read_parquet(file) for file in paths_to_cat]
|
):
|
||||||
# Concatenate all DataFrames along rows
|
"""Concatenate multiple parquet data files into a single file.
|
||||||
concatenated_df = pd.concat(dataframes, ignore_index=True)
|
|
||||||
|
Args:
|
||||||
|
paths_to_cat: List of parquet file paths to concatenate
|
||||||
|
new_root: Root directory for the new dataset
|
||||||
|
chunk_idx: Chunk index for the output file
|
||||||
|
file_idx: File index within the chunk
|
||||||
|
image_keys: List of feature keys that contain images
|
||||||
|
"""
|
||||||
|
|
||||||
|
datasets_list: list[Dataset] = [Dataset.from_parquet(str(file)) for file in paths_to_cat]
|
||||||
|
concatenated_ds: Dataset = concatenate_datasets(datasets_list)
|
||||||
|
|
||||||
|
if len(image_keys) > 0:
|
||||||
|
logging.debug(f"Embedding {len(image_keys)} image features for optimal training performance")
|
||||||
|
concatenated_ds = embed_images(concatenated_ds)
|
||||||
|
|
||||||
path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
|
path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if len(image_keys) > 0:
|
table = concatenated_ds.with_format("arrow")[:]
|
||||||
schema = pa.Schema.from_pandas(concatenated_df)
|
writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True)
|
||||||
features = Features.from_arrow_schema(schema)
|
writer.write_table(table)
|
||||||
for key in image_keys:
|
writer.close()
|
||||||
features[key] = Image()
|
|
||||||
schema = features.arrow_schema
|
|
||||||
else:
|
|
||||||
schema = None
|
|
||||||
|
|
||||||
concatenated_df.to_parquet(path, index=False, schema=schema)
|
|
||||||
|
|
||||||
|
|
||||||
def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):
|
def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):
|
||||||
|
|||||||
Reference in New Issue
Block a user