Compare commits

..

3 Commits

Author SHA1 Message Date
Michel Aractingi 8008cb357d remove bad typing 2025-11-06 09:13:26 +01:00
Michel Aractingi ca5a4a7ae5 add typing hints 2025-11-06 09:12:09 +01:00
Michel Aractingi b5dcd70d2c add embed images in conversion to v3 script; add parquet writer in conversion script 2025-11-05 23:41:38 +01:00
7 changed files with 42 additions and 43 deletions
+3 -3
View File
@@ -83,11 +83,11 @@ jobs:
fi fi
- name: Remove Tags with Git dependencies - name: Remove Tags with Git dependencies
# TODO(Steven): Temporary patch to remove pi from PyPi 0.4.0 release due to its reliance on git dependencies. # TODO(Steven): Temporary patch to remove libero and pi from PyPi 0.4.0 release due to its reliance on git dependencies.
run: | run: |
echo "::info:: Checking for Git dependencies to remove from pyproject.toml..." echo "::info:: Checking for Git dependencies to remove from pyproject.toml..."
grep -E '@ git\+https|lerobot\[pi\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true grep -E '@ git\+https|lerobot\[pi\]|lerobot\[libero\]' pyproject.toml | sed 's/^/::warning:: Removing line: /' || true
sed -E -i '/@ git\+https|lerobot\[pi\]/d' pyproject.toml sed -E -i '/@ git\+https|lerobot\[pi\]|lerobot\[libero\]/d' pyproject.toml
echo "::info:: Git dependencies removed. Proceeding with build." echo "::info:: Git dependencies removed. Proceeding with build."
- name: Install build dependencies - name: Install build dependencies
+1 -1
View File
@@ -70,7 +70,7 @@ jobs:
echo "Dependencies unbound:" && cat pyproject.toml echo "Dependencies unbound:" && cat pyproject.toml
- name: Install lerobot with all extras - name: Install lerobot with all extras
run: uv sync --all-extras --no-extra groot # TODO(Steven): Make flash-attn optional run: uv sync --all-extras
- name: Run pytest (all extras) - name: Run pytest (all extras)
run: uv run pytest tests -vv run: uv run pytest tests -vv
+1 -1
View File
@@ -186,7 +186,7 @@ For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/ https://pypi.org/project/lerobot/
> [!NOTE] > [!NOTE]
> For lerobot 0.4.0, if you want to install pi tags, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"`. > For lerobot 0.4.0, if you want to install libero or pi tags, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`.
> >
> This will be solved in the next patch release > This will be solved in the next patch release
+1 -1
View File
@@ -82,7 +82,7 @@ For a full list of optional dependencies, see:
https://pypi.org/project/lerobot/ https://pypi.org/project/lerobot/
> [!NOTE] > [!NOTE]
> For lerobot 0.4.0, if you want to install pi, you will have to do: `pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"` > For lerobot 0.4.0, if you want to install libero or pi, you will have to do: `pip install "lerobot[pi,libero]@git+https://github.com/huggingface/lerobot.git"`
### Troubleshooting ### Troubleshooting
+5
View File
@@ -28,6 +28,11 @@ LIBERO is now part of our **multi-eval supported simulation**, meaning you can b
To Install LIBERO, after following LeRobot official instructions, just do: To Install LIBERO, after following LeRobot official instructions, just do:
`pip install -e ".[libero]"` `pip install -e ".[libero]"`
> [!NOTE]
> For lerobot 0.4.0, if you want to install libero tag, you will have to do: `pip install "lerobot[libero]@git+https://github.com/huggingface/lerobot.git"`.
>
> This will be solved in the next patch release
### Single-suite evaluation ### Single-suite evaluation
Evaluate a policy on one LIBERO suite: Evaluate a policy on one LIBERO suite:
+5 -20
View File
@@ -940,26 +940,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
return query_timestamps return query_timestamps
def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict: def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
""" return {
Query dataset for indices across keys, skipping video keys. key: torch.stack(self.hf_dataset[q_idx][key])
for key, q_idx in query_indices.items()
Tries column-first [key][indices] for speed, falls back to row-first. if key not in self.meta.video_keys
}
Args:
query_indices: Dict mapping keys to index lists to retrieve
Returns:
Dict with stacked tensors of queried data (video keys excluded)
"""
result: dict = {}
for key, q_idx in query_indices.items():
if key in self.meta.video_keys:
continue
try:
result[key] = torch.stack(self.hf_dataset[key][q_idx])
except (KeyError, TypeError, IndexError):
result[key] = torch.stack(self.hf_dataset[q_idx][key])
return result
def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]: def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
"""Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
@@ -50,9 +50,9 @@ from typing import Any
import jsonlines import jsonlines
import pandas as pd import pandas as pd
import pyarrow as pa import pyarrow.parquet as pq
import tqdm import tqdm
from datasets import Dataset, Features, Image from datasets import Dataset, concatenate_datasets
from huggingface_hub import HfApi, snapshot_download from huggingface_hub import HfApi, snapshot_download
from requests import HTTPError from requests import HTTPError
@@ -68,6 +68,7 @@ from lerobot.datasets.utils import (
LEGACY_EPISODES_STATS_PATH, LEGACY_EPISODES_STATS_PATH,
LEGACY_TASKS_PATH, LEGACY_TASKS_PATH,
cast_stats_to_numpy, cast_stats_to_numpy,
embed_images,
flatten_dict, flatten_dict,
get_file_size_in_mb, get_file_size_in_mb,
get_parquet_file_size_in_mb, get_parquet_file_size_in_mb,
@@ -174,25 +175,33 @@ def convert_tasks(root, new_root):
write_tasks(df_tasks, new_root) write_tasks(df_tasks, new_root)
def concat_data_files(paths_to_cat, new_root, chunk_idx, file_idx, image_keys): def concat_data_files(
# TODO(rcadene): to save RAM use Dataset.from_parquet(file) and concatenate_datasets paths_to_cat: list[Path], new_root: Path, chunk_idx: int, file_idx: int, image_keys: list[str]
dataframes = [pd.read_parquet(file) for file in paths_to_cat] ):
# Concatenate all DataFrames along rows """Concatenate multiple parquet data files into a single file.
concatenated_df = pd.concat(dataframes, ignore_index=True)
Args:
paths_to_cat: List of parquet file paths to concatenate
new_root: Root directory for the new dataset
chunk_idx: Chunk index for the output file
file_idx: File index within the chunk
image_keys: List of feature keys that contain images
"""
datasets_list: list[Dataset] = [Dataset.from_parquet(str(file)) for file in paths_to_cat]
concatenated_ds: Dataset = concatenate_datasets(datasets_list)
if len(image_keys) > 0:
logging.debug(f"Embedding {len(image_keys)} image features for optimal training performance")
concatenated_ds = embed_images(concatenated_ds)
path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx) path = new_root / DEFAULT_DATA_PATH.format(chunk_index=chunk_idx, file_index=file_idx)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
if len(image_keys) > 0: table = concatenated_ds.with_format("arrow")[:]
schema = pa.Schema.from_pandas(concatenated_df) writer = pq.ParquetWriter(path, schema=table.schema, compression="snappy", use_dictionary=True)
features = Features.from_arrow_schema(schema) writer.write_table(table)
for key in image_keys: writer.close()
features[key] = Image()
schema = features.arrow_schema
else:
schema = None
concatenated_df.to_parquet(path, index=False, schema=schema)
def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int): def convert_data(root: Path, new_root: Path, data_file_size_in_mb: int):