mirror of
https://github.com/Tavish9/any4lerobot.git
synced 2026-05-24 02:09:40 +00:00
🐛 fix dataset version convert (#75)
* fix v30_to_v21
* sync v21_to_v30 with official
This commit is contained in:
@@ -2,7 +2,15 @@
|
||||
|
||||
## Get started
|
||||
|
||||
1. Install v3.0 lerobot
|
||||
1. Downgrade datasets:
|
||||
|
||||
```bash
|
||||
pip install "datasets<4.0.0"
|
||||
```
|
||||
|
||||
> Downgrade `datasets` first, because version `4.0.0` introduces the `List` and `Column` types.
|
||||
|
||||
2. Install v3.0 lerobot
|
||||
|
||||
```bash
|
||||
git clone https://github.com/huggingface/lerobot.git
|
||||
|
||||
@@ -22,7 +22,6 @@ import logging
|
||||
import math
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
@@ -31,6 +30,7 @@ import jsonlines
|
||||
import numpy as np
|
||||
import pyarrow.parquet as pq
|
||||
import tqdm
|
||||
from datasets import Dataset
|
||||
from huggingface_hub import snapshot_download
|
||||
from lerobot.datasets.utils import (
|
||||
DEFAULT_CHUNK_SIZE,
|
||||
@@ -52,10 +52,10 @@ from lerobot.utils.utils import init_logging
|
||||
V21 = "v2.1"
|
||||
V30 = "v3.0"
|
||||
|
||||
LEGACY_DATA_PATH_TEMPLATE = "data/chunk-{chunk_index:03d}/episode_{episode_index:06d}.parquet"
|
||||
LEGACY_VIDEO_PATH_TEMPLATE = "videos/chunk-{chunk_index:03d}/{video_key}/episode_{episode_index:06d}.mp4"
|
||||
LEGACY_DATA_PATH_TEMPLATE = "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet"
|
||||
LEGACY_VIDEO_PATH_TEMPLATE = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4"
|
||||
MIN_VIDEO_DURATION = 1e-6
|
||||
LEGACY_STATS_KEYS = ("mean", "std", "min", "max", "q01", "q99")
|
||||
LEGACY_STATS_KEYS = ("mean", "std", "min", "max", "count")
|
||||
|
||||
|
||||
def _to_serializable(value: Any) -> Any:
|
||||
@@ -181,15 +181,15 @@ def convert_data(root: Path, new_root: Path, episode_records: list[dict[str, Any
|
||||
f"episode_index={episode_index}, length={length}"
|
||||
)
|
||||
|
||||
episode_table = table.slice(start, length)
|
||||
episode_table = table.slice(start, length).to_pandas()
|
||||
|
||||
dest_chunk = episode_index // DEFAULT_CHUNK_SIZE
|
||||
dest_path = new_root / LEGACY_DATA_PATH_TEMPLATE.format(
|
||||
chunk_index=dest_chunk,
|
||||
episode_chunk=dest_chunk,
|
||||
episode_index=episode_index,
|
||||
)
|
||||
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
pq.write_table(episode_table, dest_path)
|
||||
Dataset.from_pandas(episode_table).to_parquet(dest_path)
|
||||
|
||||
|
||||
def _group_episodes_by_video_file(
|
||||
@@ -365,7 +365,7 @@ def convert_videos(root: Path, new_root: Path, episode_records: list[dict[str, A
|
||||
|
||||
dest_chunk = episode_index // DEFAULT_CHUNK_SIZE
|
||||
dest_path = new_root / LEGACY_VIDEO_PATH_TEMPLATE.format(
|
||||
chunk_index=dest_chunk,
|
||||
episode_chunk=dest_chunk,
|
||||
video_key=video_key,
|
||||
episode_index=episode_index,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user