# lerobot/examples/checker2.py
# Author: Jade Choghari — commit "new things" (5c628f1700), 2025-09-10 11:32:54 +02:00
# 36 lines, 1.4 KiB, Python
import pyarrow.parquet as pq

# Inspect the low-level Parquet footer metadata of two episode files so the
# cached HF version can be compared against the converted one by eye.

# First parquet (cached HF version).
# NOTE(review): the original code read the path below and immediately
# overwrote the result with the next read — dead I/O, kept only as a record.
# meta1 = pq.read_metadata("/raid/jade/.cache/huggingface/datasets/data/chunk-000/episode_000000.parquet")
meta1 = pq.read_metadata("/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet")
print("First parquet key_value_metadata:")
print(meta1.metadata)  # low-level key/value file metadata (bytes -> bytes dict)

print("Second")
# Second parquet (the converted version).
meta2 = pq.read_metadata("/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet")
print("\nSecond parquet key_value_metadata:")
# Was commented out, leaving the header above with no payload; restored so
# the "Second" section actually shows something.
print(meta2.metadata)
# from datasets import load_dataset
# root_dir = "/raid/jade/libero_converted"
# # Load all parquet files under the root_dir recursively
# ds = load_dataset("parquet", data_files=f"{root_dir}/**/*.parquet")
# print(ds) # prints split info
# print(ds["train"].features) # check schema/features
# # Peek at one row
# example = ds["train"][0]
# print(example.keys())
# print(type(example["observation.images.image"]))
# print(type(example["observation.images.image2"]))
import pyarrow.parquet as pq  # NOTE(review): redundant re-import; pq is already imported at the top of the file

# Print the column names of a few converted episodes so their schemas can be
# compared side by side.  The loop body lost its indentation in the original
# (a SyntaxError as written); restored here.
for ep in ["episode_000019.parquet", "episode_000021.parquet", "episode_000026.parquet"]:
    path = f"/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/{ep}"
    schema = pq.read_schema(path)  # footer-only read; does not load row data
    print(ep, schema.names)