mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 09:09:48 +00:00
36 lines
1.4 KiB
Python
36 lines
1.4 KiB
Python
import pyarrow.parquet as pq
|
|
|
|
# Scratch check: dump the key/value metadata stored in the footer of cached
# parquet episode files.

# Both reads further down target the same cached LIBERO episode file.
_libero_episode = "//raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet"

# First parquet (cached HF version)
# NOTE(review): this result is rebound on the very next statement and never
# printed; the call still runs, so it raises if the file cannot be read.
# Confirm whether this read was meant to be kept.
meta1 = pq.read_metadata(
    "/raid/jade/.cache/huggingface/datasets/data/chunk-000/episode_000000.parquet"
)
meta1 = pq.read_metadata(_libero_episode)

print("First parquet key_value_metadata:")
# `.metadata` is the low-level key/value metadata attached to the file.
print(meta1.metadata)
# print()
print("Second")

# Second parquet (your converted version)
# NOTE(review): this re-reads the same file that `meta1` was just loaded from,
# and its metadata is only printed by the disabled line at the end.
meta2 = pq.read_metadata(_libero_episode)
print("\nSecond parquet key_value_metadata:")
# print(meta2.metadata)
|
|
|
|
# from datasets import load_dataset
|
|
# root_dir = "/raid/jade/libero_converted"
|
|
|
|
# # Load all parquet files under the root_dir recursively
|
|
# ds = load_dataset("parquet", data_files=f"{root_dir}/**/*.parquet")
|
|
|
|
# print(ds) # prints split info
|
|
# print(ds["train"].features) # check schema/features
|
|
|
|
# # Peek at one row
|
|
# example = ds["train"][0]
|
|
# print(example.keys())
|
|
# print(type(example["observation.images.image"]))
|
|
# print(type(example["observation.images.image2"]))
|
|
|
|
import pyarrow.parquet as pq
|
|
|
|
# Print the column names of a few cached LIBERO episode files, one line per
# file, so their schemas can be compared by eye.
for ep in ["episode_000019.parquet", "episode_000021.parquet", "episode_000026.parquet"]:
    path = f"/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/{ep}"
    # read_schema returns the Arrow schema taken from the parquet file metadata.
    schema = pq.read_schema(path)
    print(ep, schema.names)
|