mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-18 10:10:08 +00:00
new things
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
# # First parquet (cached HF version)
|
||||
meta1 = pq.read_metadata("/raid/jade/.cache/huggingface/datasets/data/chunk-000/episode_000000.parquet")
|
||||
meta1 = pq.read_metadata("//raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet")
|
||||
print("First parquet key_value_metadata:")
|
||||
print(meta1.metadata) # low-level file metadata
|
||||
# print()
|
||||
print("Second")
|
||||
# Second parquet (your converted version)
|
||||
meta2 = pq.read_metadata("//raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/episode_000019.parquet")
|
||||
print("\nSecond parquet key_value_metadata:")
|
||||
# print(meta2.metadata)
|
||||
|
||||
# from datasets import load_dataset
|
||||
# root_dir = "/raid/jade/libero_converted"
|
||||
|
||||
# # Load all parquet files under the root_dir recursively
|
||||
# ds = load_dataset("parquet", data_files=f"{root_dir}/**/*.parquet")
|
||||
|
||||
# print(ds) # prints split info
|
||||
# print(ds["train"].features) # check schema/features
|
||||
|
||||
# # Peek at one row
|
||||
# example = ds["train"][0]
|
||||
# print(example.keys())
|
||||
# print(type(example["observation.images.image"]))
|
||||
# print(type(example["observation.images.image2"]))
|
||||
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
for ep in ["episode_000019.parquet", "episode_000021.parquet", "episode_000026.parquet"]:
|
||||
path = f"/raid/jade/.cache/huggingface/lerobot/HuggingFaceVLA/libero/data/chunk-000/{ep}"
|
||||
schema = pq.read_schema(path)
|
||||
print(ep, schema.names)
|
||||
Reference in New Issue
Block a user