⬆️ sync with lerobot v0.5.1 (#96)

* update agibot2lerobot
* update libero2lerobot
* update robomind2lerobot
* fix robomind2lerobot
2026-07-25 04:55:58 +00:00 · 2026-04-06 18:25:36 +08:00
parent ef184e44be
commit ad1381915c
5 changed files with 340 additions and 232 deletions
@@ -1,9 +1,11 @@
 import numpy as np
-import torch
-import torchvision
-from lerobot.datasets.compute_stats import auto_downsample_height_width, get_feature_stats, sample_indices
-
-torchvision.set_video_backend("pyav")
+from lerobot.datasets.compute_stats import (
+    DEFAULT_QUANTILES,
+    auto_downsample_height_width,
+    get_feature_stats,
+    sample_indices,
+)
+from torchcodec.decoders import VideoDecoder


 def generate_features_from_config(AgiBotWorld_CONFIG):
@@ -20,9 +22,8 @@ def generate_features_from_config(AgiBotWorld_CONFIG):
 def sample_images(input):
    if type(input) is str:
        video_path = input
-        reader = torchvision.io.VideoReader(video_path, stream="video")
-        frames = [frame["data"] for frame in reader]
-        frames_array = torch.stack(frames).numpy()  # Shape: [T, C, H, W]
+        decoder = VideoDecoder(video_path)
+        frames_array = decoder[0:-1].numpy()  # Shape: [T, C, H, W]

        sampled_indices = sample_indices(len(frames_array))
        images = None
@@ -50,21 +51,31 @@ def sample_images(input):
    return images


-def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict:
+def compute_episode_stats(
+    episode_data: dict[str, list[str] | np.ndarray],
+    features: dict,
+    quantile_list: list[float] | None = None,
+) -> dict:
+    if quantile_list is None:
+        quantile_list = DEFAULT_QUANTILES
+
    ep_stats = {}
    for key, data in episode_data.items():
        if features[key]["dtype"] == "string":
-            continue  # HACK: we should receive np.arrays of strings
+            continue
+
        elif features[key]["dtype"] in ["image", "video"]:
            ep_ft_array = sample_images(data)
-            axes_to_reduce = (0, 2, 3)  # keep channel dim
+            axes_to_reduce = (0, 2, 3)
            keepdims = True
        else:
-            ep_ft_array = data  # data is already a np.ndarray
-            axes_to_reduce = 0  # compute stats over the first axis
-            keepdims = data.ndim == 1  # keep as np.array
+            ep_ft_array = data
+            axes_to_reduce = 0
+            keepdims = data.ndim == 1

-        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
+        ep_stats[key] = get_feature_stats(
+            ep_ft_array, axis=axes_to_reduce, keepdims=keepdims, quantile_list=quantile_list
+        )

        if features[key]["dtype"] in ["image", "video"]:
            value_norm = 1.0 if "depth" in key else 255.0