chore(dependencies): update uv.lock

fix(features copy): adding deepcopy on LeRobot dataset features to avoid shallow copy leaks (#3826)
* fix(features copy): adding deepcopy on LeRobot dataset features to avoid shallow copy leaks
* tests(test): adding new test
2026-06-18 00:37:10 +00:00 · 2026-06-17 05:02:01 +00:00 · 2026-06-16 17:58:59 +02:00 · 2026-06-16 12:15:48 +02:00
15 changed files with 312 additions and 2804 deletions
@@ -355,8 +355,6 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" }]
 torchvision = [{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" }]
-huggingface-hub = { git = "https://github.com/huggingface/huggingface_hub.git", branch = "feat/hffs-cache-cdn-range-reads" }
-datasets = { git = "https://github.com/huggingface/datasets.git", branch = "main" }

 [tool.setuptools.package-data]
 lerobot = ["envs/*.json", "annotations/steerable_pipeline/prompts/*.txt"]
@@ -423,7 +421,6 @@ exclude_dirs = [
 skips = ["B101", "B311", "B404", "B603", "B615"]

 [tool.typos]
-default.extend-words = { trak = "trak" }
 default.extend-ignore-re = [
    "(?Rm)^.*(#|//)\\s*spellchecker:disable-line$",                      # spellchecker:disable-line
    "(?s)(#|//)\\s*spellchecker:off.*?\\n\\s*(#|//)\\s*spellchecker:on", # spellchecker:<on|off>
@@ -1,860 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-from __future__ import annotations
-
-import argparse
-import random
-import resource
-import tempfile
-import threading
-import time
-from collections.abc import Sequence
-from concurrent.futures import ThreadPoolExecutor
-from pathlib import Path
-
-import fsspec
-import numpy as np
-import pyarrow as pa
-import pyarrow.compute as pc
-import pyarrow.parquet as pq
-
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
-from lerobot.datasets.episode_video_streaming import (
-    EpisodeByteCache,
-    EpisodeVideoManifest,
-    NativeHTTPRangeFetcher,
-    assert_hf_hub_range_cache_branch,
-)
-from lerobot.datasets.video_utils import VideoDecoderCache, decode_video_frames_torchcodec
-
-DEFAULT_REPO = "allenai/MolmoAct2-BimanualYAM-Dataset"
-DEFAULT_REVISION = "e9f21ae15074330839f2ac25ed4b49d76dfa1f9c"
-DEFAULT_DATA_ROOT = "hf://buckets/pepijn223/MolmoAct2-BimanualYAM-Dataset-bucket"
-SIDECAR_CACHE_DIR = Path(tempfile.gettempdir()) / "lerobot-sidecars"
-FULL_SIDECAR_NAME = "molmoact2-full.npz"
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Benchmark episode-level streaming mini-MP4 cache.")
-    parser.add_argument("--repo-id", default=DEFAULT_REPO)
-    parser.add_argument("--revision", default=DEFAULT_REVISION)
-    parser.add_argument("--data-root", default=DEFAULT_DATA_ROOT)
-    parser.add_argument(
-        "--strategy",
-        choices=("both", "full", "indexed", "remote-decoder", "native-http"),
-        default="both",
-        help=argparse.SUPPRESS,
-    )
-    parser.add_argument(
-        "--range-backend",
-        choices=("fsspec", "native-http"),
-        default="fsspec",
-        help="Range reader used by indexed/full episode-pool fetch tracks.",
-    )
-    parser.add_argument("--num-episodes", type=int, default=512)
-    parser.add_argument(
-        "--manifest-episodes",
-        type=int,
-        default=None,
-        help="Limit manifest construction to the first N episodes for local smoke tests.",
-    )
-    parser.add_argument("--pool-size", type=int, default=16)
-    parser.add_argument("--workers", type=int, default=8)
-    parser.add_argument(
-        "--native-http-connections",
-        type=int,
-        default=None,
-        help="Max HTTP connections for --range-backend native-http. Defaults to --workers.",
-    )
-    parser.add_argument(
-        "--native-http-retries",
-        type=int,
-        default=8,
-        help="Retries per native HTTP range request.",
-    )
-    parser.add_argument(
-        "--native-http-timeout",
-        type=float,
-        default=120.0,
-        help="Timeout in seconds for native HTTP requests.",
-    )
-    parser.add_argument(
-        "--include-decode",
-        action="store_true",
-        help="Also run decoder-opening/frame-decode comparison tracks. Fetch-only is the default.",
-    )
-    parser.add_argument("--decode-workers", type=int, default=1)
-    parser.add_argument("--prefetch-ahead", type=int, default=8)
-    parser.add_argument("--frames-per-episode", type=int, default=16)
-    parser.add_argument("--max-probe-mb", type=int, default=64)
-    parser.add_argument("--seed", type=int, default=0)
-    parser.add_argument("--byte-budget-gb", type=float, default=80)
-    parser.add_argument(
-        "--in-memory", action="store_true", help="Accepted for compatibility; manifest is always in memory."
-    )
-    parser.add_argument("--no-hub-branch-assert", action="store_true")
-    return parser.parse_args()
-
-
-def _episode_pool(total: int, requested: int, pool_size: int, seed: int) -> list[int]:
-    rng = random.Random(seed)
-    upper = min(total, requested)
-    if pool_size > upper:
-        raise ValueError(f"pool-size={pool_size} exceeds available episodes={upper}")
-    return rng.sample(range(upper), pool_size)
-
-
-def _timestamps(manifest: EpisodeVideoManifest, episodes: Sequence[int], frames_per_episode: int, seed: int):
-    rng = random.Random(seed)
-    out: dict[tuple[int, str], list[float]] = {}
-    for ep in episodes:
-        for camera_key in manifest.video_keys:
-            span = manifest.lookup(ep, camera_key)
-            lo = span.first_pts
-            hi = max(span.last_pts, lo)
-            out[(ep, camera_key)] = sorted(rng.uniform(lo, hi) for _ in range(frames_per_episode))
-    return out
-
-
-def _timestamps_from_meta(
-    meta: LeRobotDatasetMetadata, episodes: Sequence[int], frames_per_episode: int, seed: int
-) -> dict[tuple[int, str], list[float]]:
-    rng = random.Random(seed)
-    out: dict[tuple[int, str], list[float]] = {}
-    for ep in episodes:
-        row = meta.episodes[ep]
-        for camera_key in meta.video_keys:
-            lo = float(row[f"videos/{camera_key}/from_timestamp"])
-            hi = max(float(row[f"videos/{camera_key}/to_timestamp"]), lo)
-            out[(ep, camera_key)] = sorted(rng.uniform(lo, hi) for _ in range(frames_per_episode))
-    return out
-
-
-def _bytes_for(manifest: EpisodeVideoManifest, episodes: Sequence[int]) -> int:
-    total = 0
-    for ep in episodes:
-        for camera_key in manifest.video_keys:
-            total += manifest.lookup(ep, camera_key).mdat_length
-    return total
-
-
-def _decode_all(
-    cache: EpisodeByteCache, timestamps: dict[tuple[int, str], list[float]], *, decode_workers: int
-) -> float:
-    start = time.perf_counter()
-    items = list(timestamps.items())
-    if decode_workers <= 1:
-        for (ep, camera_key), ts in items:
-            cache.get_frames(ep, camera_key, ts)
-    else:
-        with ThreadPoolExecutor(max_workers=decode_workers) as pool:
-            futures = [pool.submit(cache.get_frames, ep, camera_key, ts) for (ep, camera_key), ts in items]
-            for future in futures:
-                future.result()
-    return time.perf_counter() - start
-
-
-def _fill_cache(cache: EpisodeByteCache, episodes: Sequence[int]) -> float:
-    start = time.perf_counter()
-    for ep in episodes:
-        cache.submit_prefetch(ep)
-    for ep in episodes:
-        cache.ensure_ready(ep)
-    return time.perf_counter() - start
-
-
-def _samples_per_s(elapsed_s: float, episodes: Sequence[int], frames_per_episode: int) -> float:
-    if elapsed_s <= 0:
-        return float("inf")
-    return len(episodes) * frames_per_episode / elapsed_s
-
-
-def _log(message: str) -> None:
-    print(message, flush=True)
-
-
-def _format_duration(seconds: float) -> str:
-    if seconds < 60:
-        return f"{seconds:.1f}s"
-    if seconds < 3600:
-        return f"{seconds / 60:.1f}m"
-    return f"{seconds / 3600:.1f}h"
-
-
-def _current_rss_mib() -> float | None:
-    status_path = Path("/proc/self/status")
-    if not status_path.exists():
-        return None
-    for line in status_path.read_text().splitlines():
-        if line.startswith("VmRSS:"):
-            return float(line.split()[1]) / 1024
-    return None
-
-
-def _peak_rss_mib() -> float:
-    rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-    # Linux reports KiB; macOS reports bytes.
-    if rss > 10**8:
-        return rss / 1024**2
-    return rss / 1024
-
-
-def _memory_snapshot() -> dict[str, float | None]:
-    return {"rss_mib": _current_rss_mib(), "peak_rss_mib": _peak_rss_mib()}
-
-
-def _print_memory_summary(start: dict[str, float | None], end: dict[str, float | None]) -> None:
-    start_rss = start["rss_mib"]
-    end_rss = end["rss_mib"]
-    delta = None if start_rss is None or end_rss is None else end_rss - start_rss
-    print()
-    print("| Memory | MiB |")
-    print("|---|---:|")
-    if start_rss is not None:
-        print(f"| rss start | {start_rss:.1f} |")
-    if end_rss is not None:
-        print(f"| rss end | {end_rss:.1f} |")
-    if delta is not None:
-        print(f"| rss delta | {delta:.1f} |")
-    print(f"| peak rss | {end['peak_rss_mib']:.1f} |")
-
-
-def _root_join(data_root: str, relative_path: str) -> str:
-    if data_root.startswith("hf://"):
-        return f"{data_root.rstrip('/')}/{relative_path}"
-    return str(Path(data_root) / relative_path)
-
-
-def _find_or_download_sidecar(data_root: str, manifest_episode_count: int) -> Path | None:
-    _ = manifest_episode_count
-    local = SIDECAR_CACHE_DIR / FULL_SIDECAR_NAME
-    if _valid_sidecar(local):
-        return local
-    if local.exists():
-        print(f"mp4_sidecar_invalid_local: {local}")
-        local.unlink()
-    remote_relative = f"meta/mp4-sidecars/{FULL_SIDECAR_NAME}"
-    remote = _root_join(data_root, remote_relative)
-    protocol = "hf" if data_root.startswith("hf://") else "file"
-    fs = fsspec.filesystem(protocol)
-    if not fs.exists(remote):
-        return None
-    local.parent.mkdir(parents=True, exist_ok=True)
-    print(f"downloading_mp4_sidecar: {remote} -> {local}")
-    if data_root.startswith("hf://"):
-        _download_sidecar_native_http(data_root, remote_relative, local)
-    else:
-        fs.get(remote, str(local))
-    return local
-
-
-def _valid_sidecar(path: Path) -> bool:
-    if not path.exists():
-        return False
-    try:
-        with np.load(path, allow_pickle=False) as data:
-            return "manifest_json" in data
-    except Exception:
-        return False
-
-
-def _download_sidecar_native_http(data_root: str, relative_path: str, local: Path) -> None:
-    fetcher = NativeHTTPRangeFetcher(data_root, max_connections=16)
-    tmp = local.with_suffix(local.suffix + ".tmp")
-    try:
-        size = fetcher.info_size(relative_path)
-        chunk_size = 16 * 1024 * 1024
-        ranges = [(offset, min(chunk_size, size - offset)) for offset in range(0, size, chunk_size)]
-        with tmp.open("wb") as out_file:
-            out_file.truncate(size)
-
-        def read_chunk(offset_length: tuple[int, int]) -> tuple[int, bytes]:
-            offset, length = offset_length
-            return offset, fetcher.read_range(relative_path, offset, length)
-
-        start = time.perf_counter()
-        done = 0
-        with ThreadPoolExecutor(max_workers=8) as pool:
-            futures = [pool.submit(read_chunk, item) for item in ranges]
-            with tmp.open("r+b") as rw_file:
-                for future in futures:
-                    offset, data = future.result()
-                    rw_file.seek(offset)
-                    rw_file.write(data)
-                    done += len(data)
-                    elapsed = max(time.perf_counter() - start, 1e-9)
-                    print(
-                        f"sidecar_download: {done / 1024**2:.1f}/{size / 1024**2:.1f} MiB "
-                        f"({done / elapsed / 1024**2:.1f} MiB/s)",
-                        flush=True,
-                    )
-        tmp.replace(local)
-    finally:
-        fetcher.close()
-
-
-class EpisodeParquetReader:
-    def __init__(self, meta: LeRobotDatasetMetadata, data_root: str):
-        self.meta = meta
-        self.data_root = data_root
-        protocol = "hf" if data_root.startswith("hf://") else "file"
-        self.fs = fsspec.filesystem(protocol)
-        self._episode_row_groups = self._build_episode_row_groups()
-        self._table_cache: dict[str, pa.Table] = {}
-        self._cache_lock = threading.Lock()
-
-    def read_episode(self, episode_index: int) -> None:
-        relative_path = str(self.meta.get_data_file_path(episode_index))
-        table = self._read_table(relative_path)
-        table.filter(pc.equal(table["episode_index"], episode_index))
-
-    def _read_table(self, relative_path: str) -> pa.Table:
-        with self._cache_lock:
-            table = self._table_cache.get(relative_path)
-        if table is not None:
-            return table
-        with self.fs.open(
-            _root_join(self.data_root, relative_path), "rb", block_size=2**20, cache_type="none"
-        ) as f:
-            table = pq.ParquetFile(f).read()
-        with self._cache_lock:
-            return self._table_cache.setdefault(relative_path, table)
-
-    def submit_read_episode(self, pool: ThreadPoolExecutor, episode_index: int):
-        return pool.submit(self.read_episode, episode_index)
-
-    def read_episodes(self, episodes: Sequence[int], *, workers: int) -> float:
-        start = time.perf_counter()
-        if workers <= 1:
-            for ep in episodes:
-                self.read_episode(ep)
-        else:
-            with ThreadPoolExecutor(max_workers=workers) as pool:
-                futures = [pool.submit(self.read_episode, ep) for ep in episodes]
-                for future in futures:
-                    future.result()
-        return time.perf_counter() - start
-
-    def _build_episode_row_groups(self) -> dict[int, int]:
-        counts: dict[tuple[int, int], int] = {}
-        row_groups = {}
-        for ep_idx in range(int(self.meta.total_episodes)):
-            ep = self.meta.episodes[ep_idx]
-            key = (int(ep["data/chunk_index"]), int(ep["data/file_index"]))
-            row_groups[ep_idx] = counts.get(key, 0)
-            counts[key] = row_groups[ep_idx] + 1
-        return row_groups
-
-
-def run_fetch_pool(
-    manifest: EpisodeVideoManifest,
-    data_root: str,
-    episodes: Sequence[int],
-    byte_budget: int,
-    workers: int,
-    range_backend: str,
-    args: argparse.Namespace,
-) -> dict[str, float]:
-    with EpisodeByteCache(
-        manifest,
-        data_root,
-        byte_budget=byte_budget,
-        workers=workers,
-        range_backend=range_backend,
-        native_http_connections=args.native_http_connections,
-        native_http_timeout=args.native_http_timeout,
-        native_http_retries=args.native_http_retries,
-        open_decoders=False,
-    ) as cache:
-        elapsed = _fill_cache(cache, episodes)
-        timings = cache.timing_summary()
-    byte_count = _bytes_for(manifest, episodes)
-    episode_mb = byte_count / len(episodes) / 1024**2
-    job_count = max(timings["jobs"], 1.0)
-    result = {
-        "fetch_s": elapsed,
-        "fetch_mbps": byte_count / elapsed / 1024**2,
-        "fetch_episodes_s": len(episodes) / elapsed,
-        "episode_mb": episode_mb,
-        "avg_mb_miss": byte_count / (len(episodes) * len(manifest.video_keys)) / 1024**2,
-        "jobs": timings["jobs"],
-        "lookup_ms": timings["lookup_s"] * 1000 / job_count,
-        "range_fetch_ms": timings["fetch_s"] * 1000 / job_count,
-        "synthesize_ms": timings["synthesize_s"] * 1000 / job_count,
-        "store_ms": timings["store_s"] * 1000 / job_count,
-    }
-    result.update({key: value for key, value in timings.items() if key.startswith("range_")})
-    return result
-
-
-def run_parallel(
-    manifest: EpisodeVideoManifest,
-    data_root: str,
-    episodes: Sequence[int],
-    timestamps: dict[tuple[int, str], list[float]],
-    byte_budget: int,
-    workers: int,
-    decode_workers: int,
-    frames_per_episode: int,
-    parquet_reader: EpisodeParquetReader,
-    range_backend: str,
-) -> dict[str, float]:
-    with EpisodeByteCache(
-        manifest,
-        data_root,
-        byte_budget=byte_budget,
-        workers=workers,
-        range_backend=range_backend,
-        open_decoders=False,
-    ) as cache:
-        parquet_s = parquet_reader.read_episodes(episodes, workers=workers)
-        fetch_s = _fill_cache(cache, episodes)
-        decoder_start = time.perf_counter()
-        for ep in episodes:
-            for camera_key in manifest.video_keys:
-                cache.get_decoder(ep, camera_key)
-        decoder_s = time.perf_counter() - decoder_start
-        decode_s = _decode_all(cache, timestamps, decode_workers=decode_workers)
-    byte_count = _bytes_for(manifest, episodes)
-    return {
-        "fetch_s": fetch_s,
-        "fetch_mbps": byte_count / fetch_s / 1024**2,
-        "fetch_episodes_s": len(episodes) / fetch_s,
-        "parquet_s": parquet_s,
-        "decoder_ms_miss": decoder_s * 1000 / (len(episodes) * len(manifest.video_keys)),
-        "decode_samples_s": _samples_per_s(decode_s, episodes, frames_per_episode),
-    }
-
-
-def run_overlapped(
-    manifest: EpisodeVideoManifest,
-    data_root: str,
-    episodes: Sequence[int],
-    timestamps: dict[tuple[int, str], list[float]],
-    byte_budget: int,
-    workers: int,
-    decode_workers: int,
-    frames_per_episode: int,
-    prefetch_ahead: int,
-    parquet_reader: EpisodeParquetReader,
-    range_backend: str,
-) -> dict[str, float]:
-    with EpisodeByteCache(
-        manifest,
-        data_root,
-        byte_budget=byte_budget,
-        workers=workers,
-        range_backend=range_backend,
-        open_decoders=True,
-    ) as cache:
-        start = time.perf_counter()
-        video_wait_decode_s = 0.0
-        parquet_wait_s = 0.0
-        parquet_pool = ThreadPoolExecutor(max_workers=max(1, min(workers, len(episodes))))
-        parquet_futures = {
-            ep: parquet_reader.submit_read_episode(parquet_pool, ep) for ep in episodes[:prefetch_ahead]
-        }
-        for ep in episodes[:prefetch_ahead]:
-            cache.submit_prefetch(ep)
-        try:
-            for idx, ep in enumerate(episodes):
-                next_idx = idx + prefetch_ahead
-                if next_idx < len(episodes):
-                    next_ep = episodes[next_idx]
-                    cache.submit_prefetch(next_ep)
-                    parquet_futures[next_ep] = parquet_reader.submit_read_episode(parquet_pool, next_ep)
-
-                parquet_start = time.perf_counter()
-                parquet_futures.pop(ep).result()
-                parquet_wait_s += time.perf_counter() - parquet_start
-
-                video_start = time.perf_counter()
-                cache.ensure_ready(ep)
-                if decode_workers <= 1:
-                    for camera_key in manifest.video_keys:
-                        cache.get_frames(ep, camera_key, timestamps[(ep, camera_key)])
-                else:
-                    with ThreadPoolExecutor(max_workers=decode_workers) as pool:
-                        futures = [
-                            pool.submit(cache.get_frames, ep, camera_key, timestamps[(ep, camera_key)])
-                            for camera_key in manifest.video_keys
-                        ]
-                        for future in futures:
-                            future.result()
-                video_wait_decode_s += time.perf_counter() - video_start
-        finally:
-            parquet_pool.shutdown(wait=True)
-        elapsed = time.perf_counter() - start
-    return {
-        "samples_s": _samples_per_s(elapsed, episodes, frames_per_episode),
-        "video_samples_s": _samples_per_s(video_wait_decode_s, episodes, frames_per_episode),
-        "parquet_samples_s": _samples_per_s(parquet_wait_s, episodes, frames_per_episode),
-        "wall_s": elapsed,
-        "video_wait_decode_s": video_wait_decode_s,
-        "parquet_wait_s": parquet_wait_s,
-    }
-
-
-_remote_decoder_local = threading.local()
-
-
-def _remote_decoder_cache() -> VideoDecoderCache:
-    cache = getattr(_remote_decoder_local, "cache", None)
-    if cache is None:
-        cache = VideoDecoderCache(max_size=None)
-        _remote_decoder_local.cache = cache
-    return cache
-
-
-def _decode_remote_source(
-    meta: LeRobotDatasetMetadata,
-    data_root: str,
-    episode_index: int,
-    camera_key: str,
-    timestamps: list[float],
-):
-    video_path = _root_join(data_root, str(meta.get_video_file_path(episode_index, camera_key)))
-    return decode_video_frames_torchcodec(
-        video_path,
-        timestamps,
-        tolerance_s=1.0 / float(meta.fps),
-        decoder_cache=_remote_decoder_cache(),
-        return_uint8=True,
-    )
-
-
-def run_remote_decoder(
-    meta: LeRobotDatasetMetadata,
-    data_root: str,
-    episodes: Sequence[int],
-    timestamps: dict[tuple[int, str], list[float]],
-    *,
-    frames_per_episode: int,
-    decode_workers: int,
-    parquet_reader: EpisodeParquetReader,
-) -> dict[str, float]:
-    items = [
-        (ep, camera_key, timestamps[(ep, camera_key)]) for ep in episodes for camera_key in meta.video_keys
-    ]
-
-    start = time.perf_counter()
-    for ep, camera_key, ts in items:
-        if camera_key == meta.video_keys[0]:
-            parquet_reader.read_episode(ep)
-        _decode_remote_source(meta, data_root, ep, camera_key, ts)
-    sequential_s = time.perf_counter() - start
-
-    start = time.perf_counter()
-    if decode_workers <= 1:
-        for ep, camera_key, ts in items:
-            if camera_key == meta.video_keys[0]:
-                parquet_reader.read_episode(ep)
-            _decode_remote_source(meta, data_root, ep, camera_key, ts)
-    else:
-        with ThreadPoolExecutor(max_workers=decode_workers) as pool:
-            parquet_futures = [pool.submit(parquet_reader.read_episode, ep) for ep in episodes]
-            futures = [
-                pool.submit(_decode_remote_source, meta, data_root, ep, camera_key, ts)
-                for ep, camera_key, ts in items
-            ]
-            for future in parquet_futures:
-                future.result()
-            for future in futures:
-                future.result()
-    parallel_s = time.perf_counter() - start
-
-    return {
-        "sequential_samples_s": _samples_per_s(sequential_s, episodes, frames_per_episode),
-        "parallel_samples_s": _samples_per_s(parallel_s, episodes, frames_per_episode),
-    }
-
-
-def _print_range_timing_summary(fetch_pool: dict[str, float]) -> None:
-    range_jobs = fetch_pool.get("range_jobs", 0.0)
-    if range_jobs <= 0:
-        return
-
-    print()
-    print("| Range Read Stage | avg ms/range |")
-    print("|---|---:|")
-    for key, label in (
-        ("range_open_s", "fsspec handle open/lookup"),
-        ("range_seek_s", "fsspec seek"),
-        ("range_read_s", "fsspec read"),
-        ("range_resolve_s", "http URL resolve"),
-        ("range_header_s", "http response headers"),
-        ("range_first_byte_s", "http first body byte"),
-        ("range_body_s", "http body drain"),
-        ("range_retry_sleep_s", "http retry sleep"),
-    ):
-        value = fetch_pool.get(key)
-        if value is not None:
-            print(f"| {label} | {value * 1000 / range_jobs:.3f} |")
-    if "range_retry_attempts" in fetch_pool:
-        print(f"| http retries | {fetch_pool['range_retry_attempts'] / range_jobs:.3f} |")
-    if fetch_pool.get("range_failed_requests"):
-        print(f"| http failed requests | {fetch_pool['range_failed_requests']:.0f} |")
-    print(f"| range reads | {range_jobs:.0f} |")
-    print(f"| avg MiB/range | {fetch_pool.get('range_bytes', 0.0) / range_jobs / 1024**2:.1f} |")
-
-
-def run_indexed_strategy(
-    meta: LeRobotDatasetMetadata,
-    data_root: str,
-    args: argparse.Namespace,
-    parquet_reader: EpisodeParquetReader,
-    *,
-    range_backend: str = "fsspec",
-    label: str = "indexed",
-    sidecar_path: str | None = None,
-) -> None:
-    _log(f"starting_strategy: {label}")
-    memory_start = _memory_snapshot()
-    manifest_start = time.perf_counter()
-    dataset_episode_count = int(meta.total_episodes)
-    manifest_episode_count = args.manifest_episodes or dataset_episode_count
-    manifest_episode_count = min(manifest_episode_count, dataset_episode_count, args.num_episodes)
-    manifest = EpisodeVideoManifest.build(
-        meta,
-        data_root,
-        episode_indices=range(manifest_episode_count),
-        range_backend=range_backend,
-        workers=args.workers,
-        max_probe_bytes=args.max_probe_mb * 1024 * 1024,
-        sidecar_path=sidecar_path,
-    )
-    manifest_s = time.perf_counter() - manifest_start
-    _log(f"{label}: manifest_build_s={manifest_s:.2f}")
-
-    benchmark_episode_count = min(dataset_episode_count, args.num_episodes)
-    episodes = _episode_pool(dataset_episode_count, args.num_episodes, args.pool_size, args.seed)
-    byte_budget = int(args.byte_budget_gb * 1024**3)
-    byte_count = _bytes_for(manifest, episodes)
-    _log(
-        f"{label}: planned_video_fetch={byte_count / 1024**3:.2f} GiB per fetch track "
-        f"({byte_count / len(episodes) / 1024**2:.1f} MiB/episode)"
-    )
-
-    _log(f"{label}: filling episode byte cache with {args.workers} workers")
-    fetch_pool = run_fetch_pool(manifest, data_root, episodes, byte_budget, args.workers, range_backend, args)
-    estimated_dataset_s = dataset_episode_count / fetch_pool["fetch_episodes_s"]
-    estimated_benchmark_s = benchmark_episode_count / fetch_pool["fetch_episodes_s"]
-
-    print(f"manifest_build_s: {manifest_s:.2f}")
-    print(f"strategy: {label}")
-    print(f"range_backend: {range_backend}")
-    print(f"mp4_sidecar: {sidecar_path or 'none'}")
-    print(f"data_root: {data_root}")
-    print(f"dataset_episodes: {dataset_episode_count}")
-    print(f"benchmark_episodes: {benchmark_episode_count}")
-    print(f"pool_episodes: {len(episodes)}")
-    print(f"sampled_episodes: {episodes}")
-    print(f"cameras: {manifest.video_keys}")
-    print()
-    print(
-        "| Track | fetch MB/s | fetch eps/s | wall s | est benchmark | est full dataset | avg MB/camera | notes |"
-    )
-    print("|---|---:|---:|---:|---:|---:|---:|---|")
-    print(
-        f"| EPISODE POOL FETCH | {fetch_pool['fetch_mbps']:.1f} | "
-        f"{fetch_pool['fetch_episodes_s']:.2f} | {fetch_pool['fetch_s']:.2f} | "
-        f"{_format_duration(estimated_benchmark_s)} | {_format_duration(estimated_dataset_s)} | "
-        f"{fetch_pool['avg_mb_miss']:.1f} | {args.workers} workers, no decoder open/frame decode |"
-    )
-    print()
-    print("| Camera Job Stage | avg ms/job |")
-    print("|---|---:|")
-    print(f"| manifest lookup | {fetch_pool['lookup_ms']:.3f} |")
-    print(f"| remote byte-range fetch | {fetch_pool['range_fetch_ms']:.3f} |")
-    print(f"| synthesize mini-MP4 | {fetch_pool['synthesize_ms']:.3f} |")
-    print(f"| store in shared cache | {fetch_pool['store_ms']:.3f} |")
-    print(f"| camera jobs | {fetch_pool['jobs']:.0f} |")
-    _print_range_timing_summary(fetch_pool)
-    _print_memory_summary(memory_start, _memory_snapshot())
-
-    if args.include_decode:
-        timestamps = _timestamps(manifest, episodes, args.frames_per_episode, args.seed + 1)
-        _log(f"{label}: running parallel video fetch + decode-only")
-        parallel = run_parallel(
-            manifest,
-            data_root,
-            episodes,
-            timestamps,
-            byte_budget,
-            args.workers,
-            args.decode_workers,
-            args.frames_per_episode,
-            parquet_reader,
-            range_backend,
-        )
-        _log(f"{label}: running overlapped end-to-end")
-        overlapped = run_overlapped(
-            manifest,
-            data_root,
-            episodes,
-            timestamps,
-            byte_budget,
-            args.workers,
-            args.decode_workers,
-            args.frames_per_episode,
-            args.prefetch_ahead,
-            parquet_reader,
-            range_backend,
-        )
-        print(
-            f"| DECODE COMPARISON | {parallel['fetch_mbps']:.1f} | {parallel['fetch_episodes_s']:.2f} | "
-            f"{parallel['fetch_s']:.2f} | "
-            f"{_format_duration(benchmark_episode_count / parallel['fetch_episodes_s'])} | "
-            f"{_format_duration(dataset_episode_count / parallel['fetch_episodes_s'])} | "
-            f"{fetch_pool['avg_mb_miss']:.1f} | "
-            f"decoder open {parallel['decoder_ms_miss']:.1f} ms/miss, "
-            f"decode {parallel['decode_samples_s']:.1f} samples/s, parquet {parallel['parquet_s']:.2f}s |"
-        )
-        print(
-            f"| OVERLAPPED E2E | - | - | {overlapped['wall_s']:.2f} | - | - | "
-            f"{fetch_pool['avg_mb_miss']:.1f} | "
-            f"{overlapped['samples_s']:.1f} samples/s; video+decode "
-            f"{overlapped['video_wait_decode_s']:.2f}s, parquet {overlapped['parquet_wait_s']:.2f}s |"
-        )
-
-
-def run_remote_strategy(
-    meta: LeRobotDatasetMetadata,
-    data_root: str,
-    args: argparse.Namespace,
-    parquet_reader: EpisodeParquetReader,
-) -> None:
-    _log("starting_strategy: remote-decoder")
-    episodes = _episode_pool(int(meta.total_episodes), args.num_episodes, args.pool_size, args.seed)
-    timestamps = _timestamps_from_meta(meta, episodes, args.frames_per_episode, args.seed + 1)
-    _log("remote-decoder: running direct source MP4 decoder")
-    result = run_remote_decoder(
-        meta,
-        data_root,
-        episodes,
-        timestamps,
-        frames_per_episode=args.frames_per_episode,
-        decode_workers=args.decode_workers,
-        parquet_reader=parquet_reader,
-    )
-    print("strategy: remote-decoder")
-    print(f"data_root: {data_root}")
-    print(f"episodes: {episodes}")
-    print(f"cameras: {list(meta.video_keys)}")
-    print()
-    print("| Track | samples/s | notes |")
-    print("|---|---:|---|")
-    print(f"| REMOTE SEQUENTIAL | {result['sequential_samples_s']:.1f} | direct source MP4 decoder |")
-    print(
-        f"| REMOTE PARALLEL | {result['parallel_samples_s']:.1f} | "
-        f"direct source MP4 decoder, {args.decode_workers} workers |"
-    )
-
-
-def main() -> None:
-    args = parse_args()
-    if args.strategy == "full":
-        args.strategy = "both"
-    if args.strategy == "native-http":
-        args.range_backend = "native-http"
-    data_root = args.data_root
-    if data_root.startswith("hf://") and not args.no_hub_branch_assert:
-        assert_hf_hub_range_cache_branch()
-
-    meta = LeRobotDatasetMetadata(args.repo_id, revision=args.revision)
-    meta.ensure_readable()
-    parquet_reader = EpisodeParquetReader(meta, data_root)
-    manifest_episode_count = args.manifest_episodes or int(meta.total_episodes)
-    manifest_episode_count = min(manifest_episode_count, int(meta.total_episodes), args.num_episodes)
-    sidecar_path = _find_or_download_sidecar(data_root, manifest_episode_count)
-
-    if sidecar_path is not None:
-        print(f"using_mp4_sidecar: {sidecar_path}")
-
-    if sidecar_path is not None and args.strategy == "both":
-        if args.include_decode:
-            run_remote_strategy(meta, data_root, args, parquet_reader)
-            print()
-        run_indexed_strategy(
-            meta,
-            data_root,
-            args,
-            parquet_reader,
-            range_backend=args.range_backend,
-            label=f"indexed-sidecar-{args.range_backend}",
-            sidecar_path=str(sidecar_path),
-        )
-        return
-    if sidecar_path is not None and args.strategy == "indexed":
-        run_indexed_strategy(
-            meta,
-            data_root,
-            args,
-            parquet_reader,
-            range_backend=args.range_backend,
-            label=f"indexed-sidecar-{args.range_backend}",
-            sidecar_path=str(sidecar_path),
-        )
-        return
-    if sidecar_path is not None and args.strategy == "native-http":
-        run_indexed_strategy(
-            meta,
-            data_root,
-            args,
-            parquet_reader,
-            range_backend="native-http",
-            label="indexed-sidecar-native-http",
-            sidecar_path=str(sidecar_path),
-        )
-        return
-    if args.strategy == "both":
-        expected_sidecar = SIDECAR_CACHE_DIR / FULL_SIDECAR_NAME
-        expected_remote = _root_join(data_root, f"meta/mp4-sidecars/{FULL_SIDECAR_NAME}")
-        print(f"mp4_sidecar_missing_local: {expected_sidecar}")
-        print(f"mp4_sidecar_missing_remote: {expected_remote}")
-        print(
-            "build_mp4_sidecar: "
-            "uv run --no-sync python scripts/build_mp4_sidecar.py "
-            f"--workers {args.workers} --range-backend native-http --output {expected_sidecar}"
-        )
-        print("running_without_mp4_sidecar: indexed variants will build MP4 indexes online")
-        print()
-
-    if args.strategy in ("both", "indexed"):
-        run_indexed_strategy(
-            meta,
-            data_root,
-            args,
-            parquet_reader,
-            range_backend="fsspec",
-            label="indexed",
-            sidecar_path=None,
-        )
-    if args.strategy == "both":
-        print()
-    if args.strategy == "remote-decoder" or (args.strategy == "both" and args.include_decode):
-        run_remote_strategy(meta, data_root, args, parquet_reader)
-    if args.strategy == "both" and args.include_decode:
-        print()
-    if args.strategy in ("both", "native-http"):
-        run_indexed_strategy(
-            meta,
-            data_root,
-            args,
-            parquet_reader,
-            range_backend="native-http",
-            label="indexed-native-http",
-            sidecar_path=None,
-        )
-
-
-if __name__ == "__main__":
-    main()
@@ -1,93 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-from __future__ import annotations
-
-import argparse
-import time
-from pathlib import Path
-
-import fsspec
-
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
-from lerobot.datasets.episode_video_streaming import EpisodeVideoManifest, assert_hf_hub_range_cache_branch
-
-DEFAULT_REPO = "allenai/MolmoAct2-BimanualYAM-Dataset"
-DEFAULT_REVISION = "e9f21ae15074330839f2ac25ed4b49d76dfa1f9c"
-DEFAULT_DATA_ROOT = "hf://buckets/pepijn223/MolmoAct2-BimanualYAM-Dataset-bucket"
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Build a reusable MP4 byte-index sidecar for streaming.")
-    parser.add_argument("--repo-id", default=DEFAULT_REPO)
-    parser.add_argument("--revision", default=DEFAULT_REVISION)
-    parser.add_argument("--data-root", default=DEFAULT_DATA_ROOT)
-    parser.add_argument("--output", required=True)
-    parser.add_argument("--episodes", type=int, default=None)
-    parser.add_argument("--workers", type=int, default=8)
-    parser.add_argument("--range-backend", choices=("fsspec", "native-http"), default="native-http")
-    parser.add_argument("--max-probe-mb", type=int, default=64)
-    parser.add_argument(
-        "--no-push", action="store_true", help="Do not upload the sidecar to data_root/meta/mp4-sidecars."
-    )
-    parser.add_argument("--no-hub-branch-assert", action="store_true")
-    return parser.parse_args()
-
-
-def push_sidecar(local_path: str, data_root: str) -> list[str]:
-    if not data_root.startswith("hf://"):
-        return []
-
-    local = Path(local_path)
-    fs = fsspec.filesystem("hf")
-    remote_dir = f"{data_root.rstrip('/')}/meta/mp4-sidecars"
-    remote_paths = [f"{remote_dir}/{local.name}"]
-
-    for remote in remote_paths:
-        fs.put(str(local), remote)
-    return remote_paths
-
-
-def main() -> None:
-    args = parse_args()
-    if args.data_root.startswith("hf://") and not args.no_hub_branch_assert:
-        assert_hf_hub_range_cache_branch()
-
-    meta = LeRobotDatasetMetadata(args.repo_id, revision=args.revision)
-    meta.ensure_readable()
-    total = (
-        int(meta.total_episodes) if args.episodes is None else min(args.episodes, int(meta.total_episodes))
-    )
-    rel_paths = sorted(
-        {str(meta.get_video_file_path(ep_idx, key)) for ep_idx in range(total) for key in meta.video_keys}
-    )
-
-    start = time.perf_counter()
-    EpisodeVideoManifest.write_file_sidecar(
-        args.output,
-        rel_paths,
-        args.data_root,
-        range_backend=args.range_backend,
-        workers=args.workers,
-        max_probe_bytes=args.max_probe_mb * 1024 * 1024,
-    )
-    elapsed = time.perf_counter() - start
-    print(f"wrote {args.output}")
-    print(f"episodes={total} files={len(rel_paths)} elapsed_s={elapsed:.2f}")
-    if args.no_push:
-        print("push_skipped: --no-push")
-    else:
-        pushed = push_sidecar(args.output, args.data_root)
-        for remote in pushed:
-            print(f"pushed {remote}")
-
-
-if __name__ == "__main__":
-    main()
@@ -54,6 +54,7 @@ from typing import Any
 import pyarrow as pa
 import pyarrow.parquet as pq

+from lerobot.datasets.io_utils import write_table_one_row_group_per_episode
 from lerobot.datasets.language import (
    EVENT_ONLY_STYLES,
    LANGUAGE_EVENTS,
@@ -274,12 +275,11 @@ class LanguageColumnsWriter:
        new_table = self._materialize_table(
            table, per_row_persistent, per_row_events, drop_old=self.drop_existing_subtask_index
        )
-        # Atomic replace: write to a sibling tmp path and rename so a crash
-        # mid-write can't leave a half-written shard that ``pq.read_table``
-        # would then fail to open. ``Path.replace`` is atomic on POSIX +
-        # Windows when source and target sit on the same filesystem.
+        # Re-emit one row group per episode (a bulk pq.write_table would collapse
+        # them into one). Write to a sibling tmp path and atomically rename so a
+        # crash mid-write can't leave a half-written shard.
        tmp_path = path.with_suffix(path.suffix + ".tmp")
-        pq.write_table(new_table, tmp_path)
+        write_table_one_row_group_per_episode(new_table, tmp_path)
        tmp_path.replace(path)

    def _materialize_table(
@@ -32,6 +32,7 @@ from .feature_utils import features_equal_for_merge, get_hf_features_from_featur
 from .io_utils import (
    get_file_size_in_mb,
    get_parquet_file_size_in_mb,
+    to_parquet_one_row_group_per_episode,
    to_parquet_with_hf_images,
    write_info,
    write_stats,
@@ -551,6 +552,7 @@ def aggregate_data(src_meta, dst_meta, data_idx, data_files_size_in_mb, chunk_si
            aggr_root=dst_meta.root,
            hf_features=hf_features,
            concatenate=concatenate_data,
+            one_row_group_per_episode=True,
        )

        # Record the mapping from source to actual destination
@@ -628,6 +630,7 @@ def append_or_create_parquet_file(
    aggr_root: Path = None,
    hf_features: datasets.Features | None = None,
    concatenate: bool = True,
+    one_row_group_per_episode: bool = False,
 ) -> tuple[dict[str, int], tuple[int, int]]:
    """Appends data to an existing parquet file or creates a new one based on size constraints.

@@ -645,6 +648,8 @@ def append_or_create_parquet_file(
        aggr_root: Root path for the aggregated dataset.
        hf_features: Optional HuggingFace Features schema for proper image typing.
        concatenate: When False, always rotate to a new file instead of appending to the current one.
+        one_row_group_per_episode: True for DATA parquet (emit one row group per episode); False for
+            the episodes-metadata parquet (already one row per episode).

    Returns:
        tuple: (updated_idx, (dst_chunk, dst_file)) where updated_idx is the index dict
@@ -657,6 +662,8 @@ def append_or_create_parquet_file(
        dst_path.parent.mkdir(parents=True, exist_ok=True)
        if contains_images:
            to_parquet_with_hf_images(df, dst_path, features=hf_features)
+        elif one_row_group_per_episode:
+            to_parquet_one_row_group_per_episode(df, dst_path)
        else:
            df.to_parquet(dst_path)
        return idx, (dst_chunk, dst_file)
@@ -683,6 +690,8 @@ def append_or_create_parquet_file(

    if contains_images:
        to_parquet_with_hf_images(final_df, target_path, features=hf_features)
+    elif one_row_group_per_episode:
+        to_parquet_one_row_group_per_episode(final_df, target_path)
    else:
        final_df.to_parquet(target_path)

@@ -15,6 +15,7 @@
 # limitations under the License.
 import contextlib
 from collections.abc import Callable
+from copy import deepcopy
 from pathlib import Path

 import numpy as np
@@ -709,7 +710,7 @@ class LeRobotDatasetMetadata:

        obj.root.mkdir(parents=True, exist_ok=False)

-        features = {**features, **DEFAULT_FEATURES}
+        features = {**deepcopy(features), **DEFAULT_FEATURES}
        _validate_feature_names(features)

        obj.tasks = None
@@ -27,6 +27,7 @@ import logging
 import shutil
 from collections.abc import Callable
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+from copy import deepcopy
 from pathlib import Path

 import datasets
@@ -1101,7 +1102,9 @@ def _copy_episodes_metadata_and_stats(
    if dst_meta.video_keys and src_dataset.meta.video_keys:
        for key in dst_meta.video_keys:
            if key in src_dataset.meta.features:
-                dst_meta.info.features[key]["info"] = src_dataset.meta.info.features[key].get("info", {})
+                dst_meta.info.features[key]["info"] = deepcopy(
+                    src_dataset.meta.info.features[key].get("info", {})
+                )

    write_info(dst_meta.info, dst_meta.root)

@@ -1,890 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-from __future__ import annotations
-
-import contextlib
-import io
-import json
-import threading
-import time
-from collections import OrderedDict
-from concurrent.futures import Future, ThreadPoolExecutor
-from dataclasses import dataclass
-from importlib import metadata
-from pathlib import Path
-from typing import Any
-from urllib.parse import quote, urljoin, urlparse
-
-import fsspec
-import httpx
-import numpy as np
-from huggingface_hub import HfApi, HfFileSystem, constants
-from huggingface_hub.utils import hf_raise_for_status
-
-from lerobot.datasets.dataset_metadata import LeRobotDatasetMetadata
-from lerobot.datasets.mp4 import Mp4Index, Mp4SampleSlice, fetch_mp4_index, synthesize_mp4
-
-
-@dataclass(frozen=True)
-class EpisodeVideoSpan:
-    file_id: int
-    mdat_offset: int
-    mdat_length: int
-    first_pts: float
-    last_pts: float
-    frame_count: int
-    sample_lo: int
-    sample_hi: int
-    source_start_pts: float
-
-
-@dataclass(frozen=True)
-class VideoFileRecord:
-    file_path: str
-    file_size: int
-    mp4: Mp4Index
-
-
-class ThreadLocalRangeFetcher:
-    """Range reader that gives each worker thread independent file handles."""
-
-    def __init__(self, data_root: str | Path, *, block_size: int = 2**20, cache_type: str = "none"):
-        self.data_root = str(data_root).rstrip("/")
-        protocol = "hf" if self.data_root.startswith("hf://") else "file"
-        self.fs = fsspec.filesystem(protocol)
-        self.block_size = block_size
-        self.cache_type = cache_type
-        self._local = threading.local()
-        self._timing_lock = threading.Lock()
-        self._timing_totals = {
-            "range_jobs": 0.0,
-            "range_bytes": 0.0,
-            "range_open_s": 0.0,
-            "range_seek_s": 0.0,
-            "range_read_s": 0.0,
-        }
-
-    def _url(self, relative_path: str) -> str:
-        if self.data_root.startswith("hf://"):
-            return f"{self.data_root}/{relative_path}"
-        return str(Path(self.data_root) / relative_path)
-
-    def _handle(self, relative_path: str):
-        handles = getattr(self._local, "handles", None)
-        if handles is None:
-            handles = {}
-            self._local.handles = handles
-        handle = handles.get(relative_path)
-        if handle is None or getattr(handle, "closed", False):
-            handle = self.fs.open(
-                self._url(relative_path), "rb", block_size=self.block_size, cache_type=self.cache_type
-            )
-            handles[relative_path] = handle
-        return handle
-
-    def info_size(self, relative_path: str) -> int:
-        return int(self.fs.info(self._url(relative_path))["size"])
-
-    def read_range(self, relative_path: str, offset: int, length: int) -> bytes:
-        open_start = time.perf_counter()
-        handle = self._handle(relative_path)
-        open_s = time.perf_counter() - open_start
-        seek_start = time.perf_counter()
-        handle.seek(offset)
-        seek_s = time.perf_counter() - seek_start
-        read_start = time.perf_counter()
-        data = handle.read(length)
-        read_s = time.perf_counter() - read_start
-        self._record_timing(
-            range_jobs=1.0,
-            range_bytes=float(len(data)),
-            range_open_s=open_s,
-            range_seek_s=seek_s,
-            range_read_s=read_s,
-        )
-        return data
-
-    def _record_timing(self, **kwargs: float) -> None:
-        with self._timing_lock:
-            for key, value in kwargs.items():
-                self._timing_totals[key] += value
-
-    def timing_summary(self) -> dict[str, float]:
-        with self._timing_lock:
-            return dict(self._timing_totals)
-
-    def close(self) -> None:
-        handles = getattr(self._local, "handles", None)
-        if handles is None:
-            return
-        for handle in handles.values():
-            with contextlib.suppress(Exception):
-                handle.close()
-        handles.clear()
-
-
-class NativeHTTPRangeFetcher:
-    """Direct pooled HTTP range reader for hf:// paths."""
-
-    _GLOBAL_SOURCE_URLS: dict[tuple[str, str], str] = {}
-    _GLOBAL_RESOLVED_URLS: dict[tuple[str, str], str] = {}
-    _GLOBAL_SIZES: dict[tuple[str, str], int] = {}
-    _GLOBAL_LOCK = threading.Lock()
-
-    _RETRYABLE_EXCEPTIONS = (
-        httpx.ConnectError,
-        httpx.ConnectTimeout,
-        httpx.ReadError,
-        httpx.ReadTimeout,
-        httpx.RemoteProtocolError,
-        httpx.PoolTimeout,
-    )
-
-    def __init__(
-        self,
-        data_root: str | Path,
-        *,
-        max_connections: int = 32,
-        timeout: float = 60.0,
-        max_retries: int = 4,
-    ):
-        self.data_root = str(data_root).rstrip("/")
-        if not self.data_root.startswith("hf://"):
-            raise ValueError("NativeHTTPRangeFetcher only supports hf:// roots")
-        self.max_retries = max_retries
-        self.api = HfApi()
-        self.fs: HfFileSystem | None = None
-        self._bucket_id: str | None = None
-        self._bucket_prefix = ""
-        if self.data_root.startswith("hf://buckets/"):
-            bucket_root = self.data_root.removeprefix("hf://buckets/")
-            parts = bucket_root.split("/", 2)
-            if len(parts) < 2:
-                raise ValueError(f"Invalid bucket root: {self.data_root}")
-            self._bucket_id = f"{parts[0]}/{parts[1]}"
-            self._bucket_prefix = parts[2].strip("/") if len(parts) == 3 else ""
-        else:
-            self.fs = HfFileSystem()
-        self.client = httpx.Client(
-            timeout=timeout,
-            limits=httpx.Limits(max_connections=max_connections, max_keepalive_connections=max_connections),
-            follow_redirects=False,
-        )
-        self._resolved_urls: dict[str, str] = {}
-        self._source_urls: dict[str, str] = {}
-        self._sizes: dict[str, int] = {}
-        self._lock = threading.Lock()
-        self._timing_lock = threading.Lock()
-        self._timing_totals = {
-            "range_jobs": 0.0,
-            "range_bytes": 0.0,
-            "range_resolve_s": 0.0,
-            "range_header_s": 0.0,
-            "range_first_byte_s": 0.0,
-            "range_body_s": 0.0,
-            "range_retry_attempts": 0.0,
-            "range_retry_sleep_s": 0.0,
-            "range_failed_requests": 0.0,
-        }
-
-    def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
-        last_exc: Exception | None = None
-        for attempt in range(self.max_retries + 1):
-            try:
-                return self.client.request(method, url, **kwargs)
-            except self._RETRYABLE_EXCEPTIONS as exc:
-                last_exc = exc
-                if attempt >= self.max_retries:
-                    break
-                time.sleep(min(0.5 * 2**attempt, 5.0))
-        if last_exc is None:
-            raise RuntimeError("HTTP request failed without an exception")
-        raise last_exc
-
-    def _cache_key(self, relative_path: str) -> tuple[str, str]:
-        return self.data_root, relative_path
-
-    def _path(self, relative_path: str) -> str:
-        return f"{self.data_root}/{relative_path}"
-
-    def _bucket_path(self, relative_path: str) -> str:
-        if self._bucket_prefix:
-            return f"{self._bucket_prefix}/{relative_path}"
-        return relative_path
-
-    def _headers_for(self, request_url: str, source_url: str) -> dict[str, str]:
-        headers = self.api._build_hf_headers()
-        if urlparse(request_url).netloc != urlparse(source_url).netloc:
-            headers.pop("authorization", None)
-            headers.pop("Authorization", None)
-        return headers
-
-    def _source_url(self, relative_path: str) -> str:
-        with self._lock:
-            source = self._source_urls.get(relative_path)
-            if source is not None:
-                return source
-        key = self._cache_key(relative_path)
-        with self._GLOBAL_LOCK:
-            source = self._GLOBAL_SOURCE_URLS.get(key)
-        if source is None:
-            if self._bucket_id is not None:
-                source = (
-                    f"{constants.ENDPOINT}/buckets/{self._bucket_id}/resolve/"
-                    f"{quote(self._bucket_path(relative_path))}"
-                )
-            else:
-                if self.fs is None:
-                    raise RuntimeError("HfFileSystem fallback was not initialized")
-                source = self.fs.url(self._path(relative_path))
-            with self._GLOBAL_LOCK:
-                self._GLOBAL_SOURCE_URLS[key] = source
-        with self._lock:
-            self._source_urls[relative_path] = source
-            return source
-
-    def _resolve_url(self, relative_path: str, *, refresh: bool = False) -> str:
-        with self._lock:
-            if not refresh and relative_path in self._resolved_urls:
-                return self._resolved_urls[relative_path]
-        key = self._cache_key(relative_path)
-        if not refresh:
-            with self._GLOBAL_LOCK:
-                resolved = self._GLOBAL_RESOLVED_URLS.get(key)
-                size = self._GLOBAL_SIZES.get(key)
-            if resolved is not None:
-                with self._lock:
-                    self._resolved_urls[relative_path] = resolved
-                    if size is not None:
-                        self._sizes[relative_path] = size
-                return resolved
-
-        source = self._source_url(relative_path)
-        response = self._request("HEAD", source, headers=self.api._build_hf_headers(), follow_redirects=False)
-        try:
-            hf_raise_for_status(response)
-            location = response.headers.get("Location")
-            resolved = urljoin(source, location) if location else source
-            with self._lock:
-                self._resolved_urls[relative_path] = resolved
-                if "Content-Length" in response.headers:
-                    self._sizes[relative_path] = int(response.headers["Content-Length"])
-            with self._GLOBAL_LOCK:
-                self._GLOBAL_RESOLVED_URLS[key] = resolved
-                if "Content-Length" in response.headers:
-                    self._GLOBAL_SIZES[key] = int(response.headers["Content-Length"])
-            return resolved
-        finally:
-            response.close()
-
-    def info_size(self, relative_path: str) -> int:
-        with self._lock:
-            size = self._sizes.get(relative_path)
-            if size is not None:
-                return size
-        key = self._cache_key(relative_path)
-        with self._GLOBAL_LOCK:
-            size = self._GLOBAL_SIZES.get(key)
-        if size is not None:
-            with self._lock:
-                self._sizes[relative_path] = size
-            return size
-
-        resolved = self._resolve_url(relative_path)
-        source = self._source_url(relative_path)
-        response = self._request(
-            "HEAD", resolved, headers=self._headers_for(resolved, source), follow_redirects=True
-        )
-        try:
-            hf_raise_for_status(response)
-            size = int(response.headers["Content-Length"])
-            with self._lock:
-                self._sizes[relative_path] = size
-            with self._GLOBAL_LOCK:
-                self._GLOBAL_SIZES[key] = size
-            return size
-        finally:
-            response.close()
-
-    def read_range(self, relative_path: str, offset: int, length: int) -> bytes:
-        resolve_start = time.perf_counter()
-        resolved = self._resolve_url(relative_path)
-        source = self._source_url(relative_path)
-        resolve_s = time.perf_counter() - resolve_start
-        headers = self._headers_for(resolved, source)
-        headers["Range"] = f"bytes={offset}-{offset + length - 1}"
-        payload, status_code, timings = self._read_range_response(resolved, headers)
-        if status_code == 403:
-            refresh_start = time.perf_counter()
-            resolved = self._resolve_url(relative_path, refresh=True)
-            resolve_s += time.perf_counter() - refresh_start
-            headers = self._headers_for(resolved, source)
-            headers["Range"] = f"bytes={offset}-{offset + length - 1}"
-            payload, status_code, retry_timings = self._read_range_response(resolved, headers)
-            for key, value in retry_timings.items():
-                timings[key] += value
-        if status_code == 403:
-            raise PermissionError(f"HTTP range request returned 403 after URL refresh: {relative_path}")
-        self._record_timing(
-            range_jobs=1.0,
-            range_bytes=float(len(payload)),
-            range_resolve_s=resolve_s,
-            **timings,
-        )
-        return payload
-
-    def _read_range_response(self, url: str, headers: dict[str, str]) -> tuple[bytes, int, dict[str, float]]:
-        last_exc: Exception | None = None
-        retry_attempts = 0.0
-        retry_sleep_s = 0.0
-        for attempt in range(self.max_retries + 1):
-            try:
-                payload, status_code, timings = self._read_range_response_once(url, headers)
-                timings["range_retry_attempts"] = retry_attempts
-                timings["range_retry_sleep_s"] = retry_sleep_s
-                return payload, status_code, timings
-            except self._RETRYABLE_EXCEPTIONS as exc:
-                last_exc = exc
-                if attempt >= self.max_retries:
-                    break
-                retry_attempts += 1.0
-                sleep_s = min(0.5 * 2**attempt, 5.0)
-                retry_sleep_s += sleep_s
-                time.sleep(sleep_s)
-        self._record_timing(
-            range_failed_requests=1.0,
-            range_retry_attempts=retry_attempts,
-            range_retry_sleep_s=retry_sleep_s,
-        )
-        if last_exc is None:
-            raise RuntimeError("HTTP range request failed without an exception")
-        raise last_exc
-
-    def _read_range_response_once(
-        self, url: str, headers: dict[str, str]
-    ) -> tuple[bytes, int, dict[str, float]]:
-        header_start = time.perf_counter()
-        with self.client.stream("GET", url, headers=headers) as response:
-            header_s = time.perf_counter() - header_start
-            if response.status_code == 403:
-                return (
-                    b"",
-                    response.status_code,
-                    {
-                        "range_header_s": header_s,
-                        "range_first_byte_s": 0.0,
-                        "range_body_s": 0.0,
-                    },
-                )
-            hf_raise_for_status(response)
-            chunks = []
-            first_byte_s = 0.0
-            first_chunk = True
-            body_start = time.perf_counter()
-            for chunk in response.iter_bytes():
-                if first_chunk:
-                    first_byte_s = time.perf_counter() - body_start
-                    first_chunk = False
-                chunks.append(chunk)
-            body_s = time.perf_counter() - body_start
-            return (
-                b"".join(chunks),
-                response.status_code,
-                {
-                    "range_header_s": header_s,
-                    "range_first_byte_s": first_byte_s,
-                    "range_body_s": body_s,
-                },
-            )
-
-    def _record_timing(self, **kwargs: float) -> None:
-        with self._timing_lock:
-            for key, value in kwargs.items():
-                self._timing_totals[key] += value
-
-    def timing_summary(self) -> dict[str, float]:
-        with self._timing_lock:
-            return dict(self._timing_totals)
-
-    def close(self) -> None:
-        self.client.close()
-
-
-def make_range_fetcher(
-    data_root: str | Path,
-    *,
-    range_backend: str,
-    workers: int,
-    native_http_connections: int | None = None,
-    native_http_timeout: float = 60.0,
-    native_http_retries: int = 4,
-):
-    if range_backend == "fsspec":
-        return ThreadLocalRangeFetcher(data_root)
-    if range_backend == "native-http":
-        max_connections = native_http_connections or max(8, workers)
-        return NativeHTTPRangeFetcher(
-            data_root,
-            max_connections=max_connections,
-            timeout=native_http_timeout,
-            max_retries=native_http_retries,
-        )
-    raise ValueError(f"Unknown range backend: {range_backend}")
-
-
-class EpisodeVideoManifest:
-    _FILE_SIDECAR_CACHE: dict[str, dict[str, VideoFileRecord]] = {}
-    _FILE_SIDECAR_CACHE_LOCK = threading.Lock()
-
-    def __init__(
-        self,
-        *,
-        video_keys: list[str],
-        files: list[VideoFileRecord],
-        spans: dict[str, np.ndarray],
-    ):
-        self.video_keys = list(video_keys)
-        self._camera_to_id = {key: idx for idx, key in enumerate(self.video_keys)}
-        self.files = files
-        self.spans = spans
-
-    @classmethod
-    def build(
-        cls,
-        meta: LeRobotDatasetMetadata,
-        data_root: str | Path,
-        *,
-        episode_indices: list[int] | range | None = None,
-        range_backend: str = "fsspec",
-        workers: int = 8,
-        header_probe_bytes: int = 4 * 1024 * 1024,
-        max_probe_bytes: int = 64 * 1024 * 1024,
-        keyframe_pad_s: float = 0.1,
-        keyframe_pad_fraction: float = 0.05,
-        sidecar_path: str | Path | None = None,
-    ) -> EpisodeVideoManifest:
-        meta.ensure_readable()
-        video_keys = list(meta.video_keys)
-        if episode_indices is None:
-            episode_indices = range(int(meta.total_episodes))
-        rel_paths = sorted(
-            {str(meta.get_video_file_path(ep_idx, key)) for ep_idx in episode_indices for key in video_keys}
-        )
-        path_to_id = {path: idx for idx, path in enumerate(rel_paths)}
-        if sidecar_path is None:
-            files = cls._build_file_records(
-                rel_paths,
-                data_root,
-                range_backend=range_backend,
-                workers=workers,
-                header_probe_bytes=header_probe_bytes,
-                max_probe_bytes=max_probe_bytes,
-            )
-        else:
-            records = cls.load_file_sidecar(sidecar_path)
-            missing = [path for path in rel_paths if path not in records]
-            if missing:
-                raise ValueError(
-                    f"Sidecar {sidecar_path} is missing {len(missing)} files, first: {missing[0]}"
-                )
-            files = [records[path] for path in rel_paths]
-
-        total = int(meta.total_episodes)
-        num_cameras = len(video_keys)
-        spans: dict[str, np.ndarray] = {
-            "file_id": np.zeros((total, num_cameras), dtype=np.int32),
-            "mdat_offset": np.zeros((total, num_cameras), dtype=np.int64),
-            "mdat_length": np.zeros((total, num_cameras), dtype=np.int64),
-            "first_pts": np.zeros((total, num_cameras), dtype=np.float64),
-            "last_pts": np.zeros((total, num_cameras), dtype=np.float64),
-            "frame_count": np.zeros((total, num_cameras), dtype=np.int32),
-            "sample_lo": np.zeros((total, num_cameras), dtype=np.int32),
-            "sample_hi": np.zeros((total, num_cameras), dtype=np.int32),
-            "source_start_pts": np.zeros((total, num_cameras), dtype=np.float64),
-        }
-
-        for ep_idx in episode_indices:
-            ep = meta.episodes[ep_idx]
-            for cam_idx, key in enumerate(video_keys):
-                rel_path = str(meta.get_video_file_path(ep_idx, key))
-                file_id = path_to_id[rel_path]
-                mp4 = files[file_id].mp4
-                from_ts = float(ep[f"videos/{key}/from_timestamp"])
-                to_ts = float(ep[f"videos/{key}/to_timestamp"])
-                sample_slice = mp4.sample_slice(
-                    from_ts,
-                    to_ts,
-                    keyframe_pad_s=keyframe_pad_s,
-                    keyframe_pad_fraction=keyframe_pad_fraction,
-                    file_size=files[file_id].file_size,
-                )
-                spans["file_id"][ep_idx, cam_idx] = file_id
-                spans["mdat_offset"][ep_idx, cam_idx] = sample_slice.byte_offset
-                spans["mdat_length"][ep_idx, cam_idx] = sample_slice.byte_length
-                spans["first_pts"][ep_idx, cam_idx] = from_ts
-                spans["last_pts"][ep_idx, cam_idx] = to_ts
-                spans["frame_count"][ep_idx, cam_idx] = sample_slice.sample_hi - sample_slice.sample_lo + 1
-                spans["sample_lo"][ep_idx, cam_idx] = sample_slice.sample_lo
-                spans["sample_hi"][ep_idx, cam_idx] = sample_slice.sample_hi
-                spans["source_start_pts"][ep_idx, cam_idx] = sample_slice.source_start_pts
-
-        return cls(video_keys=video_keys, files=files, spans=spans)
-
-    @staticmethod
-    def _build_file_records(
-        rel_paths: list[str],
-        data_root: str | Path,
-        *,
-        range_backend: str,
-        workers: int,
-        header_probe_bytes: int,
-        max_probe_bytes: int,
-    ) -> list[VideoFileRecord]:
-        fetcher = make_range_fetcher(data_root, range_backend=range_backend, workers=workers)
-
-        def build_file(path: str) -> VideoFileRecord:
-            file_size = fetcher.info_size(path)
-            mp4 = fetch_mp4_index(
-                path,
-                fetcher.read_range,
-                file_size=file_size,
-                header_probe_bytes=header_probe_bytes,
-                max_probe_bytes=max_probe_bytes,
-            )
-            return VideoFileRecord(path, file_size, mp4)
-
-        try:
-            with ThreadPoolExecutor(max_workers=workers) as pool:
-                return list(pool.map(build_file, rel_paths))
-        finally:
-            fetcher.close()
-
-    @classmethod
-    def write_file_sidecar(
-        cls,
-        sidecar_path: str | Path,
-        rel_paths: list[str],
-        data_root: str | Path,
-        *,
-        range_backend: str = "native-http",
-        workers: int = 8,
-        header_probe_bytes: int = 4 * 1024 * 1024,
-        max_probe_bytes: int = 64 * 1024 * 1024,
-    ) -> None:
-        records = cls._build_file_records(
-            sorted(set(rel_paths)),
-            data_root,
-            range_backend=range_backend,
-            workers=workers,
-            header_probe_bytes=header_probe_bytes,
-            max_probe_bytes=max_probe_bytes,
-        )
-        cls.save_file_sidecar(sidecar_path, records)
-
-    @staticmethod
-    def save_file_sidecar(sidecar_path: str | Path, records: list[VideoFileRecord]) -> None:
-        sidecar_path = Path(sidecar_path)
-        sidecar_path.parent.mkdir(parents=True, exist_ok=True)
-        payload = {
-            "version": 1,
-            "files": [
-                {"file_path": record.file_path, "file_size": record.file_size, "mp4": record.mp4.to_dict()}
-                for record in records
-            ],
-        }
-        arrays = {}
-        for file_idx, record in enumerate(records):
-            arrays[f"{file_idx}/sample_pts"] = record.mp4.sample_pts
-            arrays[f"{file_idx}/sample_durations"] = record.mp4.sample_durations
-            arrays[f"{file_idx}/sample_sizes"] = record.mp4.sample_sizes
-            arrays[f"{file_idx}/sample_offsets"] = record.mp4.sample_offsets
-            arrays[f"{file_idx}/sync_samples"] = record.mp4.sync_samples
-        np.savez_compressed(sidecar_path, manifest_json=json.dumps(payload).encode("utf-8"), **arrays)
-
-    @staticmethod
-    def load_file_sidecar(sidecar_path: str | Path) -> dict[str, VideoFileRecord]:
-        cache_key = str(Path(sidecar_path).expanduser())
-        with EpisodeVideoManifest._FILE_SIDECAR_CACHE_LOCK:
-            cached = EpisodeVideoManifest._FILE_SIDECAR_CACHE.get(cache_key)
-        if cached is not None:
-            return cached
-
-        with np.load(sidecar_path, allow_pickle=False) as data:
-            payload = json.loads(bytes(data["manifest_json"]).decode("utf-8"))
-            records = {}
-            for file_idx, item in enumerate(payload["files"]):
-                arrays = {
-                    name: data[f"{file_idx}/{name}"]
-                    for name in [
-                        "sample_pts",
-                        "sample_durations",
-                        "sample_sizes",
-                        "sample_offsets",
-                        "sync_samples",
-                    ]
-                }
-                mp4 = Mp4Index.from_dict(item["mp4"], arrays)
-                records[item["file_path"]] = VideoFileRecord(item["file_path"], int(item["file_size"]), mp4)
-        with EpisodeVideoManifest._FILE_SIDECAR_CACHE_LOCK:
-            EpisodeVideoManifest._FILE_SIDECAR_CACHE[cache_key] = records
-        return records
-
-    def camera_id(self, camera_key: str) -> int:
-        return self._camera_to_id[camera_key]
-
-    def lookup(self, episode_index: int, camera_key: str) -> EpisodeVideoSpan:
-        cam = self.camera_id(camera_key)
-        return EpisodeVideoSpan(
-            file_id=int(self.spans["file_id"][episode_index, cam]),
-            mdat_offset=int(self.spans["mdat_offset"][episode_index, cam]),
-            mdat_length=int(self.spans["mdat_length"][episode_index, cam]),
-            first_pts=float(self.spans["first_pts"][episode_index, cam]),
-            last_pts=float(self.spans["last_pts"][episode_index, cam]),
-            frame_count=int(self.spans["frame_count"][episode_index, cam]),
-            sample_lo=int(self.spans["sample_lo"][episode_index, cam]),
-            sample_hi=int(self.spans["sample_hi"][episode_index, cam]),
-            source_start_pts=float(self.spans["source_start_pts"][episode_index, cam]),
-        )
-
-    def file_lookup(self, file_id: int) -> VideoFileRecord:
-        return self.files[file_id]
-
-    def mp4_index(self, episode_index: int, camera_key: str) -> Mp4Index:
-        return self.files[self.lookup(episode_index, camera_key).file_id].mp4
-
-    def sample_slice(self, episode_index: int, camera_key: str) -> Mp4SampleSlice:
-        span = self.lookup(episode_index, camera_key)
-        return Mp4SampleSlice(
-            sample_lo=span.sample_lo,
-            sample_hi=span.sample_hi,
-            byte_offset=span.mdat_offset,
-            byte_length=span.mdat_length,
-            source_start_pts=span.source_start_pts,
-        )
-
-
-class EpisodeByteCache:
-    def __init__(
-        self,
-        manifest: EpisodeVideoManifest,
-        data_root: str | Path,
-        *,
-        byte_budget: int = 80 * 1024**3,
-        workers: int = 8,
-        range_backend: str = "fsspec",
-        native_http_connections: int | None = None,
-        native_http_timeout: float = 60.0,
-        native_http_retries: int = 4,
-        open_decoders: bool = True,
-    ):
-        self.manifest = manifest
-        self.fetcher = make_range_fetcher(
-            data_root,
-            range_backend=range_backend,
-            workers=workers,
-            native_http_connections=native_http_connections,
-            native_http_timeout=native_http_timeout,
-            native_http_retries=native_http_retries,
-        )
-        self.byte_budget = byte_budget
-        self.open_decoders = open_decoders
-        self._pool = ThreadPoolExecutor(max_workers=workers)
-        self._cache: OrderedDict[tuple[int, str], dict[str, Any]] = OrderedDict()
-        self._futures: dict[tuple[int, str], Future[dict[str, Any]]] = {}
-        self._bytes = 0
-        self._lock = threading.Lock()
-        self._timing_totals = {
-            "lookup_s": 0.0,
-            "fetch_s": 0.0,
-            "synthesize_s": 0.0,
-            "store_s": 0.0,
-            "jobs": 0.0,
-        }
-
-    def close(self) -> None:
-        self._pool.shutdown(wait=True)
-        with self._lock:
-            self._cache.clear()
-            self._futures.clear()
-            self._bytes = 0
-        self.fetcher.close()
-
-    def __enter__(self) -> EpisodeByteCache:
-        return self
-
-    def __exit__(self, *_exc) -> None:
-        self.close()
-
-    def submit_prefetch(self, episode_index: int) -> None:
-        for camera_key in self.manifest.video_keys:
-            self._submit(episode_index, camera_key)
-
-    def ensure_ready(self, episode_index: int) -> None:
-        for camera_key in self.manifest.video_keys:
-            self.get_bytes(episode_index, camera_key)
-
-    def get_bytes(self, episode_index: int, camera_key: str) -> bytes:
-        return self._get_entry(episode_index, camera_key)["bytes"]
-
-    def get_decoder(self, episode_index: int, camera_key: str):
-        entry = self._get_entry(episode_index, camera_key)
-        decoder = entry.get("decoder")
-        if decoder is None:
-            decoder = open_video_decoder(io.BytesIO(entry["bytes"]))
-            entry["decoder"] = decoder
-        return decoder
-
-    def get_frames(self, episode_index: int, camera_key: str, timestamps: list[float]):
-        span = self.manifest.lookup(episode_index, camera_key)
-        local_ts = [ts - span.source_start_pts for ts in timestamps]
-        decoder = self.get_decoder(episode_index, camera_key)
-        if hasattr(decoder, "get_frames_played_at"):
-            return decoder.get_frames_played_at(local_ts).data
-        metadata = decoder.metadata
-        fps = getattr(metadata, "average_fps", None)
-        if fps is None:
-            duration = max(getattr(metadata, "end_stream_seconds", 0.0), 1e-9)
-            fps = metadata.num_frames / duration
-        return decoder.get_frames_at(indices=[round(ts * fps) for ts in local_ts]).data
-
-    def timing_summary(self) -> dict[str, float]:
-        with self._lock:
-            summary = dict(self._timing_totals)
-        fetcher_summary = getattr(self.fetcher, "timing_summary", None)
-        if fetcher_summary is not None:
-            summary.update(fetcher_summary())
-        return summary
-
-    def _submit(self, episode_index: int, camera_key: str) -> Future[dict[str, Any]]:
-        key = (episode_index, camera_key)
-        with self._lock:
-            if key in self._cache:
-                future: Future[dict[str, Any]] = Future()
-                future.set_result(self._cache[key])
-                return future
-            future = self._futures.get(key)
-            if future is None:
-                future = self._pool.submit(self._fetch_and_synthesize, episode_index, camera_key)
-                self._futures[key] = future
-            return future
-
-    def _get_entry(self, episode_index: int, camera_key: str) -> dict[str, Any]:
-        key = (episode_index, camera_key)
-        with self._lock:
-            entry = self._cache.get(key)
-            if entry is not None:
-                self._cache.move_to_end(key)
-                return entry
-        future = self._submit(episode_index, camera_key)
-        entry = future.result()
-        store_start = time.perf_counter()
-        with self._lock:
-            self._futures.pop(key, None)
-            existing = self._cache.get(key)
-            if existing is not None:
-                self._cache.move_to_end(key)
-                return existing
-            self._cache[key] = entry
-            self._bytes += len(entry["bytes"])
-            self._evict_locked()
-            timings = entry.pop("_timings", None)
-            if timings is not None:
-                self._timing_totals["lookup_s"] += timings["lookup_s"]
-                self._timing_totals["fetch_s"] += timings["fetch_s"]
-                self._timing_totals["synthesize_s"] += timings["synthesize_s"]
-                self._timing_totals["store_s"] += time.perf_counter() - store_start
-                self._timing_totals["jobs"] += 1
-            return entry
-
-    def _evict_locked(self) -> None:
-        while self._bytes > self.byte_budget and self._cache:
-            _key, entry = self._cache.popitem(last=False)
-            self._bytes -= len(entry["bytes"])
-
-    def _fetch_and_synthesize(self, episode_index: int, camera_key: str) -> dict[str, Any]:
-        lookup_start = time.perf_counter()
-        span = self.manifest.lookup(episode_index, camera_key)
-        file_record = self.manifest.file_lookup(span.file_id)
-        sample_slice = Mp4SampleSlice(
-            sample_lo=span.sample_lo,
-            sample_hi=span.sample_hi,
-            byte_offset=span.mdat_offset,
-            byte_length=span.mdat_length,
-            source_start_pts=span.source_start_pts,
-        )
-        lookup_s = time.perf_counter() - lookup_start
-        fetch_start = time.perf_counter()
-        payload = self.fetcher.read_range(file_record.file_path, span.mdat_offset, span.mdat_length)
-        fetch_s = time.perf_counter() - fetch_start
-        if len(payload) != span.mdat_length:
-            raise OSError(
-                f"Short read for {file_record.file_path}: expected {span.mdat_length}, got {len(payload)}"
-            )
-        synthesize_start = time.perf_counter()
-        mp4_bytes = synthesize_mp4(file_record.mp4, sample_slice, payload)
-        synthesize_s = time.perf_counter() - synthesize_start
-        entry: dict[str, Any] = {
-            "bytes": mp4_bytes,
-            "decoder": None,
-            "_timings": {
-                "lookup_s": lookup_s,
-                "fetch_s": fetch_s,
-                "synthesize_s": synthesize_s,
-            },
-        }
-        if self.open_decoders:
-            entry["decoder"] = open_video_decoder(io.BytesIO(mp4_bytes))
-        return entry
-
-
-def open_video_decoder(file_like_or_bytesio, frame_mappings=None):
-    if frame_mappings is not None:
-        raise ValueError("Synthesized episode videos use a local timeline; pass frame_mappings=None.")
-    from torchcodec.decoders import VideoDecoder
-
-    return VideoDecoder(file_like_or_bytesio, seek_mode="approximate")
-
-
-def assert_hf_hub_range_cache_branch() -> None:
-    """Fail unless huggingface_hub was installed from the required range-cache branch."""
-
-    try:
-        dist = metadata.distribution("huggingface_hub")
-    except metadata.PackageNotFoundError as exc:
-        raise AssertionError("huggingface_hub is not installed") from exc
-
-    candidates = []
-    direct_url = dist.read_text("direct_url.json")
-    if direct_url:
-        candidates.append(direct_url)
-        with contextlib.suppress(json.JSONDecodeError):
-            parsed = json.loads(direct_url)
-            candidates.append(str(parsed.get("url", "")))
-            candidates.append(str(parsed.get("vcs_info", {}).get("requested_revision", "")))
-            candidates.append(str(parsed.get("vcs_info", {}).get("commit_id", "")))
-
-    text = "\n".join(candidates)
-    if "feat/hffs-cache-cdn-range-reads" not in text:
-        raise AssertionError(
-            "huggingface_hub must be installed from "
-            "git+https://github.com/huggingface/huggingface_hub.git@feat/hffs-cache-cdn-range-reads"
-        )
-
-
-@dataclass
-class StageTimer:
-    fetch_ms: float = 0.0
-    decode_ms: float = 0.0
-    bytes_read: int = 0
-    misses: int = 0
-
-    def record_fetch(self, start: float, byte_count: int) -> None:
-        self.fetch_ms += (time.perf_counter() - start) * 1000
-        self.bytes_read += byte_count
-        self.misses += 1
@@ -20,6 +20,7 @@ import datasets
 import numpy as np
 import pandas
 import pandas as pd
+import pyarrow as pa
 import pyarrow.dataset as pa_ds
 import pyarrow.parquet as pq
 import torch
@@ -270,21 +271,49 @@ def hf_transform_to_torch(items_dict: dict[str, list[Any]]) -> dict[str, list[to
    return items_dict


+def write_table_one_row_group_per_episode(table: pa.Table, path: Path) -> None:
+    """Write ``table`` with one parquet row group per episode (in episode order).
+
+    Keeps shards random-access friendly (``read_row_group(i)`` fetches episode i),
+    mirroring the recording writer. ``table`` must carry a contiguous
+    ``episode_index`` column.
+    """
+    episode_index = table.column("episode_index").to_numpy(zero_copy_only=False)
+    starts = np.concatenate(([0], np.nonzero(np.diff(episode_index))[0] + 1))
+    writer = pq.ParquetWriter(str(path), table.schema, compression="snappy", use_dictionary=True)
+    try:
+        for start, stop in zip(starts, np.append(starts[1:], len(episode_index)), strict=True):
+            writer.write_table(table.slice(start, stop - start))  # one episode -> one row group
+    finally:
+        writer.close()
+
+
 def to_parquet_with_hf_images(
    df: pandas.DataFrame, path: Path, features: datasets.Features | None = None
 ) -> None:
-    """This function correctly writes to parquet a panda DataFrame that contains images encoded by HF dataset.
-    This way, it can be loaded by HF dataset and correctly formatted images are returned.
+    """Write a DataFrame with HF-encoded images to parquet, one row group per episode.

-    Args:
-        df: DataFrame to write to parquet.
-        path: Path to write the parquet file.
-        features: Optional HuggingFace Features schema. If provided, ensures image columns
-                  are properly typed as Image() in the parquet schema.
+    Images are embedded into the arrow table first (``ParquetWriter.write_table``
+    does not embed external image files like ``Dataset.to_parquet`` does).
+    ``features`` types image columns as ``Image()`` in the parquet schema.
    """
-    # TODO(qlhoest): replace this weird synthax by `df.to_parquet(path)` only
    ds = datasets.Dataset.from_dict(df.to_dict(orient="list"), features=features)
-    ds.to_parquet(path)
+    ds = embed_images(ds)
+    table = ds.with_format("arrow")[:]
+    if "episode_index" in table.column_names:
+        write_table_one_row_group_per_episode(table, path)
+    else:
+        # No episode boundaries to align row groups to — keep a single write.
+        pq.write_table(table, str(path))
+
+
+def to_parquet_one_row_group_per_episode(df: pandas.DataFrame, path: Path) -> None:
+    """Write a (non-image) DataFrame to parquet with one row group per episode."""
+    table = pa.Table.from_pandas(df, preserve_index=False)
+    if "episode_index" in table.column_names:
+        write_table_one_row_group_per_episode(table, path)
+    else:
+        pq.write_table(table, str(path))


 def item_to_torch(item: dict) -> dict:
@@ -1,666 +0,0 @@
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-from __future__ import annotations
-
-import struct
-from collections.abc import Callable, Iterable
-from dataclasses import dataclass
-
-import numpy as np
-
-
-@dataclass(frozen=True)
-class Box:
-    type: bytes
-    start: int
-    header_size: int
-    end: int
-
-    @property
-    def payload_start(self) -> int:
-        return self.start + self.header_size
-
-    @property
-    def size(self) -> int:
-        return self.end - self.start
-
-
-@dataclass(frozen=True)
-class Mp4SampleSlice:
-    sample_lo: int
-    sample_hi: int
-    byte_offset: int
-    byte_length: int
-    source_start_pts: float
-
-
-@dataclass(frozen=True)
-class Mp4Index:
-    file_path: str
-    file_size: int
-    ftyp: bytes
-    moov_offset: int
-    mdat_offset: int
-    mdat_payload_offset: int
-    mdat_payload_size: int
-    faststart: bool
-    codec: str
-    timescale: int
-    duration: int
-    track_id: int
-    width: int
-    height: int
-    stsd_body: bytes
-    sample_pts: np.ndarray
-    sample_durations: np.ndarray
-    sample_sizes: np.ndarray
-    sample_offsets: np.ndarray
-    sync_samples: np.ndarray
-
-    def sample_slice(
-        self,
-        from_ts: float,
-        to_ts: float,
-        *,
-        keyframe_pad_s: float = 0.1,
-        keyframe_pad_fraction: float = 0.05,
-        file_size: int | None = None,
-    ) -> Mp4SampleSlice:
-        if to_ts < from_ts:
-            raise ValueError(f"Invalid timestamp span: {from_ts=} {to_ts=}")
-        if len(self.sample_pts) == 0:
-            raise ValueError(f"{self.file_path} contains no indexed samples")
-
-        pad = max(keyframe_pad_s, (to_ts - from_ts) * keyframe_pad_fraction)
-        lo_ts = max(0.0, from_ts - pad)
-        hi_ts = to_ts + pad
-        lo = int(np.searchsorted(self.sample_pts, lo_ts, side="left"))
-        hi = int(np.searchsorted(self.sample_pts, hi_ts, side="right")) - 1
-        lo = min(max(lo, 0), len(self.sample_pts) - 1)
-        hi = min(max(hi, lo), len(self.sample_pts) - 1)
-
-        if len(self.sync_samples):
-            prev_sync = self.sync_samples[self.sync_samples <= lo]
-            if len(prev_sync):
-                lo = int(prev_sync[-1])
-            else:
-                lo = int(self.sync_samples[0])
-                if lo > hi:
-                    hi = lo
-
-        offsets = self.sample_offsets[lo : hi + 1]
-        sizes = self.sample_sizes[lo : hi + 1]
-        slice_lo = int(offsets.min())
-        slice_hi = int((offsets + sizes).max())
-        if file_size is not None:
-            slice_hi = min(slice_hi, int(file_size))
-        return Mp4SampleSlice(
-            sample_lo=lo,
-            sample_hi=hi,
-            byte_offset=slice_lo,
-            byte_length=slice_hi - slice_lo,
-            source_start_pts=float(self.sample_pts[lo]),
-        )
-
-    def to_dict(self) -> dict:
-        return {
-            "file_path": self.file_path,
-            "file_size": self.file_size,
-            "ftyp": self.ftyp.hex(),
-            "moov_offset": self.moov_offset,
-            "mdat_offset": self.mdat_offset,
-            "mdat_payload_offset": self.mdat_payload_offset,
-            "mdat_payload_size": self.mdat_payload_size,
-            "faststart": self.faststart,
-            "codec": self.codec,
-            "timescale": self.timescale,
-            "duration": self.duration,
-            "track_id": self.track_id,
-            "width": self.width,
-            "height": self.height,
-            "stsd_body": self.stsd_body.hex(),
-        }
-
-    @classmethod
-    def from_dict(cls, data: dict, arrays: dict[str, np.ndarray]) -> Mp4Index:
-        return cls(
-            file_path=data["file_path"],
-            file_size=int(data["file_size"]),
-            ftyp=bytes.fromhex(data["ftyp"]),
-            moov_offset=int(data["moov_offset"]),
-            mdat_offset=int(data["mdat_offset"]),
-            mdat_payload_offset=int(data["mdat_payload_offset"]),
-            mdat_payload_size=int(data["mdat_payload_size"]),
-            faststart=bool(data["faststart"]),
-            codec=data["codec"],
-            timescale=int(data["timescale"]),
-            duration=int(data["duration"]),
-            track_id=int(data["track_id"]),
-            width=int(data["width"]),
-            height=int(data["height"]),
-            stsd_body=bytes.fromhex(data["stsd_body"]),
-            sample_pts=arrays["sample_pts"],
-            sample_durations=arrays["sample_durations"],
-            sample_sizes=arrays["sample_sizes"],
-            sample_offsets=arrays["sample_offsets"],
-            sync_samples=arrays["sync_samples"],
-        )
-
-
-def fetch_mp4_index(
-    path: str,
-    read_range: Callable[[str, int, int], bytes],
-    *,
-    file_size: int,
-    header_probe_bytes: int = 4 * 1024 * 1024,
-    max_probe_bytes: int = 64 * 1024 * 1024,
-) -> Mp4Index:
-    probe_size = min(header_probe_bytes, file_size)
-    while True:
-        data = read_range(path, 0, probe_size)
-        top = list(iter_boxes(data, 0, len(data), absolute_base=0, allow_truncated=True))
-        has_mdat = any(box.type == b"mdat" for box in top)
-        has_moov = any(box.type == b"moov" and box.end <= len(data) for box in top)
-        if has_mdat and has_moov:
-            return parse_mp4_index(path, data, file_size=file_size)
-        if probe_size >= min(max_probe_bytes, file_size):
-            if has_mdat and not has_moov:
-                tail_index = _fetch_tail_moov_index(path, read_range, data, top, file_size, max_probe_bytes)
-                if tail_index is not None:
-                    return tail_index
-            missing = []
-            if not has_mdat:
-                missing.append("mdat")
-            if not has_moov:
-                missing.append("moov")
-            raise ValueError(
-                f"Could not find complete {'/'.join(missing)} in first {probe_size} bytes of {path}"
-            )
-        probe_size = min(probe_size * 2, max_probe_bytes, file_size)
-
-
-def _fetch_tail_moov_index(
-    path: str,
-    read_range: Callable[[str, int, int], bytes],
-    prefix: bytes,
-    top_boxes: list[Box],
-    file_size: int,
-    max_probe_bytes: int,
-) -> Mp4Index | None:
-    mdat_box = _one(top_boxes, b"mdat")
-    if mdat_box is None or mdat_box.end >= file_size:
-        return None
-    tail_offset = mdat_box.end
-    tail_length = min(max_probe_bytes, file_size - tail_offset)
-    tail = read_range(path, tail_offset, tail_length)
-    tail_boxes = list(iter_boxes(tail, 0, len(tail), absolute_base=tail_offset, allow_truncated=True))
-    moov_box = next(
-        (box for box in tail_boxes if box.type == b"moov" and box.end <= tail_offset + len(tail)), None
-    )
-    if moov_box is None:
-        return None
-    ftyp_box = _one(top_boxes, b"ftyp", required=False)
-    ftyp = (
-        prefix[ftyp_box.start : ftyp_box.end]
-        if ftyp_box is not None
-        else _box(b"ftyp", b"isom\0\0\2\0isomiso2mp41")
-    )
-    moov_start = moov_box.payload_start - tail_offset
-    moov_end = moov_box.end - tail_offset
-    return _parse_mp4_index_from_layout(
-        path,
-        file_size=file_size,
-        ftyp=ftyp,
-        moov_offset=moov_box.start,
-        moov=tail[moov_start:moov_end],
-        mdat_box=mdat_box,
-    )
-
-
-def parse_mp4_index(path: str, data: bytes, *, file_size: int | None = None) -> Mp4Index:
-    if file_size is None:
-        file_size = len(data)
-    top = list(iter_boxes(data, 0, len(data), absolute_base=0, allow_truncated=True))
-    ftyp_box = _one(top, b"ftyp", required=False)
-    moov_box = _one(top, b"moov")
-    mdat_box = _one(top, b"mdat")
-    if moov_box.end > len(data):
-        raise ValueError(f"{path}: moov box is truncated")
-
-    moov = data[moov_box.payload_start : moov_box.end]
-    ftyp = (
-        data[ftyp_box.start : ftyp_box.end]
-        if ftyp_box is not None
-        else _box(b"ftyp", b"isom\0\0\2\0isomiso2mp41")
-    )
-    return _parse_mp4_index_from_layout(
-        path,
-        file_size=file_size,
-        ftyp=ftyp,
-        moov_offset=moov_box.start,
-        moov=moov,
-        mdat_box=mdat_box,
-    )
-
-
-def _parse_mp4_index_from_layout(
-    path: str,
-    *,
-    file_size: int,
-    ftyp: bytes,
-    moov_offset: int,
-    moov: bytes,
-    mdat_box: Box,
-) -> Mp4Index:
-    mvhd_timescale, mvhd_duration = _parse_mvhd(_find_descendant(moov, [b"mvhd"]))
-    trak_box, trak_payload = _find_video_trak(moov)
-    _ = trak_box
-    tkhd = _parse_tkhd(_find_descendant(trak_payload, [b"tkhd"]))
-    mdhd_timescale, mdhd_duration = _parse_mdhd(_find_descendant(trak_payload, [b"mdia", b"mdhd"]))
-    stbl = _find_descendant(trak_payload, [b"mdia", b"minf", b"stbl"])
-
-    stsd = _find_child(stbl, b"stsd")
-    stsd_body = stbl[stsd.payload_start : stsd.end]
-    codec = _parse_stsd_codec(stsd_body)
-    stts = _parse_stts(_payload(stbl, b"stts"))
-    sample_sizes = _parse_stsz(_payload(stbl, b"stsz"))
-    stsc = _parse_stsc(_payload(stbl, b"stsc"))
-    chunk_offsets = _parse_chunk_offsets(stbl)
-    sync_samples = _parse_stss(stbl, len(sample_sizes))
-
-    sample_durations = _expand_stts(stts, len(sample_sizes))
-    sample_pts_units = np.empty(len(sample_durations), dtype=np.int64)
-    if len(sample_durations):
-        sample_pts_units[0] = 0
-        if len(sample_durations) > 1:
-            sample_pts_units[1:] = np.cumsum(sample_durations[:-1], dtype=np.int64)
-    sample_pts = sample_pts_units.astype(np.float64) / float(mdhd_timescale)
-    sample_offsets = _sample_offsets(stsc, chunk_offsets, sample_sizes)
-
-    return Mp4Index(
-        file_path=path,
-        file_size=file_size,
-        ftyp=ftyp,
-        moov_offset=moov_offset,
-        mdat_offset=mdat_box.start,
-        mdat_payload_offset=mdat_box.payload_start,
-        mdat_payload_size=mdat_box.end - mdat_box.payload_start
-        if mdat_box.end <= file_size
-        else file_size - mdat_box.payload_start,
-        faststart=moov_offset < mdat_box.start,
-        codec=codec,
-        timescale=mdhd_timescale,
-        duration=mdhd_duration or mvhd_duration,
-        track_id=tkhd["track_id"],
-        width=tkhd["width"],
-        height=tkhd["height"],
-        stsd_body=stsd_body,
-        sample_pts=sample_pts,
-        sample_durations=sample_durations,
-        sample_sizes=sample_sizes,
-        sample_offsets=sample_offsets,
-        sync_samples=sync_samples,
-    )
-
-
-def synthesize_mp4(index: Mp4Index, sample_slice: Mp4SampleSlice, mdat_payload: bytes) -> bytes:
-    lo = sample_slice.sample_lo
-    hi = sample_slice.sample_hi + 1
-    if lo < 0 or hi > len(index.sample_sizes) or lo >= hi:
-        raise ValueError(f"Invalid sample range [{lo}, {hi}) for {index.file_path}")
-
-    offsets = index.sample_offsets[lo:hi]
-    sizes = index.sample_sizes[lo:hi]
-    rel_offsets = offsets - sample_slice.byte_offset
-    if int(rel_offsets.min()) != 0:
-        raise ValueError("Sample slice must start at the minimum referenced sample offset")
-    if int((rel_offsets + sizes).max()) > len(mdat_payload):
-        raise ValueError("Sample slice does not cover all referenced samples")
-
-    durations = index.sample_durations[lo:hi]
-    sync = index.sync_samples[(index.sync_samples >= lo) & (index.sync_samples < hi)] - lo + 1
-    moov = _make_moov(index, durations, sizes, rel_offsets, sync, mdat_data_offset=0)
-    header_size = len(index.ftyp) + len(moov)
-    moov = _make_moov(index, durations, sizes, rel_offsets, sync, mdat_data_offset=header_size + 8)
-    return index.ftyp + moov + _box(b"mdat", mdat_payload)
-
-
-def iter_boxes(
-    data: bytes,
-    start: int,
-    end: int,
-    *,
-    absolute_base: int = 0,
-    allow_truncated: bool = False,
-) -> Iterable[Box]:
-    pos = start
-    while pos + 8 <= end:
-        size = struct.unpack_from(">I", data, pos)[0]
-        typ = data[pos + 4 : pos + 8]
-        header_size = 8
-        if size == 1:
-            if pos + 16 > end:
-                break
-            size = struct.unpack_from(">Q", data, pos + 8)[0]
-            header_size = 16
-        elif size == 0:
-            size = end - pos
-        if size < header_size:
-            break
-        box_end = pos + size
-        if box_end > end and not allow_truncated:
-            break
-        yield Box(typ, absolute_base + pos, header_size, absolute_base + box_end)
-        pos = box_end
-
-
-def _find_video_trak(moov: bytes) -> tuple[Box, bytes]:
-    for trak in _children(moov, 0, len(moov)):
-        if trak.type != b"trak":
-            continue
-        payload = moov[trak.payload_start : trak.end]
-        hdlr = _find_descendant(payload, [b"mdia", b"hdlr"])
-        if hdlr[8:12] == b"vide":
-            return trak, payload
-    raise ValueError("No video track found")
-
-
-def _find_descendant(data: bytes, path: list[bytes]) -> bytes:
-    current = data
-    for typ in path:
-        box = _find_child(current, typ)
-        current = current[box.payload_start : box.end]
-    return current
-
-
-def _find_child(data: bytes, typ: bytes) -> Box:
-    for box in _children(data, 0, len(data)):
-        if box.type == typ:
-            return box
-    raise ValueError(f"Missing MP4 box {typ.decode('latin1')}")
-
-
-def _children(data: bytes, start: int, end: int) -> Iterable[Box]:
-    return iter_boxes(data, start, end, absolute_base=0)
-
-
-def _one(boxes: list[Box], typ: bytes, *, required: bool = True) -> Box | None:
-    matches = [box for box in boxes if box.type == typ]
-    if not matches and required:
-        raise ValueError(f"Missing MP4 box {typ.decode('latin1')}")
-    return matches[0] if matches else None
-
-
-def _payload(parent: bytes, typ: bytes) -> bytes:
-    box = _find_child(parent, typ)
-    return parent[box.payload_start : box.end]
-
-
-def _parse_mvhd(payload: bytes) -> tuple[int, int]:
-    version = payload[0]
-    if version == 1:
-        return struct.unpack_from(">IQ", payload, 20)
-    return struct.unpack_from(">II", payload, 12)
-
-
-def _parse_mdhd(payload: bytes) -> tuple[int, int]:
-    version = payload[0]
-    if version == 1:
-        return struct.unpack_from(">IQ", payload, 20)
-    return struct.unpack_from(">II", payload, 12)
-
-
-def _parse_tkhd(payload: bytes) -> dict[str, int]:
-    version = payload[0]
-    if version == 1:
-        track_id = struct.unpack_from(">I", payload, 20)[0]
-        duration = struct.unpack_from(">Q", payload, 28)[0]
-        width, height = struct.unpack_from(">II", payload, 88)
-    else:
-        track_id = struct.unpack_from(">I", payload, 12)[0]
-        duration = struct.unpack_from(">I", payload, 20)[0]
-        width, height = struct.unpack_from(">II", payload, 76)
-    return {"track_id": track_id, "duration": duration, "width": width >> 16, "height": height >> 16}
-
-
-def _parse_stsd_codec(stsd_body: bytes) -> str:
-    if len(stsd_body) < 16:
-        return "unknown"
-    return stsd_body[12:16].decode("latin1")
-
-
-def _parse_stts(payload: bytes) -> list[tuple[int, int]]:
-    count = struct.unpack_from(">I", payload, 4)[0]
-    out = []
-    offset = 8
-    for _ in range(count):
-        out.append(struct.unpack_from(">II", payload, offset))
-        offset += 8
-    return out
-
-
-def _expand_stts(entries: list[tuple[int, int]], sample_count: int) -> np.ndarray:
-    values = np.empty(sample_count, dtype=np.int64)
-    pos = 0
-    for count, delta in entries:
-        values[pos : pos + count] = delta
-        pos += count
-    if pos != sample_count:
-        raise ValueError(f"stts describes {pos} samples, stsz describes {sample_count}")
-    return values
-
-
-def _parse_stsz(payload: bytes) -> np.ndarray:
-    sample_size, sample_count = struct.unpack_from(">II", payload, 4)
-    if sample_size:
-        return np.full(sample_count, sample_size, dtype=np.int64)
-    offset = 12
-    values = np.empty(sample_count, dtype=np.int64)
-    for idx in range(sample_count):
-        values[idx] = struct.unpack_from(">I", payload, offset)[0]
-        offset += 4
-    return values
-
-
-def _parse_stsc(payload: bytes) -> list[tuple[int, int, int]]:
-    count = struct.unpack_from(">I", payload, 4)[0]
-    out = []
-    offset = 8
-    for _ in range(count):
-        out.append(struct.unpack_from(">III", payload, offset))
-        offset += 12
-    return out
-
-
-def _parse_chunk_offsets(stbl: bytes) -> np.ndarray:
-    with_stco = None
-    with_co64 = None
-    for box in _children(stbl, 0, len(stbl)):
-        if box.type == b"stco":
-            with_stco = stbl[box.payload_start : box.end]
-        elif box.type == b"co64":
-            with_co64 = stbl[box.payload_start : box.end]
-    if with_co64 is not None:
-        count = struct.unpack_from(">I", with_co64, 4)[0]
-        return np.array(
-            [struct.unpack_from(">Q", with_co64, 8 + idx * 8)[0] for idx in range(count)], dtype=np.int64
-        )
-    if with_stco is None:
-        raise ValueError("Missing stco/co64 chunk offsets")
-    count = struct.unpack_from(">I", with_stco, 4)[0]
-    return np.array(
-        [struct.unpack_from(">I", with_stco, 8 + idx * 4)[0] for idx in range(count)], dtype=np.int64
-    )
-
-
-def _parse_stss(stbl: bytes, sample_count: int) -> np.ndarray:
-    for box in _children(stbl, 0, len(stbl)):
-        if box.type == b"stss":
-            payload = stbl[box.payload_start : box.end]
-            count = struct.unpack_from(">I", payload, 4)[0]
-            return np.array(
-                [struct.unpack_from(">I", payload, 8 + idx * 4)[0] - 1 for idx in range(count)],
-                dtype=np.int64,
-            )
-    return np.arange(sample_count, dtype=np.int64)
-
-
-def _sample_offsets(
-    stsc: list[tuple[int, int, int]], chunk_offsets: np.ndarray, sample_sizes: np.ndarray
-) -> np.ndarray:
-    if not stsc:
-        raise ValueError("stsc is empty")
-    offsets = np.empty(len(sample_sizes), dtype=np.int64)
-    sample_idx = 0
-    for entry_idx, (first_chunk, samples_per_chunk, _desc_idx) in enumerate(stsc):
-        next_first = stsc[entry_idx + 1][0] if entry_idx + 1 < len(stsc) else len(chunk_offsets) + 1
-        for chunk_number in range(first_chunk, next_first):
-            if chunk_number < 1 or chunk_number > len(chunk_offsets):
-                raise ValueError("stsc references a chunk outside stco/co64")
-            chunk_pos = int(chunk_offsets[chunk_number - 1])
-            for _ in range(samples_per_chunk):
-                if sample_idx >= len(sample_sizes):
-                    return offsets
-                offsets[sample_idx] = chunk_pos
-                chunk_pos += int(sample_sizes[sample_idx])
-                sample_idx += 1
-    if sample_idx != len(sample_sizes):
-        raise ValueError(f"stsc describes {sample_idx} samples, stsz describes {len(sample_sizes)}")
-    return offsets
-
-
-def _make_moov(
-    index: Mp4Index,
-    durations: np.ndarray,
-    sizes: np.ndarray,
-    rel_offsets: np.ndarray,
-    sync_samples: np.ndarray,
-    *,
-    mdat_data_offset: int,
-) -> bytes:
-    duration = int(durations.sum())
-    stco_values = [int(mdat_data_offset + value) for value in rel_offsets]
-    if any(value > 0xFFFFFFFF for value in stco_values):
-        offset_box = _co64(stco_values)
-    else:
-        offset_box = _stco(stco_values)
-    stbl = _box(
-        b"stbl",
-        _box(b"stsd", index.stsd_body)
-        + _stts(durations)
-        + _stsc_one_sample_per_chunk(len(sizes))
-        + _stsz(sizes)
-        + offset_box
-        + (_stss(sync_samples) if len(sync_samples) else b""),
-    )
-    minf = _box(b"minf", _vmhd() + _dinf() + stbl)
-    mdia = _box(b"mdia", _mdhd(index.timescale, duration) + _hdlr() + minf)
-    trak = _box(b"trak", _tkhd(index.track_id, duration, index.width, index.height) + mdia)
-    return _box(b"moov", _mvhd(index.timescale, duration, index.track_id + 1) + trak)
-
-
-def _full_box(typ: bytes, version: int, flags: int, payload: bytes = b"") -> bytes:
-    return _box(typ, bytes([version]) + flags.to_bytes(3, "big") + payload)
-
-
-def _box(typ: bytes, payload: bytes) -> bytes:
-    size = len(payload) + 8
-    if size <= 0xFFFFFFFF:
-        return struct.pack(">I4s", size, typ) + payload
-    return struct.pack(">I4sQ", 1, typ, size + 8) + payload
-
-
-def _mvhd(timescale: int, duration: int, next_track_id: int) -> bytes:
-    matrix = struct.pack(">9I", 0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000)
-    payload = (
-        struct.pack(">IIII", 0, 0, timescale, duration)
-        + struct.pack(">IHH", 0x00010000, 0x0100, 0)
-        + b"\0" * 8
-        + matrix
-        + b"\0" * 24
-        + struct.pack(">I", next_track_id)
-    )
-    return _full_box(b"mvhd", 0, 0, payload)
-
-
-def _tkhd(track_id: int, duration: int, width: int, height: int) -> bytes:
-    matrix = struct.pack(">9I", 0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000)
-    payload = (
-        struct.pack(">IIIII", 0, 0, track_id, 0, duration)
-        + b"\0" * 8
-        + struct.pack(">hhhh", 0, 0, 0, 0)
-        + matrix
-        + struct.pack(">II", width << 16, height << 16)
-    )
-    return _full_box(b"tkhd", 0, 7, payload)
-
-
-def _mdhd(timescale: int, duration: int) -> bytes:
-    return _full_box(b"mdhd", 0, 0, struct.pack(">IIIIH", 0, 0, timescale, duration, 0x55C4) + b"\0\0")
-
-
-def _hdlr() -> bytes:
-    return _full_box(b"hdlr", 0, 0, b"\0" * 4 + b"vide" + b"\0" * 12 + b"VideoHandler\0")
-
-
-def _vmhd() -> bytes:
-    return _full_box(b"vmhd", 0, 1, struct.pack(">HHHH", 0, 0, 0, 0))
-
-
-def _dinf() -> bytes:
-    url = _full_box(b"url ", 0, 1)
-    dref = _full_box(b"dref", 0, 0, struct.pack(">I", 1) + url)
-    return _box(b"dinf", dref)
-
-
-def _stts(durations: np.ndarray) -> bytes:
-    runs = []
-    for duration in durations.tolist():
-        if runs and runs[-1][1] == int(duration):
-            runs[-1][0] += 1
-        else:
-            runs.append([1, int(duration)])
-    payload = struct.pack(">I", len(runs)) + b"".join(
-        struct.pack(">II", count, delta) for count, delta in runs
-    )
-    return _full_box(b"stts", 0, 0, payload)
-
-
-def _stsc_one_sample_per_chunk(sample_count: int) -> bytes:
-    return _full_box(b"stsc", 0, 0, struct.pack(">IIII", 1, 1, 1, 1))
-
-
-def _stsz(sizes: np.ndarray) -> bytes:
-    return _full_box(
-        b"stsz",
-        0,
-        0,
-        struct.pack(">II", 0, len(sizes)) + b"".join(struct.pack(">I", int(size)) for size in sizes.tolist()),
-    )
-
-
-def _stco(values: list[int]) -> bytes:
-    return _full_box(
-        b"stco", 0, 0, struct.pack(">I", len(values)) + b"".join(struct.pack(">I", v) for v in values)
-    )
-
-
-def _co64(values: list[int]) -> bytes:
-    return _full_box(
-        b"co64", 0, 0, struct.pack(">I", len(values)) + b"".join(struct.pack(">Q", v) for v in values)
-    )
-
-
-def _stss(values: np.ndarray) -> bytes:
-    return _full_box(
-        b"stss",
-        0,
-        0,
-        struct.pack(">I", len(values)) + b"".join(struct.pack(">I", int(value)) for value in values.tolist()),
-    )
@@ -28,6 +28,7 @@ import pytest
 pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])")
 pytest.importorskip("pandas", reason="pandas is required (install lerobot[dataset])")

+import pandas as pd  # noqa: E402
 import pyarrow.parquet as pq  # noqa: E402

 from lerobot.annotations.steerable_pipeline.reader import iter_episodes  # noqa: E402
@@ -344,6 +345,78 @@ def test_annotation_metadata_sync_allows_non_streaming_load(
    assert len(dataset) == 24


+def _build_packed_dataset(root: Path, episode_lengths: list[int], *, fps: int = 10) -> Path:
+    """Pack several episodes into a single shard (vs build_annotation_dataset's one-per-file),
+    so the writer's rewrite must re-emit one row group per episode instead of collapsing them."""
+    from lerobot.datasets.io_utils import write_tasks
+    from lerobot.utils.io_utils import write_json
+
+    data_dir = root / "data" / "chunk-000"
+    data_dir.mkdir(parents=True, exist_ok=True)
+
+    episode_index, frame_index, timestamp, task_index, subtask_index = [], [], [], [], []
+    for ep, length in enumerate(episode_lengths):
+        episode_index += [ep] * length
+        frame_index += list(range(length))
+        timestamp += [round(i / fps, 6) for i in range(length)]
+        task_index += [0] * length
+        subtask_index += [0] * length  # legacy column the writer must drop
+    pd.DataFrame(
+        {
+            "episode_index": episode_index,
+            "frame_index": frame_index,
+            "timestamp": timestamp,
+            "task_index": task_index,
+            "subtask_index": subtask_index,
+        }
+    ).to_parquet(data_dir / "file-000.parquet", index=False)
+
+    tasks_df = pd.DataFrame({"task_index": [0]}, index=pd.Index(["do the thing"], name="task"))
+    write_tasks(tasks_df, root)
+    write_json(
+        {"codebase_version": "v3.1", "fps": fps, "features": {}, "total_episodes": len(episode_lengths)},
+        root / "meta" / "info.json",
+    )
+    return root
+
+
+def test_writer_one_row_group_per_episode(tmp_path: Path) -> None:
+    """Rewriting a packed shard must keep one row group per episode, not collapse
+    every episode into a single giant row group."""
+    episode_lengths = [4, 6, 5]  # unequal lengths, all in one shard
+    root = _build_packed_dataset(tmp_path / "ds", episode_lengths)
+    shard = root / "data" / "chunk-000" / "file-000.parquet"
+    assert pq.ParquetFile(shard).metadata.num_row_groups == 1, "fixture should start collapsed"
+
+    staging_dir = tmp_path / "stage"
+    for ep in range(len(episode_lengths)):
+        _stage_episode(
+            staging_dir,
+            ep,
+            plan=[
+                {
+                    "role": "assistant",
+                    "content": f"subtask for ep {ep}",
+                    "style": "subtask",
+                    "timestamp": 0.0,
+                    "tool_calls": None,
+                }
+            ],
+        )
+
+    records = list(iter_episodes(root))
+    LanguageColumnsWriter().write_all(records, staging_dir, root)
+
+    # One row group per episode, with row counts matching the episode lengths.
+    md = pq.ParquetFile(shard).metadata
+    assert md.num_row_groups == len(episode_lengths)
+    assert [md.row_group(i).num_rows for i in range(md.num_row_groups)] == episode_lengths
+    # Language columns are still present after the per-episode rewrite.
+    table = pq.read_table(shard)
+    assert "language_persistent" in table.column_names
+    assert "language_events" in table.column_names
+
+
 def test_speech_atom_shape_matches_plan_spec() -> None:
    atom = speech_atom(2.5, "I'm cleaning up!")
    assert atom["role"] == "assistant"
@@ -32,6 +32,26 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from tests.fixtures.constants import DUMMY_REPO_ID


+def assert_data_shards_one_row_group_per_episode(root):
+    """Every aggregated DATA shard must have exactly one parquet row group per episode."""
+    import pyarrow.parquet as pq
+
+    shards = sorted((root / "data").rglob("*.parquet"))
+    assert shards, f"no data shards found under {root}/data"
+    n_episodes = 0
+    for shard in shards:
+        pf = pq.ParquetFile(shard)
+        episodes = pf.read(columns=["episode_index"]).column("episode_index").to_pylist()
+        assert pf.metadata.num_row_groups == len(set(episodes)), shard
+        for i in range(pf.metadata.num_row_groups):
+            rg_episodes = set(
+                pf.read_row_group(i, columns=["episode_index"]).column("episode_index").to_pylist()
+            )
+            assert len(rg_episodes) == 1, f"{shard} row group {i} spans episodes {rg_episodes}"
+        n_episodes += len(set(episodes))
+    return n_episodes
+
+
 def assert_episode_and_frame_counts(aggr_ds, expected_episodes, expected_frames):
    """Test that total number of episodes and frames are correctly aggregated."""
    assert aggr_ds.num_episodes == expected_episodes, (
@@ -566,6 +586,41 @@ def assert_image_frames_integrity(aggr_ds, ds_0, ds_1):
            )


+@pytest.mark.parametrize("use_videos", [True, False], ids=["video", "image"])
+def test_aggregate_one_row_group_per_episode(tmp_path, lerobot_dataset_factory, use_videos):
+    """Aggregated DATA shards keep one row group per episode (not one collapsed group).
+
+    Covers both the non-image (``df.to_parquet``) and image
+    (``to_parquet_with_hf_images``) write branches, including the merge-into-
+    existing-file branch via a low file-size threshold that forces packing.
+    """
+    ds_0 = lerobot_dataset_factory(
+        root=tmp_path / "rg_0",
+        repo_id=f"{DUMMY_REPO_ID}_rg_0",
+        total_episodes=3,
+        total_frames=60,
+        use_videos=use_videos,
+    )
+    ds_1 = lerobot_dataset_factory(
+        root=tmp_path / "rg_1",
+        repo_id=f"{DUMMY_REPO_ID}_rg_1",
+        total_episodes=4,
+        total_frames=80,
+        use_videos=use_videos,
+    )
+
+    aggr_root = tmp_path / "rg_aggr"
+    aggregate_datasets(
+        repo_ids=[ds_0.repo_id, ds_1.repo_id],
+        roots=[ds_0.root, ds_1.root],
+        aggr_repo_id=f"{DUMMY_REPO_ID}_rg_aggr",
+        aggr_root=aggr_root,
+    )
+
+    n_episodes = assert_data_shards_one_row_group_per_episode(aggr_root)
+    assert n_episodes == ds_0.num_episodes + ds_1.num_episodes
+
+
 def test_aggregate_image_datasets(tmp_path, lerobot_dataset_factory):
    """Test aggregation of image-based datasets preserves HuggingFace Image schema.

@@ -51,7 +51,7 @@ from lerobot.robots import make_robot_from_config
 from lerobot.transforms import ImageTransforms, ImageTransformsConfig
 from lerobot.utils.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, OBS_STR, REWARD
 from lerobot.utils.feature_utils import hw_to_dataset_features
-from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID
+from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_MOTOR_FEATURES, DUMMY_REPO_ID
 from tests.mocks.mock_robot import MockRobotConfig
 from tests.utils import require_x86_64_kernel

@@ -133,6 +133,21 @@ def test_dataset_feature_with_forward_slash_raises_error():
        )


+def test_create_does_not_mutate_input_features(tmp_path, empty_lerobot_dataset_factory):
+    # ``create`` must deep-copy features so a dataset built from another's features stays independent.
+    dataset = empty_lerobot_dataset_factory(
+        root=tmp_path / "ds1", features=DUMMY_MOTOR_FEATURES, use_videos=False
+    )
+    dataset_copy = empty_lerobot_dataset_factory(
+        root=tmp_path / "ds2", features=dataset.meta.features, use_videos=False
+    )
+
+    original_shape = dataset.meta.info.features["state"]["shape"]
+    dataset_copy.meta.info.features["state"]["shape"] = (999,)
+
+    assert dataset.meta.info.features["state"]["shape"] == original_shape
+
+
 def test_add_frame_missing_task(tmp_path, empty_lerobot_dataset_factory):
    features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
@@ -1,121 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-import json
-import struct
-
-import numpy as np
-import pytest
-
-from lerobot.datasets.episode_video_streaming import assert_hf_hub_range_cache_branch
-from lerobot.datasets.mp4 import (
-    _box,
-    _co64,
-    _dinf,
-    _hdlr,
-    _mdhd,
-    _mvhd,
-    _stco,
-    _stsc_one_sample_per_chunk,
-    _stss,
-    _stsz,
-    _stts,
-    _tkhd,
-    _vmhd,
-    parse_mp4_index,
-    synthesize_mp4,
-)
-
-
-def _minimal_mp4(sample_offsets: list[int], *, use_co64: bool = False) -> bytes:
-    ftyp = _box(b"ftyp", b"isom\0\0\2\0isomiso2mp41")
-    sizes = np.array([10, 10, 10], dtype=np.int64)
-    durations = np.array([1000, 1000, 1000], dtype=np.int64)
-    stsd_body = struct.pack(">II", 0, 1) + struct.pack(">I4s", 16, b"avc1") + b"\0" * 8
-    offsets = _co64(sample_offsets) if use_co64 else _stco(sample_offsets)
-    stbl = _box(
-        b"stbl",
-        _box(b"stsd", stsd_body)
-        + _stts(durations)
-        + _stsc_one_sample_per_chunk(len(sizes))
-        + _stsz(sizes)
-        + offsets
-        + _stss(np.array([1], dtype=np.int64)),
-    )
-    minf = _box(b"minf", _vmhd() + _dinf() + stbl)
-    mdia = _box(b"mdia", _mdhd(1000, 3000) + _hdlr() + minf)
-    trak = _box(b"trak", _tkhd(1, 3000, 64, 48) + mdia)
-    moov = _box(b"moov", _mvhd(1000, 3000, 2) + trak)
-    mdat_payload_start = 10_000
-    free_size = mdat_payload_start - 8 - len(ftyp) - len(moov)
-    assert free_size >= 8
-    free = _box(b"free", b"\0" * (free_size - 8))
-    return ftyp + moov + free + _box(b"mdat", b"x" * 128)
-
-
-def test_episode_slice_uses_min_max_sample_offsets_for_reordered_chunks():
-    mp4 = parse_mp4_index("test.mp4", _minimal_mp4([10_000, 10_050, 10_025]))
-
-    sample_slice = mp4.sample_slice(0.0, 2.0, keyframe_pad_s=0, keyframe_pad_fraction=0)
-
-    assert sample_slice.byte_offset == 10_000
-    assert sample_slice.byte_length == 60
-    assert sample_slice.sample_lo == 0
-    assert sample_slice.sample_hi == 2
-
-
-def test_synthesized_mp4_rebases_one_chunk_per_sample_offsets():
-    mp4 = parse_mp4_index("test.mp4", _minimal_mp4([10_000, 10_050, 10_025]))
-    sample_slice = mp4.sample_slice(0.0, 2.0, keyframe_pad_s=0, keyframe_pad_fraction=0)
-
-    mini = synthesize_mp4(mp4, sample_slice, b"x" * sample_slice.byte_length)
-    mini_index = parse_mp4_index("mini.mp4", mini)
-
-    expected = np.array([0, 50, 25], dtype=np.int64) + mini_index.mdat_payload_offset
-    np.testing.assert_array_equal(mini_index.sample_offsets, expected)
-    np.testing.assert_array_equal(mini_index.sample_sizes, np.array([10, 10, 10]))
-
-
-def test_parser_accepts_co64_chunk_offsets():
-    mp4 = parse_mp4_index("test.mp4", _minimal_mp4([10_000, 10_050, 10_025], use_co64=True))
-
-    np.testing.assert_array_equal(mp4.sample_offsets, np.array([10_000, 10_050, 10_025]))
-
-
-def test_hf_hub_branch_assertion_accepts_requested_revision(monkeypatch):
-    class FakeDist:
-        def read_text(self, name):
-            assert name == "direct_url.json"
-            return json.dumps(
-                {
-                    "url": "https://github.com/huggingface/huggingface_hub.git",
-                    "vcs_info": {"requested_revision": "feat/hffs-cache-cdn-range-reads"},
-                }
-            )
-
-    monkeypatch.setattr(
-        "lerobot.datasets.episode_video_streaming.metadata.distribution", lambda _: FakeDist()
-    )
-
-    assert_hf_hub_range_cache_branch()
-
-
-def test_hf_hub_branch_assertion_rejects_plain_install(monkeypatch):
-    class FakeDist:
-        def read_text(self, name):
-            assert name == "direct_url.json"
-            return json.dumps({"url": "https://github.com/huggingface/huggingface_hub.git"})
-
-    monkeypatch.setattr(
-        "lerobot.datasets.episode_video_streaming.metadata.distribution", lambda _: FakeDist()
-    )
-
-    with pytest.raises(AssertionError):
-        assert_hf_hub_range_cache_branch()
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12"
 resolution-markers = [
    "(python_full_version >= '3.15' and platform_machine == 'AMD64' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'x86_64' and sys_platform == 'linux')",
@@ -10,42 +10,26 @@ resolution-markers = [
    "(python_full_version == '3.14.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.14.*' and platform_machine == 'arm64' and sys_platform == 'linux')",
    "(python_full_version == '3.13.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'linux')",
    "(python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'linux')",
-    "python_full_version >= '3.15' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version == '3.14.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version == '3.13.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'linux'",
+    "python_full_version >= '3.15' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version == '3.14.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version == '3.13.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
    "python_full_version >= '3.15' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version == '3.14.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'darwin'",
-    "(python_full_version >= '3.15' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "(python_full_version == '3.14.*' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "(python_full_version == '3.13.*' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version < '3.13' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version < '3.13' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version < '3.13' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'win32'",
+    "(python_full_version >= '3.15' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version >= '3.15' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "python_full_version >= '3.15' and sys_platform == 'emscripten'",
+    "(python_full_version == '3.14.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.14.*' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "(python_full_version == '3.13.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "(python_full_version < '3.13' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "python_full_version == '3.14.*' and sys_platform == 'emscripten'",
+    "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
+    "python_full_version < '3.13' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.15' and sys_platform == 'win32'",
+    "python_full_version == '3.14.*' and sys_platform == 'win32'",
+    "python_full_version == '3.13.*' and sys_platform == 'win32'",
+    "python_full_version < '3.13' and sys_platform == 'win32'",
 ]

 [[package]]
@@ -224,15 +208,15 @@ sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d

 [[package]]
 name = "anyio"
-version = "4.13.0"
+version = "4.14.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "idna" },
    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1c/b5/001890774a9552aff22502b8da382593109ce0c95314abaebbb116567545/anyio-4.14.0.tar.gz", hash = "sha256:b47c1f9ccf73e67021df785332508f99379c68fa7d0684e8e3492cb1d4b23f89", size = 253586, upload-time = "2026-06-15T22:00:49.021Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/16/9826f089383c593cdfc4a6e5aca94d9e91ae1692c57af82c3b2aa5e810f7/anyio-4.14.0-py3-none-any.whl", hash = "sha256:dd9b7a2a9799ed6552fde617b2c5df02b7fdd7d88392fc48101e51bae46164d9", size = 123506, upload-time = "2026-06-15T22:00:47.595Z" },
 ]

 [[package]]
@@ -1089,8 +1073,8 @@ wheels = [

 [[package]]
 name = "datasets"
-version = "5.0.1.dev0"
-source = { git = "https://github.com/huggingface/datasets.git?branch=main#06fcc085fcdd22fc5cc741954f6187dd879543b6" }
+version = "4.8.5"
+source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "dill" },
    { name = "filelock" },
@@ -1107,6 +1091,10 @@ dependencies = [
    { name = "tqdm" },
    { name = "xxhash" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/66/34/14cd8e76f907f7d4dca2334cfeec9f81d30fd15c25a015f99aaea694eaed/datasets-4.8.5.tar.gz", hash = "sha256:0f0c1c3d56ffff2c93b2f4c63c95bac94f3d7e8621aea2a2a576275233bba772", size = 605649, upload-time = "2026-04-27T15:43:57.384Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/99/00f3196036501b53032c4b1ab8337a0b978dee832ed276dae3815df4e8b5/datasets-4.8.5-py3-none-any.whl", hash = "sha256:5079900781719c0e063a8efdd2cd95a31ad0c63209178669cd23cf1b926149ff", size = 528973, upload-time = "2026-04-27T15:43:53.702Z" },
+]

 [[package]]
 name = "debugpy"
@@ -1143,7 +1131,7 @@ name = "decord"
 version = "0.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "(platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "numpy", marker = "(platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/11/79/936af42edf90a7bd4e41a6cac89c913d4b47fa48a26b042d5129a9242ee3/decord-0.6.0-py3-none-manylinux2010_x86_64.whl", hash = "sha256:51997f20be8958e23b7c4061ba45d0efcd86bffd5fe81c695d0befee0d442976", size = 13602299, upload-time = "2021-06-14T21:30:55.486Z" },
@@ -1433,7 +1421,7 @@ wheels = [

 [[package]]
 name = "fastapi"
-version = "0.137.0"
+version = "0.137.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "annotated-doc" },
@@ -1442,9 +1430,9 @@ dependencies = [
    { name = "typing-extensions" },
    { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/da/fe/fb25c287ff7e0f79fc6acf2e8b812725dad28d2a1446c0410bab1422ac90/fastapi-0.137.0.tar.gz", hash = "sha256:d0565d551f65a803ecff245390840867186f456ef98971f750724eed16e1541c", size = 408023, upload-time = "2026-06-14T12:51:30.672Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d5/b1/e5b92c59d2c37817e77c1a8c2fc1f79cdcc04c68253e5406b43e3204cba7/fastapi-0.137.1.tar.gz", hash = "sha256:822360704230d9533d8d9475399613525968aa2f0b5bd2a3ccc9f18c88fd541c", size = 408293, upload-time = "2026-06-15T11:28:20.79Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e7/f1/b38481428e50131e5345b535414d11d196f14990122fe69c9020c64e5683/fastapi-0.137.0-py3-none-any.whl", hash = "sha256:6dcbde8d464f92117c1accb9e42720f8e423fa9b86cb563b1f5862f785a06498", size = 121777, upload-time = "2026-06-14T12:51:29.067Z" },
+    { url = "https://files.pythonhosted.org/packages/da/35/380b9a5922f4340e51c309cde09e5bd32e62f02302971bee30dc15aa0624/fastapi-0.137.1-py3-none-any.whl", hash = "sha256:64f6983c59e45c4b9fdc44e57cb8035c2451ee91ea8e8ec042aca37de7cf6b69", size = 121877, upload-time = "2026-06-15T11:28:19.523Z" },
 ]

 [[package]]
@@ -2046,8 +2034,8 @@ wheels = [

 [[package]]
 name = "huggingface-hub"
-version = "1.20.0.dev0"
-source = { git = "https://github.com/huggingface/huggingface_hub.git?branch=feat%2Fhffs-cache-cdn-range-reads#5319b287faa73239bb40df16d69c39e5d6daf0f7" }
+version = "1.19.0"
+source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "click" },
    { name = "filelock" },
@@ -2060,6 +2048,10 @@ dependencies = [
    { name = "typer" },
    { name = "typing-extensions" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/88/27/629cfe58c582f92ded066c4a07d1a057ff617118ab7973200f770bd853cb/huggingface_hub-1.19.0.tar.gz", hash = "sha256:fd771622182d40977272a923953ee3b1b13538f9f8a7f5d78398f10af0f1c0bd", size = 824721, upload-time = "2026-06-11T12:33:18.665Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b2/a5/558da89f66464d8d0229ff497e8b8666977de2d8cf48c28a2862ecf1250f/huggingface_hub-1.19.0-py3-none-any.whl", hash = "sha256:1dc72e1f6b4d6df6b30eb72e57d00514ef453d660f04af2b87f0e67267f31ee0", size = 693398, upload-time = "2026-06-11T12:33:16.695Z" },
+]

 [[package]]
 name = "hydra-core"
@@ -3179,7 +3171,7 @@ requires-dist = [
    { name = "av", marker = "extra == 'av-dep'", specifier = ">=15.0.0,<16.0.0" },
    { name = "cmake", specifier = ">=3.29.0.1,<4.2.0" },
    { name = "contourpy", marker = "extra == 'matplotlib-dep'", specifier = ">=1.3.0,<2.0.0" },
-    { name = "datasets", marker = "extra == 'dataset'", git = "https://github.com/huggingface/datasets.git?branch=main" },
+    { name = "datasets", marker = "extra == 'dataset'", specifier = ">=4.7.0,<5.0.0" },
    { name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.1,<1.9.0" },
    { name = "decord", marker = "(platform_machine == 'AMD64' and extra == 'groot') or (platform_machine == 'x86_64' and extra == 'groot')", specifier = ">=0.6.0,<1.0.0" },
    { name = "deepdiff", marker = "extra == 'deepdiff-dep'", specifier = ">=7.0.1,<9.0.0" },
@@ -3202,7 +3194,7 @@ requires-dist = [
    { name = "hebi-py", marker = "extra == 'phone'", specifier = ">=2.8.0,<2.12.0" },
    { name = "hf-libero", marker = "sys_platform == 'linux' and extra == 'libero'", specifier = ">=0.1.4,<0.2.0" },
    { name = "hidapi", marker = "extra == 'gamepad'", specifier = ">=0.14.0,<0.15.0" },
-    { name = "huggingface-hub", git = "https://github.com/huggingface/huggingface_hub.git?branch=feat%2Fhffs-cache-cdn-range-reads" },
+    { name = "huggingface-hub", specifier = ">=1.0.0,<2.0.0" },
    { name = "ipykernel", marker = "extra == 'notebook'", specifier = ">=6.0.0,<7.0.0" },
    { name = "jsonlines", marker = "extra == 'dataset'", specifier = ">=4.0.0,<5.0.0" },
    { name = "jupyter", marker = "extra == 'notebook'", specifier = ">=1.0.0,<2.0.0" },
@@ -4420,7 +4412,7 @@ wheels = [

 [[package]]
 name = "onnx"
-version = "1.21.0"
+version = "1.22.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "ml-dtypes" },
@@ -4428,29 +4420,25 @@ dependencies = [
    { name = "protobuf" },
    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c5/93/942d2a0f6a70538eea042ce0445c8aefd46559ad153469986f29a743c01c/onnx-1.21.0.tar.gz", hash = "sha256:4d8b67d0aaec5864c87633188b91cc520877477ec0254eda122bef8be43cd764", size = 12074608, upload-time = "2026-03-27T21:33:36.118Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/04/19/8ea73a64b368b75fe339771a20a02bc61ea1f551484c9e3d9d0bfbd0450f/onnx-1.22.0.tar.gz", hash = "sha256:ef40c0aaf0b643857ea9306fc7eddce17eaf9fb0407e4801f1fc5758443a38e0", size = 12024721, upload-time = "2026-06-15T12:50:05.354Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7d/ae/cb644ec84c25e63575d9d8790fdcc5d1a11d67d3f62f872edb35fa38d158/onnx-1.21.0-cp312-abi3-macosx_12_0_universal2.whl", hash = "sha256:fc2635400fe39ff37ebc4e75342cc54450eadadf39c540ff132c319bf4960095", size = 17965930, upload-time = "2026-03-27T21:32:48.089Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/b6/eeb5903586645ef8a49b4b7892580438741acc3df91d7a5bd0f3a59ea9cb/onnx-1.21.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9003d5206c01fa2ff4b46311566865d8e493e1a6998d4009ec6de39843f1b59b", size = 17531344, upload-time = "2026-03-27T21:32:50.837Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/00/4823f06357892d1e60d6f34e7299d2ba4ed2108c487cc394f7ce85a3ff14/onnx-1.21.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9261bd580fb8548c9c37b3c6750387eb8f21ea43c63880d37b2c622e1684285", size = 17613697, upload-time = "2026-03-27T21:32:54.222Z" },
-    { url = "https://files.pythonhosted.org/packages/23/1d/391f3c567ae068c8ac4f1d1316bae97c9eb45e702f05975fe0e17ad441f0/onnx-1.21.0-cp312-abi3-win32.whl", hash = "sha256:9ea4e824964082811938a9250451d89c4ec474fe42dd36c038bfa5df31993d1e", size = 16287200, upload-time = "2026-03-27T21:32:57.277Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/a6/5eefbe5b40ea96de95a766bd2e0e751f35bdea2d4b951991ec9afaa69531/onnx-1.21.0-cp312-abi3-win_amd64.whl", hash = "sha256:458d91948ad9a7729a347550553b49ab6939f9af2cddf334e2116e45467dc61f", size = 16441045, upload-time = "2026-03-27T21:33:00.081Z" },
-    { url = "https://files.pythonhosted.org/packages/63/c4/0ed8dc037a39113d2a4d66e0005e07751c299c46b993f1ad5c2c35664c20/onnx-1.21.0-cp312-abi3-win_arm64.whl", hash = "sha256:ca14bc4842fccc3187eb538f07eabeb25a779b39388b006db4356c07403a7bbb", size = 16403134, upload-time = "2026-03-27T21:33:03.987Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/89/0e1a9beb536401e2f45ac88735e123f2735e12fc7b56ff6c11727e097526/onnx-1.21.0-cp313-cp313t-macosx_12_0_universal2.whl", hash = "sha256:257d1d1deb6a652913698f1e3f33ef1ca0aa69174892fe38946d4572d89dd94f", size = 17975430, upload-time = "2026-03-27T21:33:07.005Z" },
-    { url = "https://files.pythonhosted.org/packages/ec/46/e6dc71a7b3b317265591b20a5f71d0ff5c0d26c24e52283139dc90c66038/onnx-1.21.0-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7cd7cb8f6459311bdb557cbf6c0ccc6d8ace11c304d1bba0a30b4a4688e245f8", size = 17537435, upload-time = "2026-03-27T21:33:09.765Z" },
-    { url = "https://files.pythonhosted.org/packages/49/2e/27affcac63eaf2ef183a44fd1a1354b11da64a6c72fe6f3fdcf5571bcee5/onnx-1.21.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b58a4cfec8d9311b73dc083e4c1fa362069267881144c05139b3eba5dc3a840", size = 17617687, upload-time = "2026-03-27T21:33:12.619Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/5c/ac8ed15e941593a3672ce424280b764979026317811f2e8508432bfc3429/onnx-1.21.0-cp313-cp313t-win_amd64.whl", hash = "sha256:1a9baf882562c4cebf79589bebb7cd71a20e30b51158cac3e3bbaf27da6163bd", size = 16449402, upload-time = "2026-03-27T21:33:15.555Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/aa/d2231e0dcaad838217afc64c306c8152a080134d2034e247cc973d577674/onnx-1.21.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bba12181566acf49b35875838eba49536a327b2944664b17125577d230c637ad", size = 16408273, upload-time = "2026-03-27T21:33:18.599Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/0a/8905b14694def6ad23edf1011fdd581500384062f8c4c567e114be7aa272/onnx-1.21.0-cp314-cp314t-macosx_12_0_universal2.whl", hash = "sha256:7ee9d8fd6a4874a5fa8b44bbcabea104ce752b20469b88bc50c7dcf9030779ad", size = 17975331, upload-time = "2026-03-27T21:33:21.69Z" },
-    { url = "https://files.pythonhosted.org/packages/61/28/f4e401e5199d1b9c8b76c7e7ae1169e050515258e877b58fa8bb49d3bdcc/onnx-1.21.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5489f25fe461e7f32128218251a466cabbeeaf1eaa791c79daebf1a80d5a2cc9", size = 17537430, upload-time = "2026-03-27T21:33:24.547Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/cf/5d13320eb3660d5af360ea3b43aa9c63a70c92a9b4d1ea0d34501a32fcb8/onnx-1.21.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:db17fc0fec46180b6acbd1d5d8650a04e5527c02b09381da0b5b888d02a204c8", size = 17617662, upload-time = "2026-03-27T21:33:27.418Z" },
-    { url = "https://files.pythonhosted.org/packages/4d/50/3eaa1878338247be021e6423696813d61e77e534dccbd15a703a144e703d/onnx-1.21.0-cp314-cp314t-win_amd64.whl", hash = "sha256:19d9971a3e52a12968ae6c70fd0f86c349536de0b0c33922ecdbe52d1972fe60", size = 16463688, upload-time = "2026-03-27T21:33:30.229Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/48/38d46b43bbb525e0b6a4c2c4204cc6795d67e45687a2f7403e06d8e7053d/onnx-1.21.0-cp314-cp314t-win_arm64.whl", hash = "sha256:efba467efb316baf2a9452d892c2f982b9b758c778d23e38c7f44fa211b30bb9", size = 16423387, upload-time = "2026-03-27T21:33:33.446Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/6a/481561f1093834376ed493e4ca42a73e5be0d50031f2969c86593bdc7c96/onnx-1.22.0-cp312-abi3-macosx_12_0_universal2.whl", hash = "sha256:596fbf0490947533c1c1045ba860851dc9fb77471023dac9a71ba5b42ceab103", size = 20167081, upload-time = "2026-06-15T12:49:32.078Z" },
+    { url = "https://files.pythonhosted.org/packages/84/55/b34fc2aa30aa54b4a775402d24c4082242c720283a274fe976ac8eb94480/onnx-1.22.0-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae5a563f281cd9d2845622cecf6c092a57e4ee1b138f66fdbbdd4200567a5e16", size = 18889249, upload-time = "2026-06-15T12:49:34.7Z" },
+    { url = "https://files.pythonhosted.org/packages/09/a6/bd32357e6cc1ecb473afd78193d7231724f284435d2db25696ecfaaa1503/onnx-1.22.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:955e02e1f6d385b53d52f9cd7b9cdf5caf417c300bcfe3c64c6d542be763845b", size = 19106514, upload-time = "2026-06-15T12:49:37.424Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/9d/3af461ac6c714b8b369cb71499659932f4f12cfb066250b62f7567c3d530/onnx-1.22.0-cp312-abi3-pyemscripten_2025_0_wasm32.whl", hash = "sha256:82e9f27fc1223cb06d68a56bed6f9d3caf3d0dad1b61bce45006d529b15bd94c", size = 16966387, upload-time = "2026-06-15T12:49:40.918Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/f0/68195b5e5a53e333faf2660f5352ee43738d0e42fc5216cc6b1871a9fbfb/onnx-1.22.0-cp312-abi3-win32.whl", hash = "sha256:cc8b66b312f8f03a53e268afb67180a2d97dd12cc79e2b61361c6c0073448016", size = 17081568, upload-time = "2026-06-15T12:49:43.398Z" },
+    { url = "https://files.pythonhosted.org/packages/13/a8/734725bb703c5fabb687f79c79e51249475212b3eb37771ac4a4ac9b487f/onnx-1.22.0-cp312-abi3-win_amd64.whl", hash = "sha256:72ccebab3bac07215c204ce8848d42e78eaaa666badbf72d25cd359b9f269e3a", size = 17213290, upload-time = "2026-06-15T12:49:45.933Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/2a/8ce48d8ae26a8761ad4e5dc771961b155c5c3c7c8540ec7f2f2d71b69af0/onnx-1.22.0-cp312-abi3-win_arm64.whl", hash = "sha256:f3c120dcdb70ad738f3c061b32798f408ea299eb69f84dd69ab4a6bf3c2ec01f", size = 17207030, upload-time = "2026-06-15T12:49:48.635Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/13/47323b97846387848efb1044ded11bb94b83526f3d1fbdb37c6480d4520f/onnx-1.22.0-cp314-cp314t-macosx_12_0_universal2.whl", hash = "sha256:19e45e4af88e3fe3261458d4b8cc461957ae2782a358a3560503569bf3b23b72", size = 20176465, upload-time = "2026-06-15T12:49:51.311Z" },
+    { url = "https://files.pythonhosted.org/packages/13/0c/d3b8a7e7eee123938586c608bb9894b5723f2342b9450c0eec59fbec7099/onnx-1.22.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c21a0e59fd967a95b358e4a6e756d1f1eec2d304a83480f329f66e30d2bf0223", size = 18894028, upload-time = "2026-06-15T12:49:54.451Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/8a/da2a97ab46fe6e0cd9beb3ac14603a22f5be492f9ca347faf8233a07bb33/onnx-1.22.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2632406b8f523ef2e2873c363f90b20a3d88c0fbcfac757d3addffccf8f452c2", size = 19110420, upload-time = "2026-06-15T12:49:57.665Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/a3/ce984063017518307ebfaa545782fc400e593dc2d7fdf4f23ce4be1ed197/onnx-1.22.0-cp314-cp314t-win_amd64.whl", hash = "sha256:a3a39fc4643867aecb33417fdddb11e308ee79d2d4a584b9d50cc7aec2091b13", size = 17237547, upload-time = "2026-06-15T12:50:00.382Z" },
+    { url = "https://files.pythonhosted.org/packages/00/50/257a880384a1dd502d543b0067945074d63cd17d0840e958355bc8197da8/onnx-1.22.0-cp314-cp314t-win_arm64.whl", hash = "sha256:8e268cdc0547e3949799ffd4a44451dc2b9080b57d0824a2db680b6ec65506f0", size = 17231391, upload-time = "2026-06-15T12:50:03.047Z" },
 ]

 [[package]]
 name = "onnxruntime"
-version = "1.26.0"
+version = "1.27.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "flatbuffers" },
@@ -4459,25 +4447,25 @@ dependencies = [
    { name = "protobuf" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/81/b1/d111b1df656761f980d9e298a60039a9cb66036b1d039e777537743d0ac3/onnxruntime-1.26.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05b028781b322ad74b57ce5b50aa5280bb1fe96ceec334628ade681e0b24c1ac", size = 18016624, upload-time = "2026-05-12T00:41:01.735Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/a0/3f9d896a0385a36bd04345d6d0b802821a5782adde562e7e135f6bb71c73/onnxruntime-1.26.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91f2bb870a4b9224eba0a6728c1fa7a9e552b8e59e1083c51fbbc3d013f2b5c0", size = 16052692, upload-time = "2026-05-08T19:07:13.829Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/43/2a4e04f8dbeffad19bbcced4bcd4289bf478921518437404d6b92bdf213b/onnxruntime-1.26.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b6dd70599005bd1bf29779f04a91978b92b5e719c11a20068a8f8e535f725b6", size = 18185439, upload-time = "2026-05-08T19:07:36.299Z" },
-    { url = "https://files.pythonhosted.org/packages/44/fc/026d0a7162b9c2153dac292baea9e027c42304dc1d9dc6f8ff5b4cfbaedd/onnxruntime-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:a26374dc7fbcaae593601086b242120e13f2310558df0991da6dd8b8fac00414", size = 13026427, upload-time = "2026-05-08T19:08:03.503Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/27/1dcf88e45e4c69db5f7b106f2dacc3801ba98994e082ca03e1dfdf7bfe57/onnxruntime-1.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:54a8053410fd31fd66469bd754fcfe8a4df9f7eb44756b4b5479bf50c842d948", size = 12796647, upload-time = "2026-05-08T19:07:52.108Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/a2/c801242685e0ce48a4ca51dfafbb588765e0446397e123be53ba5598f3f5/onnxruntime-1.26.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccce19c5f771b8268902f77d9fed9e88f9499465d6780808faa6611a789d33f0", size = 18016563, upload-time = "2026-05-08T19:07:28.081Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/64/0492c0b1db04e29b2630c87cfa36f9d6872b1ca8614b90c5cad58fac7d76/onnxruntime-1.26.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdbed8cf3b672b66acb032f33a253bc27f42bce6ece48ae3fab4fa483a5e96e0", size = 16052634, upload-time = "2026-05-08T19:07:16.885Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/26/4d09ddc755a84fc8d5e192991626b0e0680e8f6c5d58f4f1d05c42bc48cf/onnxruntime-1.26.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c07af6fc6d5557835f2b6ee7a96d8b3235d0c57a8e230efdedaee106a8a3cbc6", size = 18185632, upload-time = "2026-05-08T19:07:38.756Z" },
-    { url = "https://files.pythonhosted.org/packages/77/89/3e52249aa08fa301e217ecba07b5246a8338fa2b401e109326e3fc5be0f9/onnxruntime-1.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:61bec80655efa460591c2bc655392d57d2650ce85533a6b9b3b7a790d7ea7916", size = 13026751, upload-time = "2026-05-08T19:08:06.2Z" },
-    { url = "https://files.pythonhosted.org/packages/06/b3/c1c8782b14af6797c303de132d6eef26a9fb80dfacd3750ce57911d11c6b/onnxruntime-1.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:a6677545ff451e3539a02746d2f207d8c5baa4a0a818886bb9d6a6eb9511ee89", size = 12796807, upload-time = "2026-05-08T19:07:54.879Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/f5/47b0676408abec652c14b84d7173e389837832d850c24f87184277313e8d/onnxruntime-1.26.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e016edc15d3c19f36807e1c6b10be5b27807688c32720f91b5ae480a95215d0", size = 16057265, upload-time = "2026-05-08T19:07:19.603Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/45/33ab6deeef010ca844c877dd618cebc079590bbe52d2a3678e7223b1b908/onnxruntime-1.26.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f5fc48a91a046a6a5c9b147f83fb41d65d24d24923373b222cdd248f0f4f4aac", size = 18197590, upload-time = "2026-05-08T19:07:41.422Z" },
-    { url = "https://files.pythonhosted.org/packages/40/89/17546c1c20f6bfc3ae41c22152378a26edfea918af3129e2139dcd7c99f3/onnxruntime-1.26.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:33a791f31432a3af1a96db5e54818b37aba5e5eefc2e6af5794c10a9118a9993", size = 18019724, upload-time = "2026-05-08T19:07:30.723Z" },
-    { url = "https://files.pythonhosted.org/packages/bb/24/89457a35f6af29538a76647f2c18c3a28277e6c19234c847e7b4b7c19860/onnxruntime-1.26.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e90c00732c4553618103149d93f688e8c3063017938f8983e21a71d9f3b6d22e", size = 16054821, upload-time = "2026-05-08T19:07:22.348Z" },
-    { url = "https://files.pythonhosted.org/packages/12/f9/15b2e1815cf570d238e0135529f80d2dce64e8e8818a1489cae83823c5c6/onnxruntime-1.26.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01498e80ba8988428d08c2d51b1338f89e3de2a93e6ffe555f79c68f26a5c06b", size = 18185815, upload-time = "2026-05-08T19:07:44.179Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/65/2e11055faf015e4b07f45b513fa49b391baf2e19d92d77d73ebee13c1004/onnxruntime-1.26.0-cp314-cp314-win_amd64.whl", hash = "sha256:7ead61450d8405167c87dd3a31d8da1d576b490a57dab1aa8b82a7da6825f5aa", size = 13349887, upload-time = "2026-05-08T19:08:08.671Z" },
-    { url = "https://files.pythonhosted.org/packages/19/e4/0f9d1a5718b1781c610c1e354765a3820597081754277a6a9a2b50705702/onnxruntime-1.26.0-cp314-cp314-win_arm64.whl", hash = "sha256:31d71a53490e46910877d0902b5ad99c69a5955e5c7ea6c82863519410e1ba7c", size = 13140121, upload-time = "2026-05-08T19:07:57.804Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/42/3b8e635f067d06d9f45bede470b8d539d101a4166c272213158dfd08b6ce/onnxruntime-1.26.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b6d258fb78fdfcf049795bcfaa74dcb90ae7baa277afd21e6fd28b83f2c496", size = 16057240, upload-time = "2026-05-08T19:07:25.163Z" },
-    { url = "https://files.pythonhosted.org/packages/93/99/f2be40a31b908d96b861ae0ce98582fa376c18a7f816b9d5eb4cd6aa0a4c/onnxruntime-1.26.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4eefd386a45202aefb7a5132b94f32df9d506c9edcc7faf2fc60d65183f4b183", size = 18197382, upload-time = "2026-05-08T19:07:46.965Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/b7/dd3a524ed93a820dff1af902d0412957ab12499953333e9daa01af5bc480/onnxruntime-1.27.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a14c2ce45312def86b77aea651f46565e45960cf5f0721bfdff449165086ab76", size = 18433506, upload-time = "2026-06-15T22:43:47.026Z" },
+    { url = "https://files.pythonhosted.org/packages/84/86/c3b6b17745a1997d784dadc9bd88d713d2e6721139a5a0e885b28cfb79b1/onnxruntime-1.27.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6fddce0539a4898c7bef35b052ffd37935b2190e35488eab99ce91887743ea1", size = 16438140, upload-time = "2026-06-15T22:42:40.666Z" },
+    { url = "https://files.pythonhosted.org/packages/26/81/24dd9b31b0fb912ee19ca53ac1c9764bfd79d58a2ccef564eb693be831a5/onnxruntime-1.27.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c65a7438632d55dfbc8a02ee60bd6cf7dd9d1ba05a43d4b851452f32338e194", size = 18658316, upload-time = "2026-06-15T22:43:04.012Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/88/8ec9db1a4d126bb8b758992beb40d1249df171917d75f44a327eb5f20dda/onnxruntime-1.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:20c321cf187ba496e648acf6b4cf90b4d398b0d17c2a77fdaeba365b908cc1c1", size = 13358769, upload-time = "2026-06-15T22:43:34.581Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/9f/fdad359dfcba7e7cd8815569b304a596531d4efa77a75d77f8b4981891a2/onnxruntime-1.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:d0d1f68868e2ef30ef70998ba9bbbc5c305e9b17041e3936751c1b8aa6aade06", size = 13104440, upload-time = "2026-06-15T22:43:22.893Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/2b/54208fd03ad410480bc17edf4869376362da8bbf46fe186ddf4cb5cc20fe/onnxruntime-1.27.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:b3e5b58b8c89c2b20e086e890aa9527377e5c240dc3ecc1640d18e07705eeb1c", size = 18432958, upload-time = "2026-06-15T22:42:53.105Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/88/24fc51fcbb126da6d032372314e47b55c3faad58f2aa78c0e199ccd20b9c/onnxruntime-1.27.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48b3d87eb560ff6a772240506f3c78d6d27c63cafedd5c775672e1194f968cfd", size = 16438180, upload-time = "2026-06-15T22:42:43.093Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/19/14929c3c2fe0b79b41cce24463062bf3afa4cdd3c19dccf00319caa92bff/onnxruntime-1.27.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6872443f236a554921cda6f318c900e2d0c226792cf3534d00e5057c6926e5d2", size = 18658445, upload-time = "2026-06-15T22:43:08.053Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/76/59ed932b0244acd7bbbd6449480053a6d958ea66357f022f932872e19287/onnxruntime-1.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:760021bca514d64a811837820d351a08a41741f16f8b4c26450da708fecf14e6", size = 13357856, upload-time = "2026-06-15T22:43:37.315Z" },
+    { url = "https://files.pythonhosted.org/packages/79/51/d1ec60ec7b1e2ae2d7340ba52b8a13529140039cd4407ba8dddbbc046582/onnxruntime-1.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:2fdfa9df40a0ded0028ce6f9cd863264237f3970559dea2b81456e9ac4622b94", size = 13104412, upload-time = "2026-06-15T22:43:27.457Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/7d/e6bb1c6445c94f708c38cd8fbb7bf0264108c33498b9445c93e60fe6d329/onnxruntime-1.27.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54c0c4e9202c36c4ecdb1f3443f5dfbfd5ee3b54d1362c4b4c6134110e74fb32", size = 16443331, upload-time = "2026-06-15T22:42:45.649Z" },
+    { url = "https://files.pythonhosted.org/packages/72/1b/b18b31e806eabc41077810199fbbb36fbc2d5f19912416e5ccfbf73053d1/onnxruntime-1.27.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b215aa662c8f983f7d6dedafe65a9be72c26e5338e0fe98b3e0422c32c85428", size = 18670967, upload-time = "2026-06-15T22:43:10.621Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/37/48ab79c39b58a7c9f6f5aac1fa0ff2b993eb2643393d6ed9e839ddb6f347/onnxruntime-1.27.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0874edc171f470fc4dd2bbb60bc0989612ed1a8b89b365cda016630a93227f13", size = 18433941, upload-time = "2026-06-15T22:42:58.867Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/24/d535ca8a09dbf697f853377c8dc0820dbcaae5f334316b400b953afbcba8/onnxruntime-1.27.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5b51c014cf1a4fcd93c29a97eac8071fa27710dae05a4d0380bb60a66d60a62c", size = 16439970, upload-time = "2026-06-15T22:42:48.023Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/b1/ea9ee80c0bdaa4efb13f29f8c236f3740f6655e8c092a2d119515a5a652c/onnxruntime-1.27.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:445fb702ea5241ba813a3ce2febe2e9408a64f6ad2eb610924322c536165f7cd", size = 18659240, upload-time = "2026-06-15T22:43:13.165Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/f2/1404507d76a21940e8bf46f414e3d1abd94dc888cb89a30f4a540275846f/onnxruntime-1.27.0-cp314-cp314-win_amd64.whl", hash = "sha256:49e416be0d717338b6d041b99911b716d70c397d277056450724f93bdded3fc2", size = 13685306, upload-time = "2026-06-15T22:43:40.416Z" },
+    { url = "https://files.pythonhosted.org/packages/10/e5/ca5cf012ccccb806c70e94aadfebca5606acc62b33eb88cec13352d0778f/onnxruntime-1.27.0-cp314-cp314-win_arm64.whl", hash = "sha256:856032937dd3bc7a7c141909c8d7ae4fde3e3f59bddf061ae627b9a051bda95c", size = 13456280, upload-time = "2026-06-15T22:43:29.693Z" },
+    { url = "https://files.pythonhosted.org/packages/67/7b/dca330a8397e9d816c976d7aed4e24a4a2d279bb1e551e3d0221d1389b1d/onnxruntime-1.27.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6197a02e3f620c4dc13cff51b80672409fc1ffab3aa2593911b19fd322ff48b", size = 16443274, upload-time = "2026-06-15T22:42:50.467Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/f6/2bac21f722aa45d876d4a51f26bd0ef30e704068a3cd5021a5a7cd784271/onnxruntime-1.27.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:370d211e1ceeac4cd5f45301655463ac59e27cdc74d9f7aeb2d19ff4b7a76715", size = 18670781, upload-time = "2026-06-15T22:43:17.151Z" },
 ]

 [[package]]
@@ -6160,15 +6148,15 @@ wheels = [

 [[package]]
 name = "sentry-sdk"
-version = "2.62.0"
+version = "2.63.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "certifi" },
    { name = "urllib3" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f6/5d/a343201726150e05f2036eeb6e493e2e2f8bf8a66f5aa70f2f4ac96f9ca3/sentry_sdk-2.62.0.tar.gz", hash = "sha256:3c870b9f50d9fd15b58c817dbde1c7cfaa9fe3f05df0a4c6edd5571cb82f5491", size = 463986, upload-time = "2026-06-08T13:23:49.223Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ba/c8/b3c970a5b186722d276cd40a05b3254e03bccc0208560aff20f612e018e8/sentry_sdk-2.63.0.tar.gz", hash = "sha256:2a1502bf864769275dbc8c2c9fc7a0f7f5e18358180b615d262d13a31ffba216", size = 912449, upload-time = "2026-06-16T12:45:57.553Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3d/07/05440381627877aae223fd68f330df9b9fc6641d08bf65328b55235617a2/sentry_sdk-2.62.0-py3-none-any.whl", hash = "sha256:27f61d13a86c3c1648dec666dd5a64f79772dd6a84b446f11866601ecab24f6f", size = 490586, upload-time = "2026-06-08T13:23:47.486Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/57/cb205f7d93373120f666b9c5736dc0815524d96a9b278e7a728f018dc22a/sentry_sdk-2.63.0-py3-none-any.whl", hash = "sha256:3a9b5ddd403f79eb73bd670f75f04485819db53d28f76ced7bc09041cb0dfd6a", size = 495950, upload-time = "2026-06-16T12:45:55.819Z" },
 ]

 [[package]]
@@ -6496,30 +6484,18 @@ resolution-markers = [
    "python_full_version == '3.14.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'darwin'",
-    "(python_full_version >= '3.15' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "(python_full_version == '3.14.*' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "(python_full_version == '3.13.*' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version < '3.13' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version < '3.13' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version < '3.13' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'win32'",
+    "(python_full_version >= '3.15' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version >= '3.15' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "python_full_version >= '3.15' and sys_platform == 'emscripten'",
+    "(python_full_version == '3.14.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.14.*' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "(python_full_version == '3.13.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "(python_full_version < '3.13' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "python_full_version == '3.14.*' and sys_platform == 'emscripten'",
+    "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
+    "python_full_version < '3.13' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.15' and sys_platform == 'win32'",
+    "python_full_version == '3.14.*' and sys_platform == 'win32'",
+    "python_full_version == '3.13.*' and sys_platform == 'win32'",
+    "python_full_version < '3.13' and sys_platform == 'win32'",
 ]
 dependencies = [
    { name = "filelock", marker = "sys_platform != 'linux'" },
@@ -6556,14 +6532,10 @@ resolution-markers = [
    "(python_full_version == '3.14.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.14.*' and platform_machine == 'arm64' and sys_platform == 'linux')",
    "(python_full_version == '3.13.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'linux')",
    "(python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'linux')",
-    "python_full_version >= '3.15' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version == '3.14.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version == '3.13.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'linux'",
+    "python_full_version >= '3.15' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version == '3.14.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version == '3.13.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
 ]
 dependencies = [
    { name = "cuda-bindings", marker = "sys_platform == 'linux'" },
@@ -6636,30 +6608,18 @@ resolution-markers = [
    "python_full_version == '3.14.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
    "python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'darwin'",
-    "(python_full_version >= '3.15' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "(python_full_version == '3.14.*' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "(python_full_version == '3.13.*' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version < '3.13' and platform_machine != 'arm64' and platform_machine != 's390x' and sys_platform == 'darwin') or (python_full_version < '3.13' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version < '3.13' and platform_machine != 's390x' and sys_platform == 'emscripten'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'emscripten'",
-    "python_full_version >= '3.15' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.14.*' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.13.*' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 's390x' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'win32'",
+    "(python_full_version >= '3.15' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version >= '3.15' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "python_full_version >= '3.15' and sys_platform == 'emscripten'",
+    "(python_full_version == '3.14.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.14.*' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "(python_full_version == '3.13.*' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "(python_full_version < '3.13' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')",
+    "python_full_version == '3.14.*' and sys_platform == 'emscripten'",
+    "python_full_version == '3.13.*' and sys_platform == 'emscripten'",
+    "python_full_version < '3.13' and sys_platform == 'emscripten'",
+    "python_full_version >= '3.15' and sys_platform == 'win32'",
+    "python_full_version == '3.14.*' and sys_platform == 'win32'",
+    "python_full_version == '3.13.*' and sys_platform == 'win32'",
+    "python_full_version < '3.13' and sys_platform == 'win32'",
 ]
 dependencies = [
    { name = "numpy", marker = "sys_platform != 'linux'" },
@@ -6692,14 +6652,10 @@ resolution-markers = [
    "(python_full_version == '3.14.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.14.*' and platform_machine == 'arm64' and sys_platform == 'linux')",
    "(python_full_version == '3.13.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'linux')",
    "(python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'linux')",
-    "python_full_version >= '3.15' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version >= '3.15' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version == '3.14.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version == '3.13.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version == '3.14.*' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version == '3.13.*' and platform_machine == 's390x' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine == 's390x' and sys_platform == 'linux'",
+    "python_full_version >= '3.15' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version == '3.14.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version == '3.13.*' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and sys_platform == 'linux'",
 ]
 dependencies = [
    { name = "numpy", marker = "sys_platform == 'linux'" },
@@ -6949,7 +6905,7 @@ wheels = [

 [[package]]
 name = "virtualenv"
-version = "21.5.0"
+version = "21.5.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "distlib" },
@@ -6957,9 +6913,9 @@ dependencies = [
    { name = "platformdirs" },
    { name = "python-discovery" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/cd/0e/933bacb37b57ae7928b0030eef205a3dbb3e37afdbdde5be2e113318958f/virtualenv-21.5.0.tar.gz", hash = "sha256:98847aadf5e2037e0e4d2e19528eb3aca6f23906422e59a510bff231a6d32fce", size = 4577424, upload-time = "2026-06-13T20:36:45.066Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/a5/81f987504738e6defeed61ec1c47e2aefab3c35d8eeb87e1b3f38cf28254/virtualenv-21.5.1.tar.gz", hash = "sha256:dca3bf98275a59c652b69d68e73433e597d977c2da9198882479d1a7188009c8", size = 4578798, upload-time = "2026-06-16T16:23:58.603Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e9/87/b0667ede418386ab631e48924b845d326f366d61e6bd08fe68a748fae4d4/virtualenv-21.5.0-py3-none-any.whl", hash = "sha256:8f7c38605023688c89789f566959006af6d61c99eeeb9e58342eb780c5761e5e", size = 4557937, upload-time = "2026-06-13T20:36:42.967Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/02/3623e6169bed617ed1e2d372f7c69f92ec28d54c4dfc997055c8578ec148/virtualenv-21.5.1-py3-none-any.whl", hash = "sha256:55aa670b67bbfb991b03fda39bd3276d92c419d702376e98c5df1c9989a26783", size = 4558820, upload-time = "2026-06-16T16:23:56.963Z" },
 ]

 [[package]]