fix(video benchmark)

* fixing typos in PyAV decoder names * adding torchcodec among video backends * updating image datasets to v3.0
2026-07-15 05:51:52 +00:00 · 2025-09-12 17:03:36 +02:00
parent d602e8169c
commit dbb32ead5f
2 changed files with 12 additions and 23 deletions
@@ -37,14 +37,14 @@ from tqdm import tqdm

 from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.datasets.video_utils import (
-    decode_video_frames_torchvision,
+    decode_video_frames,
    encode_video_frames,
 )
 from lerobot.utils.benchmark import TimeBenchmark

 BASE_ENCODING = OrderedDict(
    [
-        ("vcodec", "libx264"),
+        ("vcodec", "h264"),
        ("pix_fmt", "yuv444p"),
        ("g", 2),
        ("crf", None),
@@ -147,18 +147,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis
    return [idx / fps for idx in frame_indexes]


-def decode_video_frames(
-    video_path: str,
-    timestamps: list[float],
-    tolerance_s: float,
-    backend: str,
-) -> torch.Tensor:
-    if backend in ["pyav", "video_reader"]:
-        return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
-    else:
-        raise NotImplementedError(backend)
-
-
 def benchmark_decoding(
    imgs_dir: Path,
    video_path: Path,
@@ -406,9 +394,9 @@ if __name__ == "__main__":
        nargs="*",
        default=[
            "lerobot/pusht_image",
-            "aliberts/aloha_mobile_shrimp_image",
-            "aliberts/paris_street",
-            "aliberts/kitchen",
+            "CarolinePascal/aloha_mobile_shrimp_image",
+            "CarolinePascal/paris_street",
+            "CarolinePascal/kitchen",
        ],
        help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
    )
@@ -416,7 +404,7 @@ if __name__ == "__main__":
        "--vcodec",
        type=str,
        nargs="*",
-        default=["libx264", "hevc", "libsvtav1"],
+        default=["h264", "hevc", "libsvtav1"],
        help="Video codecs to be tested",
    )
    parser.add_argument(
@@ -446,7 +434,7 @@ if __name__ == "__main__":
    #     nargs="*",
    #     default=[0, 1],
    #     help="Use the fastdecode tuning option. 0 disables it. "
-    #         "For libx264 and libx265/hevc, only 1 is possible. "
+    #         "For h264 and h265/hevc, only 1 is possible. "
    #         "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
    # )
    parser.add_argument(
@@ -465,8 +453,8 @@ if __name__ == "__main__":
        "--backends",
        type=str,
        nargs="*",
-        default=["pyav", "video_reader"],
-        help="Torchvision decoding backend to be tested.",
+        default=["torchcodec", "pyav", "video_reader"],
+        help="Video decoding backend to be tested.",
    )
    parser.add_argument(
        "--num-samples",