fix(video becnhmark)

* fixing typos on PyAV decoders names
* adding torchcodec among video backends
* updating images datasets to v3.0
This commit is contained in:
CarolinePascal
2025-09-12 17:03:36 +02:00
parent d602e8169c
commit dbb32ead5f
2 changed files with 12 additions and 23 deletions
+9 -21
View File
@@ -37,14 +37,14 @@ from tqdm import tqdm
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.video_utils import (
decode_video_frames_torchvision,
decode_video_frames,
encode_video_frames,
)
from lerobot.utils.benchmark import TimeBenchmark
BASE_ENCODING = OrderedDict(
[
("vcodec", "libx264"),
("vcodec", "h264"),
("pix_fmt", "yuv444p"),
("g", 2),
("crf", None),
@@ -147,18 +147,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis
return [idx / fps for idx in frame_indexes]
def decode_video_frames(
video_path: str,
timestamps: list[float],
tolerance_s: float,
backend: str,
) -> torch.Tensor:
if backend in ["pyav", "video_reader"]:
return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
else:
raise NotImplementedError(backend)
def benchmark_decoding(
imgs_dir: Path,
video_path: Path,
@@ -406,9 +394,9 @@ if __name__ == "__main__":
nargs="*",
default=[
"lerobot/pusht_image",
"aliberts/aloha_mobile_shrimp_image",
"aliberts/paris_street",
"aliberts/kitchen",
"CarolinePascal/aloha_mobile_shrimp_image",
"CarolinePascal/paris_street",
"CarolinePascal/kitchen",
],
help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
)
@@ -416,7 +404,7 @@ if __name__ == "__main__":
"--vcodec",
type=str,
nargs="*",
default=["libx264", "hevc", "libsvtav1"],
default=["h264", "hevc", "libsvtav1"],
help="Video codecs to be tested",
)
parser.add_argument(
@@ -446,7 +434,7 @@ if __name__ == "__main__":
# nargs="*",
# default=[0, 1],
# help="Use the fastdecode tuning option. 0 disables it. "
# "For libx264 and libx265/hevc, only 1 is possible. "
# "For h264 and h265/hevc, only 1 is possible. "
# "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
# )
parser.add_argument(
@@ -465,8 +453,8 @@ if __name__ == "__main__":
"--backends",
type=str,
nargs="*",
default=["pyav", "video_reader"],
help="Torchvision decoding backend to be tested.",
default=["torchcodec", "pyav", "video_reader"],
help="Video decoding backend to be tested.",
)
parser.add_argument(
"--num-samples",