fix(video benchmark)

* fixing typos in PyAV decoder names
* adding torchcodec among video backends
* updating image datasets to v3.0
This commit is contained in:
CarolinePascal
2025-09-12 17:03:36 +02:00
parent d602e8169c
commit dbb32ead5f
2 changed files with 12 additions and 23 deletions
+9 -21
View File
@@ -37,14 +37,14 @@ from tqdm import tqdm
from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.video_utils import ( from lerobot.datasets.video_utils import (
decode_video_frames_torchvision, decode_video_frames,
encode_video_frames, encode_video_frames,
) )
from lerobot.utils.benchmark import TimeBenchmark from lerobot.utils.benchmark import TimeBenchmark
BASE_ENCODING = OrderedDict( BASE_ENCODING = OrderedDict(
[ [
("vcodec", "libx264"), ("vcodec", "h264"),
("pix_fmt", "yuv444p"), ("pix_fmt", "yuv444p"),
("g", 2), ("g", 2),
("crf", None), ("crf", None),
@@ -147,18 +147,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis
return [idx / fps for idx in frame_indexes] return [idx / fps for idx in frame_indexes]
def decode_video_frames(
video_path: str,
timestamps: list[float],
tolerance_s: float,
backend: str,
) -> torch.Tensor:
if backend in ["pyav", "video_reader"]:
return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
else:
raise NotImplementedError(backend)
def benchmark_decoding( def benchmark_decoding(
imgs_dir: Path, imgs_dir: Path,
video_path: Path, video_path: Path,
@@ -406,9 +394,9 @@ if __name__ == "__main__":
nargs="*", nargs="*",
default=[ default=[
"lerobot/pusht_image", "lerobot/pusht_image",
"aliberts/aloha_mobile_shrimp_image", "CarolinePascal/aloha_mobile_shrimp_image",
"aliberts/paris_street", "CarolinePascal/paris_street",
"aliberts/kitchen", "CarolinePascal/kitchen",
], ],
help="Datasets repo-ids to test against. First episodes only are used. Must be images.", help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
) )
@@ -416,7 +404,7 @@ if __name__ == "__main__":
"--vcodec", "--vcodec",
type=str, type=str,
nargs="*", nargs="*",
default=["libx264", "hevc", "libsvtav1"], default=["h264", "hevc", "libsvtav1"],
help="Video codecs to be tested", help="Video codecs to be tested",
) )
parser.add_argument( parser.add_argument(
@@ -446,7 +434,7 @@ if __name__ == "__main__":
# nargs="*", # nargs="*",
# default=[0, 1], # default=[0, 1],
# help="Use the fastdecode tuning option. 0 disables it. " # help="Use the fastdecode tuning option. 0 disables it. "
# "For libx264 and libx265/hevc, only 1 is possible. " # "For h264 and h265/hevc, only 1 is possible. "
# "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization", # "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
# ) # )
parser.add_argument( parser.add_argument(
@@ -465,8 +453,8 @@ if __name__ == "__main__":
"--backends", "--backends",
type=str, type=str,
nargs="*", nargs="*",
default=["pyav", "video_reader"], default=["torchcodec", "pyav", "video_reader"],
help="Torchvision decoding backend to be tested.", help="Video decoding backend to be tested.",
) )
parser.add_argument( parser.add_argument(
"--num-samples", "--num-samples",
+3 -2
View File
@@ -440,8 +440,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
download_videos (bool, optional): Flag to download the videos. Note that when set to True but the download_videos (bool, optional): Flag to download the videos. Note that when set to True but the
video files are already present on local disk, they won't be downloaded again. Defaults to video files are already present on local disk, they won't be downloaded again. Defaults to
True. True.
video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec when available int the platform; otherwise, defaults to 'pyav'. video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to 'torchcodec'
You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision. when available on the platform; otherwise, defaults to torchvision's default backend : 'pyav'.
You can also use 'video_reader' which is another decoder of torchvision.
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos. batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1. Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
""" """