Compare commits

...

1 Commits

Author SHA1 Message Date
CarolinePascal dbb32ead5f fix(video becnhmark)
* fixing typos on PyAV decoders names
* adding torchcodec among video backends
* updating images datasets to v3.0
2025-09-12 17:03:36 +02:00
2 changed files with 12 additions and 23 deletions
+9 -21
View File
@@ -37,14 +37,14 @@ from tqdm import tqdm
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.video_utils import (
decode_video_frames_torchvision,
decode_video_frames,
encode_video_frames,
)
from lerobot.utils.benchmark import TimeBenchmark
BASE_ENCODING = OrderedDict(
[
("vcodec", "libx264"),
("vcodec", "h264"),
("pix_fmt", "yuv444p"),
("g", 2),
("crf", None),
@@ -147,18 +147,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis
return [idx / fps for idx in frame_indexes]
def decode_video_frames(
video_path: str,
timestamps: list[float],
tolerance_s: float,
backend: str,
) -> torch.Tensor:
if backend in ["pyav", "video_reader"]:
return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
else:
raise NotImplementedError(backend)
def benchmark_decoding(
imgs_dir: Path,
video_path: Path,
@@ -406,9 +394,9 @@ if __name__ == "__main__":
nargs="*",
default=[
"lerobot/pusht_image",
"aliberts/aloha_mobile_shrimp_image",
"aliberts/paris_street",
"aliberts/kitchen",
"CarolinePascal/aloha_mobile_shrimp_image",
"CarolinePascal/paris_street",
"CarolinePascal/kitchen",
],
help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
)
@@ -416,7 +404,7 @@ if __name__ == "__main__":
"--vcodec",
type=str,
nargs="*",
default=["libx264", "hevc", "libsvtav1"],
default=["h264", "hevc", "libsvtav1"],
help="Video codecs to be tested",
)
parser.add_argument(
@@ -446,7 +434,7 @@ if __name__ == "__main__":
# nargs="*",
# default=[0, 1],
# help="Use the fastdecode tuning option. 0 disables it. "
# "For libx264 and libx265/hevc, only 1 is possible. "
# "For h264 and h265/hevc, only 1 is possible. "
# "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
# )
parser.add_argument(
@@ -465,8 +453,8 @@ if __name__ == "__main__":
"--backends",
type=str,
nargs="*",
default=["pyav", "video_reader"],
help="Torchvision decoding backend to be tested.",
default=["torchcodec", "pyav", "video_reader"],
help="Video decoding backend to be tested.",
)
parser.add_argument(
"--num-samples",
+3 -2
View File
@@ -440,8 +440,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
download_videos (bool, optional): Flag to download the videos. Note that when set to True but the
video files are already present on local disk, they won't be downloaded again. Defaults to
True.
video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec when available int the platform; otherwise, defaults to 'pyav'.
You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision.
video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to 'torchcodec'
when available on the platform; otherwise, defaults to torchvision's default backend : 'pyav'.
You can also use 'video_reader' which is another decoder of torchvision.
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
"""