mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-20 19:19:56 +00:00
fix(video benchmark)
* fixing typos in PyAV decoder names
* adding torchcodec among video backends
* updating image datasets to v3.0
This commit is contained in:
@@ -37,14 +37,14 @@ from tqdm import tqdm
|
|||||||
|
|
||||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||||
from lerobot.datasets.video_utils import (
|
from lerobot.datasets.video_utils import (
|
||||||
decode_video_frames_torchvision,
|
decode_video_frames,
|
||||||
encode_video_frames,
|
encode_video_frames,
|
||||||
)
|
)
|
||||||
from lerobot.utils.benchmark import TimeBenchmark
|
from lerobot.utils.benchmark import TimeBenchmark
|
||||||
|
|
||||||
BASE_ENCODING = OrderedDict(
|
BASE_ENCODING = OrderedDict(
|
||||||
[
|
[
|
||||||
("vcodec", "libx264"),
|
("vcodec", "h264"),
|
||||||
("pix_fmt", "yuv444p"),
|
("pix_fmt", "yuv444p"),
|
||||||
("g", 2),
|
("g", 2),
|
||||||
("crf", None),
|
("crf", None),
|
||||||
@@ -147,18 +147,6 @@ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> lis
|
|||||||
return [idx / fps for idx in frame_indexes]
|
return [idx / fps for idx in frame_indexes]
|
||||||
|
|
||||||
|
|
||||||
def decode_video_frames(
|
|
||||||
video_path: str,
|
|
||||||
timestamps: list[float],
|
|
||||||
tolerance_s: float,
|
|
||||||
backend: str,
|
|
||||||
) -> torch.Tensor:
|
|
||||||
if backend in ["pyav", "video_reader"]:
|
|
||||||
return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(backend)
|
|
||||||
|
|
||||||
|
|
||||||
def benchmark_decoding(
|
def benchmark_decoding(
|
||||||
imgs_dir: Path,
|
imgs_dir: Path,
|
||||||
video_path: Path,
|
video_path: Path,
|
||||||
@@ -406,9 +394,9 @@ if __name__ == "__main__":
|
|||||||
nargs="*",
|
nargs="*",
|
||||||
default=[
|
default=[
|
||||||
"lerobot/pusht_image",
|
"lerobot/pusht_image",
|
||||||
"aliberts/aloha_mobile_shrimp_image",
|
"CarolinePascal/aloha_mobile_shrimp_image",
|
||||||
"aliberts/paris_street",
|
"CarolinePascal/paris_street",
|
||||||
"aliberts/kitchen",
|
"CarolinePascal/kitchen",
|
||||||
],
|
],
|
||||||
help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
|
help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
|
||||||
)
|
)
|
||||||
@@ -416,7 +404,7 @@ if __name__ == "__main__":
|
|||||||
"--vcodec",
|
"--vcodec",
|
||||||
type=str,
|
type=str,
|
||||||
nargs="*",
|
nargs="*",
|
||||||
default=["libx264", "hevc", "libsvtav1"],
|
default=["h264", "hevc", "libsvtav1"],
|
||||||
help="Video codecs to be tested",
|
help="Video codecs to be tested",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -446,7 +434,7 @@ if __name__ == "__main__":
|
|||||||
# nargs="*",
|
# nargs="*",
|
||||||
# default=[0, 1],
|
# default=[0, 1],
|
||||||
# help="Use the fastdecode tuning option. 0 disables it. "
|
# help="Use the fastdecode tuning option. 0 disables it. "
|
||||||
# "For libx264 and libx265/hevc, only 1 is possible. "
|
# "For h264 and h265/hevc, only 1 is possible. "
|
||||||
# "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
|
# "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
|
||||||
# )
|
# )
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -465,8 +453,8 @@ if __name__ == "__main__":
|
|||||||
"--backends",
|
"--backends",
|
||||||
type=str,
|
type=str,
|
||||||
nargs="*",
|
nargs="*",
|
||||||
default=["pyav", "video_reader"],
|
default=["torchcodec", "pyav", "video_reader"],
|
||||||
help="Torchvision decoding backend to be tested.",
|
help="Video decoding backend to be tested.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--num-samples",
|
"--num-samples",
|
||||||
|
|||||||
@@ -440,8 +440,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
|
|||||||
download_videos (bool, optional): Flag to download the videos. Note that when set to True but the
|
download_videos (bool, optional): Flag to download the videos. Note that when set to True but the
|
||||||
video files are already present on local disk, they won't be downloaded again. Defaults to
|
video files are already present on local disk, they won't be downloaded again. Defaults to
|
||||||
True.
|
True.
|
||||||
video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec when available int the platform; otherwise, defaults to 'pyav'.
|
video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to 'torchcodec'
|
||||||
You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision.
|
when available on the platform; otherwise, defaults to torchvision's default backend : 'pyav'.
|
||||||
|
You can also use 'video_reader' which is another decoder of torchvision.
|
||||||
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
|
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos.
|
||||||
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
|
Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1.
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user