test(new): adding new tests for encoding related features

This commit is contained in:
CarolinePascal
2026-04-24 17:15:41 +02:00
parent 57a619ab02
commit a2376389f9
7 changed files with 641 additions and 141 deletions
+88 -141
View File
@@ -14,11 +14,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Tests for streaming video encoding and hardware-accelerated encoding.""" """Tests for streaming video encoding."""
import queue import queue
import threading import threading
from unittest.mock import patch
import numpy as np import numpy as np
import pytest import pytest
@@ -27,112 +26,20 @@ pytest.importorskip("av", reason="av is required (install lerobot[dataset])")
import av # noqa: E402 import av # noqa: E402
from lerobot.datasets.pyav_utils import get_codec
from lerobot.datasets.video_utils import ( from lerobot.datasets.video_utils import (
VALID_VIDEO_CODECS,
StreamingVideoEncoder, StreamingVideoEncoder,
VideoEncoderConfig,
_CameraEncoderThread, _CameraEncoderThread,
_get_codec_options,
detect_available_hw_encoders,
resolve_vcodec,
) )
from lerobot.utils.constants import OBS_IMAGES from lerobot.utils.constants import OBS_IMAGES
# ─── _get_codec_options tests ─── # Cross-codec validation tests only fire when the target codec is present
# in the local FFmpeg build; on other platforms validate() is a no-op.
_has_videotoolbox = get_codec("h264_videotoolbox") is not None
class TestGetCodecOptions: _videotoolbox_only = pytest.mark.skipif(
def test_libsvtav1_defaults(self): not _has_videotoolbox, reason="h264_videotoolbox not in local FFmpeg build"
opts = _get_codec_options("libsvtav1") )
assert opts["g"] == "2"
assert opts["crf"] == "30"
assert opts["preset"] == "12"
def test_libsvtav1_custom_preset(self):
opts = _get_codec_options("libsvtav1", preset=8)
assert opts["preset"] == "8"
def test_h264_options(self):
opts = _get_codec_options("h264", g=10, crf=23)
assert opts["g"] == "10"
assert opts["crf"] == "23"
assert "preset" not in opts
def test_videotoolbox_options(self):
opts = _get_codec_options("h264_videotoolbox", g=2, crf=30)
assert opts["g"] == "2"
# CRF 30 maps to quality = max(1, min(100, 100 - 30*2)) = 40
assert opts["q:v"] == "40"
assert "crf" not in opts
def test_nvenc_options(self):
opts = _get_codec_options("h264_nvenc", g=2, crf=25)
assert opts["rc"] == "constqp"
assert opts["qp"] == "25"
assert "crf" not in opts
# NVENC doesn't support g
assert "g" not in opts
def test_vaapi_options(self):
opts = _get_codec_options("h264_vaapi", crf=28)
assert opts["qp"] == "28"
def test_qsv_options(self):
opts = _get_codec_options("h264_qsv", crf=25)
assert opts["global_quality"] == "25"
def test_no_g_no_crf(self):
opts = _get_codec_options("h264", g=None, crf=None)
assert "g" not in opts
assert "crf" not in opts
# ─── HW encoder detection tests ───
class TestHWEncoderDetection:
def test_detect_available_hw_encoders_returns_list(self):
result = detect_available_hw_encoders()
assert isinstance(result, list)
def test_detect_available_hw_encoders_only_valid(self):
from lerobot.datasets.video_utils import HW_ENCODERS
result = detect_available_hw_encoders()
for encoder in result:
assert encoder in HW_ENCODERS
def test_resolve_vcodec_passthrough(self):
assert resolve_vcodec("libsvtav1") == "libsvtav1"
assert resolve_vcodec("h264") == "h264"
def test_resolve_vcodec_auto_fallback(self):
"""When no HW encoders are available, auto should fall back to libsvtav1."""
with patch("lerobot.datasets.video_utils.detect_available_hw_encoders", return_value=[]):
assert resolve_vcodec("auto") == "libsvtav1"
def test_resolve_vcodec_auto_picks_hw(self):
"""When a HW encoder is available, auto should pick it."""
with patch(
"lerobot.datasets.video_utils.detect_available_hw_encoders",
return_value=["h264_videotoolbox"],
):
assert resolve_vcodec("auto") == "h264_videotoolbox"
def test_resolve_vcodec_auto_returns_valid(self):
"""Test that resolve_vcodec('auto') returns a known valid codec."""
result = resolve_vcodec("auto")
assert result in VALID_VIDEO_CODECS
def test_hw_encoder_names_accepted_in_validation(self):
"""Test that HW encoder names pass validation in VALID_VIDEO_CODECS."""
assert "auto" in VALID_VIDEO_CODECS
assert "h264_videotoolbox" in VALID_VIDEO_CODECS
assert "h264_nvenc" in VALID_VIDEO_CODECS
def test_resolve_vcodec_invalid_raises(self):
"""Test that resolve_vcodec raises ValueError for invalid codecs."""
with pytest.raises(ValueError, match="Invalid vcodec"):
resolve_vcodec("not_a_real_codec")
# ─── _CameraEncoderThread tests ─── # ─── _CameraEncoderThread tests ───
@@ -150,14 +57,13 @@ class TestCameraEncoderThread:
result_queue: queue.Queue = queue.Queue(maxsize=1) result_queue: queue.Queue = queue.Queue(maxsize=1)
stop_event = threading.Event() stop_event = threading.Event()
enc_cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13)
encoder_thread = _CameraEncoderThread( encoder_thread = _CameraEncoderThread(
video_path=video_path, video_path=video_path,
fps=fps, fps=fps,
vcodec="libsvtav1", vcodec=enc_cfg.vcodec,
pix_fmt="yuv420p", pix_fmt=enc_cfg.pix_fmt,
g=2, codec_options=enc_cfg.get_codec_options(),
crf=30,
preset=13,
frame_queue=frame_queue, frame_queue=frame_queue,
result_queue=result_queue, result_queue=result_queue,
stop_event=stop_event, stop_event=stop_event,
@@ -202,14 +108,13 @@ class TestCameraEncoderThread:
result_queue: queue.Queue = queue.Queue(maxsize=1) result_queue: queue.Queue = queue.Queue(maxsize=1)
stop_event = threading.Event() stop_event = threading.Event()
enc_cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13)
encoder_thread = _CameraEncoderThread( encoder_thread = _CameraEncoderThread(
video_path=video_path, video_path=video_path,
fps=fps, fps=fps,
vcodec="libsvtav1", vcodec=enc_cfg.vcodec,
pix_fmt="yuv420p", pix_fmt=enc_cfg.pix_fmt,
g=2, codec_options=enc_cfg.get_codec_options(),
crf=30,
preset=13,
frame_queue=frame_queue, frame_queue=frame_queue,
result_queue=result_queue, result_queue=result_queue,
stop_event=stop_event, stop_event=stop_event,
@@ -237,14 +142,13 @@ class TestCameraEncoderThread:
result_queue: queue.Queue = queue.Queue(maxsize=1) result_queue: queue.Queue = queue.Queue(maxsize=1)
stop_event = threading.Event() stop_event = threading.Event()
enc_cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13)
encoder_thread = _CameraEncoderThread( encoder_thread = _CameraEncoderThread(
video_path=video_path, video_path=video_path,
fps=fps, fps=fps,
vcodec="libsvtav1", vcodec=enc_cfg.vcodec,
pix_fmt="yuv420p", pix_fmt=enc_cfg.pix_fmt,
g=2, codec_options=enc_cfg.get_codec_options(),
crf=30,
preset=13,
frame_queue=frame_queue, frame_queue=frame_queue,
result_queue=result_queue, result_queue=result_queue,
stop_event=stop_event, stop_event=stop_event,
@@ -266,11 +170,20 @@ class TestCameraEncoderThread:
class TestStreamingVideoEncoder: class TestStreamingVideoEncoder:
def _make_encoder_config(self, **kwargs):
"""Helper to build a VideoEncoderConfig."""
return VideoEncoderConfig(**kwargs)
def test_single_camera_episode(self, tmp_path): def test_single_camera_episode(self, tmp_path):
"""Test encoding a single camera episode.""" """Test encoding a single camera episode."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13)
video_keys = [f"{OBS_IMAGES}.laptop"] video_keys = [f"{OBS_IMAGES}.laptop"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13
),
)
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
num_frames = 20 num_frames = 20
@@ -295,9 +208,13 @@ class TestStreamingVideoEncoder:
def test_multi_camera_episode(self, tmp_path): def test_multi_camera_episode(self, tmp_path):
"""Test encoding multiple cameras simultaneously.""" """Test encoding multiple cameras simultaneously."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30)
video_keys = [f"{OBS_IMAGES}.laptop", f"{OBS_IMAGES}.phone"] video_keys = [f"{OBS_IMAGES}.laptop", f"{OBS_IMAGES}.phone"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30
),
)
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
num_frames = 15 num_frames = 15
@@ -319,8 +236,13 @@ class TestStreamingVideoEncoder:
def test_sequential_episodes(self, tmp_path): def test_sequential_episodes(self, tmp_path):
"""Test that multiple sequential episodes work correctly.""" """Test that multiple sequential episodes work correctly."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30)
video_keys = [f"{OBS_IMAGES}.cam"] video_keys = [f"{OBS_IMAGES}.cam"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30
),
)
for ep in range(3): for ep in range(3):
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
@@ -342,8 +264,13 @@ class TestStreamingVideoEncoder:
def test_cancel_episode(self, tmp_path): def test_cancel_episode(self, tmp_path):
"""Test that canceling an episode cleans up properly.""" """Test that canceling an episode cleans up properly."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30)
video_keys = [f"{OBS_IMAGES}.cam"] video_keys = [f"{OBS_IMAGES}.cam"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30
),
)
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
@@ -365,28 +292,33 @@ class TestStreamingVideoEncoder:
def test_feed_without_start_raises(self, tmp_path): def test_feed_without_start_raises(self, tmp_path):
"""Test that feeding frames without starting an episode raises.""" """Test that feeding frames without starting an episode raises."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") encoder = StreamingVideoEncoder(fps=30)
with pytest.raises(RuntimeError, match="No active episode"): with pytest.raises(RuntimeError, match="No active episode"):
encoder.feed_frame("cam", np.zeros((64, 96, 3), dtype=np.uint8)) encoder.feed_frame("cam", np.zeros((64, 96, 3), dtype=np.uint8))
encoder.close() encoder.close()
def test_finish_without_start_raises(self, tmp_path): def test_finish_without_start_raises(self, tmp_path):
"""Test that finishing without starting raises.""" """Test that finishing without starting raises."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") encoder = StreamingVideoEncoder(fps=30)
with pytest.raises(RuntimeError, match="No active episode"): with pytest.raises(RuntimeError, match="No active episode"):
encoder.finish_episode() encoder.finish_episode()
encoder.close() encoder.close()
def test_close_is_idempotent(self, tmp_path): def test_close_is_idempotent(self, tmp_path):
"""Test that close() can be called multiple times safely.""" """Test that close() can be called multiple times safely."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") encoder = StreamingVideoEncoder(fps=30)
encoder.close() encoder.close()
encoder.close() # Should not raise encoder.close() # Should not raise
def test_video_duration_matches_frame_count(self, tmp_path): def test_video_duration_matches_frame_count(self, tmp_path):
"""Test that encoded video duration matches num_frames / fps.""" """Test that encoded video duration matches num_frames / fps."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13)
video_keys = [f"{OBS_IMAGES}.cam"] video_keys = [f"{OBS_IMAGES}.cam"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13
),
)
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
num_frames = 90 # 3 seconds at 30fps num_frames = 90 # 3 seconds at 30fps
@@ -417,9 +349,13 @@ class TestStreamingVideoEncoder:
def test_multi_camera_start_episode_called_once(self, tmp_path): def test_multi_camera_start_episode_called_once(self, tmp_path):
"""Test that with multiple cameras, no frames are lost due to double start_episode.""" """Test that with multiple cameras, no frames are lost due to double start_episode."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30)
video_keys = [f"{OBS_IMAGES}.cam1", f"{OBS_IMAGES}.cam2"] video_keys = [f"{OBS_IMAGES}.cam1", f"{OBS_IMAGES}.cam2"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30
),
)
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
num_frames = 30 num_frames = 30
@@ -446,17 +382,24 @@ class TestStreamingVideoEncoder:
def test_encoder_threads_passed_to_thread(self, tmp_path): def test_encoder_threads_passed_to_thread(self, tmp_path):
"""Test that encoder_threads is stored and passed through to encoder threads.""" """Test that encoder_threads is stored and passed through to encoder threads."""
encoder = StreamingVideoEncoder(
fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, encoder_threads=2
)
assert encoder.encoder_threads == 2
video_keys = [f"{OBS_IMAGES}.cam"] video_keys = [f"{OBS_IMAGES}.cam"]
cfg = VideoEncoderConfig(
vcodec="libsvtav1",
pix_fmt="yuv420p",
g=2,
crf=30,
)
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=cfg,
encoder_threads=2,
)
assert encoder._encoder_threads == 2
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
# Verify the thread received the encoder_threads value # Verify codec options include thread tuning for libsvtav1 (lp=…)
thread = encoder._threads[f"{OBS_IMAGES}.cam"] thread = encoder._threads[f"{OBS_IMAGES}.cam"]
assert thread.encoder_threads == 2 assert "svtav1-params" in thread.codec_options or "threads" in thread.codec_options
# Feed some frames and finish to ensure it works end-to-end # Feed some frames and finish to ensure it works end-to-end
num_frames = 10 num_frames = 10
@@ -478,16 +421,20 @@ class TestStreamingVideoEncoder:
def test_encoder_threads_none_by_default(self, tmp_path): def test_encoder_threads_none_by_default(self, tmp_path):
"""Test that encoder_threads defaults to None (codec auto-detect).""" """Test that encoder_threads defaults to None (codec auto-detect)."""
encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") encoder = StreamingVideoEncoder(fps=30)
assert encoder.encoder_threads is None assert encoder._encoder_threads is None
encoder.close() encoder.close()
def test_graceful_frame_dropping(self, tmp_path): def test_graceful_frame_dropping(self, tmp_path):
"""Test that full queue drops frames instead of crashing.""" """Test that full queue drops frames instead of crashing."""
encoder = StreamingVideoEncoder(
fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13, queue_maxsize=1
)
video_keys = [f"{OBS_IMAGES}.cam"] video_keys = [f"{OBS_IMAGES}.cam"]
encoder = StreamingVideoEncoder(
fps=30,
camera_encoder_config=self._make_encoder_config(
vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13
),
queue_maxsize=1,
)
encoder.start_episode(video_keys, tmp_path) encoder.start_episode(video_keys, tmp_path)
# Feed many frames quickly - with queue_maxsize=1, some will be dropped # Feed many frames quickly - with queue_maxsize=1, some will be dropped
+538
View File
@@ -0,0 +1,538 @@
#!/usr/bin/env python
# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for ``lerobot.datasets.video_utils`` encoding functions and ``VideoEncoderConfig`` config class."""
import json
from pathlib import Path
import numpy as np
import pytest
pytest.importorskip("av", reason="av is required (install lerobot[dataset])")
import av # noqa: E402
from lerobot.datasets.image_writer import write_image
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.datasets.pyav_utils import detect_available_encoders_pyav, get_codec
from lerobot.datasets.utils import INFO_PATH
from lerobot.datasets.video_utils import (
VALID_VIDEO_CODECS,
VideoEncoderConfig,
concatenate_video_files,
encode_video_frames,
get_video_info,
)
# Per-codec skip markers — validation tests only fire when the codec is available
def _require_encoder(vcodec: str) -> pytest.MarkDecorator:
    """Build a ``skipif`` marker that gates a test on the presence of ``vcodec``.

    Args:
        vcodec: Encoder name handed to ``get_codec``.

    Returns:
        A marker that skips the decorated test when ``get_codec(vcodec)`` returns ``None``.
    """
    # The lookup happens once, when the marker is built, not on every test run.
    encoder_missing = get_codec(vcodec) is None
    skip_reason = f"{vcodec!r} not in local FFmpeg build"
    return pytest.mark.skipif(encoder_missing, reason=skip_reason)
# Ready-made skip markers, one per encoder exercised by the tests in this module.
# Each one is built a single time, at import, by ``_require_encoder``.
require_libsvtav1 = _require_encoder("libsvtav1")
require_h264 = _require_encoder("h264")
require_videotoolbox = _require_encoder("h264_videotoolbox")
require_nvenc = _require_encoder("h264_nvenc")
require_vaapi = _require_encoder("h264_vaapi")
require_qsv = _require_encoder("h264_qsv")
# ─── VideoEncoderConfig / codec options ──────────────────────────────
class TestCodecOptions:
    """Checks on ``VideoEncoderConfig`` construction, validation and ``get_codec_options()``.

    Every test is gated on the encoder it targets, so it is skipped (rather than failing)
    on FFmpeg builds that lack that encoder. The gates all use the module-level
    ``require_*`` markers so each encoder name is spelled in exactly one place.
    """

    @require_libsvtav1
    def test_libsvtav1_defaults(self):
        """A default config reports g=2, crf=30 and preset=12, all as strings."""
        cfg = VideoEncoderConfig()
        opts = cfg.get_codec_options()
        assert opts["g"] == "2"
        assert opts["crf"] == "30"
        assert opts["preset"] == "12"

    @require_libsvtav1
    def test_libsvtav1_custom_preset(self):
        """An explicit preset replaces the default in the codec options."""
        cfg = VideoEncoderConfig(preset=8)
        assert cfg.get_codec_options()["preset"] == "8"

    @require_h264
    def test_h264_options(self):
        """h264 forwards g and crf and emits no preset when preset is None."""
        cfg = VideoEncoderConfig(vcodec="h264", g=10, crf=23, preset=None)
        opts = cfg.get_codec_options()
        assert opts["g"] == "10"
        assert opts["crf"] == "23"
        assert "preset" not in opts

    @require_videotoolbox
    def test_videotoolbox_options(self):
        """videotoolbox keeps g and reports quality through ``q:v`` instead of ``crf``."""
        cfg = VideoEncoderConfig(vcodec="h264_videotoolbox", g=2, crf=30, preset=None)
        opts = cfg.get_codec_options()
        assert opts["g"] == "2"
        # NOTE(review): the earlier test suite documented this mapping as
        # quality = max(1, min(100, 100 - crf * 2)), i.e. 40 for crf=30 - confirm it still holds.
        assert opts["q:v"] == "40"
        assert "crf" not in opts

    @require_nvenc
    def test_nvenc_options(self):
        """NVENC is expected to use rc=constqp with qp=<crf>, and to emit neither crf nor g."""
        cfg = VideoEncoderConfig(vcodec="h264_nvenc", g=2, crf=25, preset=None)
        opts = cfg.get_codec_options()
        assert opts["rc"] == "constqp"
        assert opts["qp"] == "25"
        assert "crf" not in opts
        assert "g" not in opts

    @require_vaapi
    def test_vaapi_options(self):
        """VAAPI is expected to expose the crf value under ``qp``."""
        cfg = VideoEncoderConfig(vcodec="h264_vaapi", crf=28, preset=None)
        assert cfg.get_codec_options()["qp"] == "28"

    @require_qsv
    def test_qsv_options(self):
        """QSV is expected to expose the crf value under ``global_quality``."""
        cfg = VideoEncoderConfig(vcodec="h264_qsv", crf=25, preset=None)
        assert cfg.get_codec_options()["global_quality"] == "25"

    @require_h264
    def test_no_g_no_crf(self):
        """Fields set to None are left out of the codec options entirely."""
        cfg = VideoEncoderConfig(vcodec="h264", g=None, crf=None, preset=None)
        opts = cfg.get_codec_options()
        assert "g" not in opts
        assert "crf" not in opts

    @require_libsvtav1
    def test_encoder_threads_libsvtav1(self):
        """For libsvtav1, encoder_threads shows up as ``lp=<n>`` inside ``svtav1-params``."""
        cfg = VideoEncoderConfig(fast_decode=0)
        opts = cfg.get_codec_options(encoder_threads=4)
        assert "lp=4" in opts.get("svtav1-params", "")

    @require_h264
    def test_encoder_threads_h264(self):
        """For h264, encoder_threads shows up under the ``threads`` option."""
        cfg = VideoEncoderConfig(vcodec="h264", preset=None)
        assert cfg.get_codec_options(encoder_threads=2)["threads"] == "2"

    @require_libsvtav1
    def test_fast_decode_libsvtav1(self):
        """For libsvtav1, fast_decode shows up as ``fast-decode=<n>`` inside ``svtav1-params``."""
        cfg = VideoEncoderConfig(fast_decode=1)
        opts = cfg.get_codec_options()
        assert "fast-decode=1" in opts.get("svtav1-params", "")

    @require_h264
    def test_fast_decode_h264(self):
        """For h264, fast_decode=1 is expressed as ``tune=fastdecode``."""
        cfg = VideoEncoderConfig(vcodec="h264", fast_decode=1, preset=None)
        assert cfg.get_codec_options()["tune"] == "fastdecode"

    @require_libsvtav1
    def test_pix_fmt_unsupported_raises(self):
        """Passing an unsupported pix_fmt is a hard error."""
        with pytest.raises(ValueError, match="pix_fmt"):
            VideoEncoderConfig(pix_fmt="yuv444p")  # libsvtav1 only supports yuv420p variants

    @require_libsvtav1
    @require_h264
    def test_preset_default_behaviour(self):
        """Empty constructor picks preset=12 (libsvtav1 path); other codecs stay None."""
        assert VideoEncoderConfig().preset == 12
        assert VideoEncoderConfig(vcodec="libsvtav1").preset == 12
        assert VideoEncoderConfig(vcodec="h264").preset is None
        assert VideoEncoderConfig(vcodec="h264", preset=None).preset is None

    @require_h264
    def test_preset_string_on_h264(self):
        """h264 accepts string presets and forwards them to FFmpeg."""
        cfg = VideoEncoderConfig(vcodec="h264", preset="slow")
        assert cfg.get_codec_options()["preset"] == "slow"

    @require_videotoolbox
    def test_preset_on_videotoolbox_raises(self):
        """videotoolbox has no preset option at all."""
        with pytest.raises(ValueError, match="preset"):
            VideoEncoderConfig(vcodec="h264_videotoolbox", preset="slow")

    @require_libsvtav1
    def test_libsvtav1_preset_out_of_range_raises(self):
        """libsvtav1 preset must sit in [-2, 13] as exposed by PyAV."""
        # One value above the range and one below it.
        with pytest.raises(ValueError, match="out of range"):
            VideoEncoderConfig(vcodec="libsvtav1", preset=100)
        with pytest.raises(ValueError, match="out of range"):
            VideoEncoderConfig(vcodec="libsvtav1", preset=-3)

    @require_libsvtav1
    def test_libsvtav1_crf_out_of_range_raises(self):
        """libsvtav1 crf must sit in [0, 63]."""
        with pytest.raises(ValueError, match="crf.*out of range"):
            VideoEncoderConfig(vcodec="libsvtav1", crf=64)

    @require_h264
    def test_h264_crf_accepts_float_and_int(self):
        """x264 exposes crf as a FLOAT option, so both int and float are accepted."""
        assert VideoEncoderConfig(vcodec="h264", crf=23).get_codec_options()["crf"] == "23"
        assert VideoEncoderConfig(vcodec="h264", crf=23.5).get_codec_options()["crf"] == "23.5"

    @require_libsvtav1
    def test_validate_is_rerunnable(self):
        """After mutating a field, validate() re-checks and surfaces new issues."""
        cfg = VideoEncoderConfig(vcodec="libsvtav1")
        cfg.preset = 100  # now out of range
        with pytest.raises(ValueError, match="out of range"):
            cfg.validate()

    @require_videotoolbox
    def test_fast_decode_on_videotoolbox_raises(self):
        """videotoolbox has no `tune` option; fast_decode must not be silently dropped."""
        with pytest.raises(ValueError, match="fast_decode"):
            VideoEncoderConfig(vcodec="h264_videotoolbox", preset=None, fast_decode=1)
class TestExtraOptions:
    """Checks on the free-form ``extra_options`` mapping of ``VideoEncoderConfig``."""

    @require_libsvtav1
    def test_default_is_empty_dict(self):
        """Without an explicit value, ``extra_options`` is an empty dict."""
        assert VideoEncoderConfig().extra_options == {}

    @require_libsvtav1
    def test_unknown_key_passes_through(self):
        """Keys not published as AVOptions are forwarded to FFmpeg."""
        extra = {"totally_made_up_option": "value"}
        config = VideoEncoderConfig(extra_options=extra)
        assert config.extra_options == {"totally_made_up_option": "value"}

    @require_libsvtav1
    def test_numeric_value_in_range_ok(self):
        """libsvtav1 exposes ``qp`` as INT in [0, 63]."""
        config = VideoEncoderConfig(extra_options={"qp": 30})
        assert config.extra_options == {"qp": 30}

    @require_libsvtav1
    def test_numeric_out_of_range_raises(self):
        """An integer beyond the option's range is rejected at construction."""
        expected_message = r"extra_options\['qp'\].*out of range"
        with pytest.raises(ValueError, match=expected_message):
            VideoEncoderConfig(extra_options={"qp": 999})

    @require_libsvtav1
    def test_numeric_string_accepted_in_range(self):
        """Numeric strings are accepted for numeric options (mirrors FFmpeg)."""
        config = VideoEncoderConfig(extra_options={"qp": "18"})
        # The value is stored as given; it is not coerced to an int.
        assert config.extra_options == {"qp": "18"}

    @require_libsvtav1
    def test_numeric_string_out_of_range_raises(self):
        """A numeric string is range-checked just like a number."""
        expected_message = r"extra_options\['qp'\].*out of range"
        with pytest.raises(ValueError, match=expected_message):
            VideoEncoderConfig(extra_options={"qp": "999"})

    @require_libsvtav1
    def test_non_numeric_string_on_numeric_option_raises(self):
        """A string that does not parse as a number is rejected for a numeric option."""
        expected_message = r"extra_options\['qp'\].*not numeric"
        with pytest.raises(ValueError, match=expected_message):
            VideoEncoderConfig(extra_options={"qp": "medium"})

    @require_libsvtav1
    def test_bool_on_numeric_option_raises(self):
        """``bool`` is explicitly rejected for numeric options."""
        expected_message = r"extra_options\['qp'\].*not numeric"
        with pytest.raises(ValueError, match=expected_message):
            VideoEncoderConfig(extra_options={"qp": True})

    @require_h264
    def test_string_option_passes_through_unchecked(self):
        """String-typed AVOptions are NOT enum-checked (too many accept freeform)."""
        extra = {"tune": "some-future-tune"}
        config = VideoEncoderConfig(vcodec="h264", preset=None, extra_options=extra)
        assert config.extra_options == {"tune": "some-future-tune"}

    @require_libsvtav1
    def test_merged_into_codec_options_and_stringified(self):
        """extra_options are merged into get_codec_options() as strings."""
        config = VideoEncoderConfig(extra_options={"qp": 20})
        qp_value = config.get_codec_options()["qp"]
        assert qp_value == "20"
        assert isinstance(qp_value, str)

    @require_libsvtav1
    def test_structured_fields_win_on_collision(self):
        """A colliding extra_options key is discarded; the structured field wins."""
        config = VideoEncoderConfig(crf=30, extra_options={"crf": 18})
        merged = config.get_codec_options()
        assert merged["crf"] == "30"
class TestEncoderDetection:
    """Checks on how ``VideoEncoderConfig`` resolves and validates the ``vcodec`` field."""

    @require_h264
    def test_explicit_codec_kept_when_available(self):
        """An explicitly requested codec is kept as-is."""
        resolved = VideoEncoderConfig(vcodec="h264").vcodec
        assert resolved == "h264"

    @require_videotoolbox
    def test_auto_picks_videotoolbox_when_available(self):
        """``h264_videotoolbox`` sits at the top of ``HW_ENCODERS`` so it wins when present."""
        resolved = VideoEncoderConfig(vcodec="auto").vcodec
        assert resolved == "h264_videotoolbox"

    def test_invalid_codec_raises(self):
        """An unknown codec name is rejected with an "Invalid vcodec" error."""
        with pytest.raises(ValueError, match="Invalid vcodec"):
            VideoEncoderConfig(vcodec="not_a_real_codec")

    def test_hw_encoder_names_listed_as_valid(self):
        """``auto`` and the hardware encoder names are members of ``VALID_VIDEO_CODECS``."""
        for codec_name in ("auto", "h264_videotoolbox", "h264_nvenc"):
            assert codec_name in VALID_VIDEO_CODECS
# Directory of pre-encoded clips that the tests below read as fixtures.
ARTIFACTS = Path(__file__).parent.parent / "fixtures" / "artifacts" / "videos"

# Name of the single camera feature used by the persistence tests. Defined before
# VIDEO_FEATURES so the feature table and the lookup key cannot drift apart.
VIDEO_KEY = "observation.images.cam"

# Default video feature set used by persistence tests.
VIDEO_FEATURES = {
    VIDEO_KEY: {
        "dtype": "video",
        "shape": (64, 96, 3),
        "names": ["height", "width", "channels"],
    },
    "action": {"dtype": "float32", "shape": (2,), "names": ["a", "b"]},
}
def _write_frames(imgs_dir: Path, num_frames: int = 4, height: int = 64, width: int = 96) -> None:
    """Create ``imgs_dir`` and fill it with randomly generated frames.

    Args:
        imgs_dir: Target directory; created with its parents if it does not exist.
        num_frames: Number of frames to write.
        height: Frame height in pixels.
        width: Frame width in pixels.
    """
    imgs_dir.mkdir(parents=True, exist_ok=True)
    frame_shape = (height, width, 3)
    for frame_index in range(num_frames):
        # Zero-padded, six-digit names: frame-000000.png, frame-000001.png, ...
        target = imgs_dir / f"frame-{frame_index:06d}.png"
        pixels = np.random.randint(0, 256, frame_shape, dtype=np.uint8)
        write_image(pixels, target)
def _encode_video(path: Path, num_frames: int = 4, fps: int = 30, cfg: VideoEncoderConfig | None = None) -> Path:
    """Write random frames next to ``path`` and encode them into a video at ``path``.

    Args:
        path: Output video path. Frames are staged in a sibling ``imgs_<stem>`` directory.
        num_frames: Number of frames to generate and encode.
        fps: Frame rate handed to ``encode_video_frames``.
        cfg: Encoder configuration forwarded as ``camera_encoder_config``; may be ``None``.

    Returns:
        ``path``, unchanged, for convenient chaining.
    """
    staging_dir = path.parent / f"imgs_{path.stem}"
    _write_frames(staging_dir, num_frames=num_frames)
    # overwrite=True so an existing file at ``path`` never short-circuits the encode.
    encode_video_frames(staging_dir, path, fps=fps, camera_encoder_config=cfg, overwrite=True)
    return path
def _read_feature_info(dataset: LeRobotDataset) -> dict:
    """Return the ``info`` entry of the ``VIDEO_KEY`` feature as stored on disk.

    The file at ``dataset.root / INFO_PATH`` is re-read on every call, so the result
    reflects what has been persisted rather than any in-memory state.
    """
    raw_text = (dataset.root / INFO_PATH).read_text()
    stored_features = json.loads(raw_text)["features"]
    return stored_features[VIDEO_KEY]["info"]
def _add_frames(dataset: LeRobotDataset, num_frames: int) -> None:
    """Append ``num_frames`` frames to ``dataset``.

    Each frame carries a random uint8 image shaped like the ``VIDEO_KEY`` feature,
    a two-element zero action and the task string ``"test"``.
    """
    image_shape = dataset.meta.features[VIDEO_KEY]["shape"]
    for _ in range(num_frames):
        frame = {
            VIDEO_KEY: np.random.randint(0, 256, image_shape, dtype=np.uint8),
            "action": np.zeros(2, dtype=np.float32),
            "task": "test",
        }
        dataset.add_frame(frame)
class TestGetVideoInfo:
    """Checks on ``get_video_info`` against the ``clip_4frames.mp4`` fixture."""

    def test_returns_all_stream_fields(self):
        """Without a config, only stream-derived fields are reported."""
        clip = ARTIFACTS / "clip_4frames.mp4"
        info = get_video_info(clip)

        assert info["video.height"] == 64
        assert info["video.width"] == 96
        assert info["video.pix_fmt"] == "yuv420p"
        assert info["video.fps"] == 30
        assert info["video.channels"] == 3
        assert info["video.is_depth_map"] is False
        assert info["has_audio"] is False

        # Encoder settings only appear when a config is supplied.
        for encoder_key in ("video.g", "video.crf", "video.preset"):
            assert encoder_key not in info

    @require_libsvtav1
    def test_merges_encoder_config_as_video_prefixed_entries(self):
        """Config fields are merged into the result under ``video.<field>`` keys."""
        clip = ARTIFACTS / "clip_4frames.mp4"
        encoder_config = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12)
        info = get_video_info(clip, camera_encoder_config=encoder_config)

        assert info["video.g"] == 2
        assert info["video.crf"] == 30
        assert info["video.preset"] == 12
        assert info["video.fast_decode"] == 0
        assert info["video.video_backend"] == "pyav"
        assert info["video.extra_options"] == {}

    @require_libsvtav1
    def test_stream_derived_keys_take_precedence_over_config(self):
        """Keys read from the stream are still present when a config is supplied."""
        clip = ARTIFACTS / "clip_4frames.mp4"
        encoder_config = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p")
        info = get_video_info(clip, camera_encoder_config=encoder_config)

        assert info["video.codec"]  # populated from stream, not from config's vcodec
        assert info["video.pix_fmt"] == "yuv420p"
class TestEncodeVideoFrames:
    """Checks on ``encode_video_frames`` using freshly generated random frames."""

    @require_libsvtav1
    def test_produces_readable_mp4(self, tmp_path):
        """The encoded file exists and reports the 64x96 input geometry."""
        encoded = _encode_video(tmp_path / "out.mp4")
        assert encoded.exists()

        info = get_video_info(encoded)
        assert info["video.height"] == 64
        assert info["video.width"] == 96

    @require_libsvtav1
    def test_frame_count_and_duration_match_input(self, tmp_path):
        """Decoded frame count equals the input count and duration is num_frames / fps."""
        num_frames = 10
        fps = 30
        encoded = _encode_video(tmp_path / "out.mp4", num_frames=num_frames, fps=fps)

        with av.open(str(encoded)) as container:
            stream = container.streams.video[0]
            decoded_frames = 0
            for _ in container.decode(stream):
                decoded_frames += 1
            # Use the container-level duration when the stream does not carry one.
            if stream.duration is None:
                seconds = float(container.duration / av.time_base)
            else:
                seconds = float(stream.duration * stream.time_base)

        assert decoded_frames == num_frames
        assert abs(seconds - num_frames / fps) < 0.1

    def test_overwrite_false_skips_existing_file(self, tmp_path):
        """With overwrite=False an existing output file is left byte-for-byte untouched."""
        frames_dir = tmp_path / "imgs"
        _write_frames(frames_dir)
        output = tmp_path / "out.mp4"
        sentinel = b"pre-existing content"
        output.write_bytes(sentinel)

        encode_video_frames(frames_dir, output, fps=30, overwrite=False)

        assert output.read_bytes() == sentinel

    @require_libsvtav1
    def test_overwrite_true_replaces_existing_file(self, tmp_path):
        """With overwrite=True a stale file is replaced by a readable video."""
        frames_dir = tmp_path / "imgs"
        _write_frames(frames_dir)
        output = tmp_path / "out.mp4"
        output.write_bytes(b"stale content")

        encode_video_frames(frames_dir, output, fps=30, overwrite=True)

        assert get_video_info(output)["video.height"] == 64

    @require_libsvtav1
    def test_custom_encoder_config_fields_stored_in_info(self, tmp_path):
        """All stream-derived and encoder config fields are present after encoding."""
        encoder_config = VideoEncoderConfig(vcodec="libsvtav1", g=4, crf=25, preset=10)
        encoded = _encode_video(tmp_path / "out.mp4", num_frames=4, fps=30, cfg=encoder_config)
        info = get_video_info(encoded, camera_encoder_config=encoder_config)

        # Stream-derived
        assert info["video.height"] == 64
        assert info["video.width"] == 96
        assert info["video.channels"] == 3
        assert info["video.codec"] == "av1"
        assert info["video.pix_fmt"] == "yuv420p"
        assert info["video.fps"] == 30
        assert info["video.is_depth_map"] is False
        assert info["has_audio"] is False

        # Encoder config
        assert info["video.g"] == 4
        assert info["video.crf"] == 25
        assert info["video.preset"] == 10
        assert info["video.fast_decode"] == 0
        assert info["video.video_backend"] == "pyav"
        assert info["video.extra_options"] == {}
class TestConcatenateVideoFiles:
    """Checks on ``concatenate_video_files`` using the pre-encoded fixture clips."""

    @staticmethod
    def _decoded_frame_count(video_path):
        """Decode the first video stream of ``video_path`` and return how many frames it yields."""
        with av.open(str(video_path)) as container:
            return sum(1 for _ in container.decode(video=0))

    def test_two_clips_frame_count(self, tmp_path):
        """Output frame count equals the sum of the two input frame counts."""
        merged = tmp_path / "out.mp4"
        sources = [ARTIFACTS / "clip_6frames.mp4", ARTIFACTS / "clip_4frames.mp4"]
        concatenate_video_files(sources, merged)
        assert self._decoded_frame_count(merged) == 10

    def test_three_clips_frame_count(self, tmp_path):
        """The same clip concatenated three times yields three times its frame count."""
        merged = tmp_path / "out.mp4"
        clip = ARTIFACTS / "clip_5frames.mp4"
        concatenate_video_files([clip, clip, clip], merged)
        assert self._decoded_frame_count(merged) == 15

    @require_libsvtav1
    def test_geometry_preserved(self, tmp_path):
        """Output resolution, fps, codec and pixel format must match the inputs."""
        merged = tmp_path / "out.mp4"
        clip = ARTIFACTS / "clip_4frames.mp4"
        concatenate_video_files([clip, clip], merged)

        info = get_video_info(merged)
        assert info["video.height"] == 64
        assert info["video.width"] == 96
        assert info["video.fps"] == 30
        assert info["video.codec"] == "av1"
        assert info["video.pix_fmt"] == "yuv420p"

    def test_compatibility_check_raises_on_different_codec(self, tmp_path):
        """Mixing the default clip with ``clip_h264.mp4`` is rejected when the check is on."""
        sources = [ARTIFACTS / "clip_4frames.mp4", ARTIFACTS / "clip_h264.mp4"]
        # NOTE(review): the keyword is spelled ``compatibilty_check``; presumably this
        # matches the callee's signature - confirm before correcting the spelling.
        with pytest.raises(ValueError):
            concatenate_video_files(sources, tmp_path / "out.mp4", compatibilty_check=True)

    def test_compatibility_check_raises_on_different_resolution(self, tmp_path):
        """Mixing the default clip with ``clip_32x48.mp4`` is rejected when the check is on."""
        sources = [ARTIFACTS / "clip_4frames.mp4", ARTIFACTS / "clip_32x48.mp4"]
        with pytest.raises(ValueError):
            concatenate_video_files(sources, tmp_path / "out.mp4", compatibilty_check=True)
class TestEncoderConfigPersistence:
    """Encoder config must be stored as ``video.<field>`` entries in
    ``info["features"][key]["info"]`` when the first episode is saved.
    """

    @staticmethod
    def _new_dataset(factory, root):
        """Create an empty video dataset at ``root`` with a fixed libsvtav1 encoder config."""
        encoder_config = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12)
        return factory(
            root=root, features=VIDEO_FEATURES, use_videos=True, camera_encoder_config=encoder_config
        )

    @require_libsvtav1
    def test_first_episode_save_persists_encoder_config(self, tmp_path, empty_lerobot_dataset_factory):
        """After one saved episode, stream and encoder fields are both on disk."""
        dataset = self._new_dataset(empty_lerobot_dataset_factory, tmp_path / "ds")
        _add_frames(dataset, num_frames=4)
        dataset.save_episode()
        dataset.finalize()

        stored = _read_feature_info(dataset)
        assert stored["video.height"] == 64
        assert stored["video.width"] == 96
        assert stored["video.fps"] == 30
        assert stored["video.g"] == 2
        assert stored["video.crf"] == 30
        assert stored["video.preset"] == 12
        assert stored["video.fast_decode"] == 0
        assert stored["video.video_backend"] == "pyav"
        assert stored["video.extra_options"] == {}

    @require_libsvtav1
    def test_second_episode_does_not_overwrite_encoder_fields(self, tmp_path, empty_lerobot_dataset_factory):
        """Saving a second episode leaves the persisted feature info unchanged."""
        dataset = self._new_dataset(empty_lerobot_dataset_factory, tmp_path / "ds")

        _add_frames(dataset, num_frames=4)
        dataset.save_episode()
        # Snapshot taken after the first episode only.
        after_first_episode = dict(_read_feature_info(dataset))

        _add_frames(dataset, num_frames=4)
        dataset.save_episode()
        dataset.finalize()

        assert _read_feature_info(dataset) == after_first_episode
+3
View File
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2191cd86e9e32ecbe18e33ad68d49060e479723ab5a3212bbb26df3025ccb568
size 5815
+3
View File
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e0ebf563ba3ed9c24b691a0f0b29e0294a1fa9b51422e1ece296155f1465768
size 16236
+3
View File
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8475bfd5e6c4c780df46200e2b027e262b38436c57d01078bd943a5b87c65b8f
size 20726
+3
View File
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6434322d1c671a7d132367619f841a775317cb9ff973f3f4505831e3ed74076d
size 23808
+3
View File
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8efc84375e92a3499cef93100e04d8fb354670f3d9e0db2097b52575927284fc
size 12237