#!/usr/bin/env python # Copyright 2026 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Tests for streaming video encoding and hardware-accelerated encoding.""" import queue import threading from unittest.mock import patch import numpy as np import pytest pytest.importorskip("av", reason="av is required (install lerobot[dataset])") import av # noqa: E402 from lerobot.datasets.video_utils import ( VALID_VIDEO_CODECS, StreamingVideoEncoder, _CameraEncoderThread, _get_codec_options, detect_available_hw_encoders, resolve_vcodec, ) from lerobot.utils.constants import OBS_IMAGES # ─── _get_codec_options tests ─── class TestGetCodecOptions: def test_libsvtav1_defaults(self): opts = _get_codec_options("libsvtav1") assert opts["g"] == "2" assert opts["crf"] == "30" assert opts["preset"] == "12" def test_libsvtav1_custom_preset(self): opts = _get_codec_options("libsvtav1", preset=8) assert opts["preset"] == "8" def test_h264_options(self): opts = _get_codec_options("h264", g=10, crf=23) assert opts["g"] == "10" assert opts["crf"] == "23" assert "preset" not in opts def test_videotoolbox_options(self): opts = _get_codec_options("h264_videotoolbox", g=2, crf=30) assert opts["g"] == "2" # CRF 30 maps to quality = max(1, min(100, 100 - 30*2)) = 40 assert opts["q:v"] == "40" assert "crf" not in opts def test_nvenc_options(self): opts = _get_codec_options("h264_nvenc", g=2, crf=25) assert opts["rc"] == "constqp" assert opts["qp"] == "25" assert "crf" not in opts # NVENC doesn't support g assert "g" not in opts def test_vaapi_options(self): opts = _get_codec_options("h264_vaapi", crf=28) assert opts["qp"] == "28" def test_qsv_options(self): opts = _get_codec_options("h264_qsv", crf=25) assert opts["global_quality"] == "25" def test_no_g_no_crf(self): opts = _get_codec_options("h264", g=None, crf=None) assert "g" not in opts assert "crf" not in opts # ─── HW encoder detection tests ─── class TestHWEncoderDetection: def test_detect_available_hw_encoders_returns_list(self): result = detect_available_hw_encoders() assert isinstance(result, list) def test_detect_available_hw_encoders_only_valid(self): from lerobot.datasets.video_utils import HW_ENCODERS result = detect_available_hw_encoders() for encoder in result: assert encoder in HW_ENCODERS def test_resolve_vcodec_passthrough(self): assert resolve_vcodec("libsvtav1") == "libsvtav1" assert resolve_vcodec("h264") == "h264" def test_resolve_vcodec_auto_fallback(self): """When no HW encoders are available, auto should fall back to libsvtav1.""" with patch("lerobot.datasets.video_utils.detect_available_hw_encoders", return_value=[]): assert resolve_vcodec("auto") == "libsvtav1" def test_resolve_vcodec_auto_picks_hw(self): """When a HW encoder is available, auto should pick it.""" with patch( "lerobot.datasets.video_utils.detect_available_hw_encoders", return_value=["h264_videotoolbox"], ): assert resolve_vcodec("auto") == "h264_videotoolbox" def test_resolve_vcodec_auto_returns_valid(self): """Test that resolve_vcodec('auto') returns a known valid codec.""" result = resolve_vcodec("auto") assert result in VALID_VIDEO_CODECS def test_hw_encoder_names_accepted_in_validation(self): """Test that HW encoder names pass validation in VALID_VIDEO_CODECS.""" assert "auto" in VALID_VIDEO_CODECS assert "h264_videotoolbox" in VALID_VIDEO_CODECS assert "h264_nvenc" in VALID_VIDEO_CODECS def test_resolve_vcodec_invalid_raises(self): """Test that resolve_vcodec raises ValueError for invalid codecs.""" with pytest.raises(ValueError, match="Invalid vcodec"): resolve_vcodec("not_a_real_codec") # ─── _CameraEncoderThread tests ─── class TestCameraEncoderThread: def test_encodes_valid_mp4(self, tmp_path): """Test that the encoder thread creates a valid MP4 file with correct frame count.""" num_frames = 30 height, width = 64, 96 fps = 30 video_path = tmp_path / "test_output" / "test.mp4" frame_queue: queue.Queue = queue.Queue(maxsize=60) result_queue: queue.Queue = queue.Queue(maxsize=1) stop_event = threading.Event() encoder_thread = _CameraEncoderThread( video_path=video_path, fps=fps, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13, frame_queue=frame_queue, result_queue=result_queue, stop_event=stop_event, ) encoder_thread.start() # Feed frames (HWC uint8) for _ in range(num_frames): frame = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8) frame_queue.put(frame) # Send sentinel frame_queue.put(None) encoder_thread.join(timeout=60) assert not encoder_thread.is_alive() # Check result status, data = result_queue.get(timeout=5) assert status == "ok" assert data is not None # Stats should be returned assert "mean" in data assert "std" in data assert "min" in data assert "max" in data assert "count" in data # Verify the MP4 file is valid assert video_path.exists() with av.open(str(video_path)) as container: stream = container.streams.video[0] # The frame count should match total_frames = sum(1 for _ in container.decode(stream)) assert total_frames == num_frames def test_handles_chw_input(self, tmp_path): """Test that CHW format input is handled correctly.""" num_frames = 5 fps = 30 video_path = tmp_path / "test_chw" / "test.mp4" frame_queue: queue.Queue = queue.Queue(maxsize=60) result_queue: queue.Queue = queue.Queue(maxsize=1) stop_event = threading.Event() encoder_thread = _CameraEncoderThread( video_path=video_path, fps=fps, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13, frame_queue=frame_queue, result_queue=result_queue, stop_event=stop_event, ) encoder_thread.start() # Feed CHW frames for _ in range(num_frames): frame = np.random.randint(0, 255, (3, 64, 96), dtype=np.uint8) frame_queue.put(frame) frame_queue.put(None) encoder_thread.join(timeout=60) status, _ = result_queue.get(timeout=5) assert status == "ok" assert video_path.exists() def test_stop_event_cancellation(self, tmp_path): """Test that setting the stop event causes the thread to exit.""" fps = 30 video_path = tmp_path / "test_cancel" / "test.mp4" frame_queue: queue.Queue = queue.Queue(maxsize=60) result_queue: queue.Queue = queue.Queue(maxsize=1) stop_event = threading.Event() encoder_thread = _CameraEncoderThread( video_path=video_path, fps=fps, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13, frame_queue=frame_queue, result_queue=result_queue, stop_event=stop_event, ) encoder_thread.start() # Feed a few frames for _ in range(3): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) frame_queue.put(frame) # Signal stop instead of sending sentinel stop_event.set() encoder_thread.join(timeout=10) assert not encoder_thread.is_alive() # ─── StreamingVideoEncoder tests ─── class TestStreamingVideoEncoder: def test_single_camera_episode(self, tmp_path): """Test encoding a single camera episode.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) video_keys = [f"{OBS_IMAGES}.laptop"] encoder.start_episode(video_keys, tmp_path) num_frames = 20 for _ in range(num_frames): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.laptop", frame) results = encoder.finish_episode() assert f"{OBS_IMAGES}.laptop" in results mp4_path, stats = results[f"{OBS_IMAGES}.laptop"] assert mp4_path.exists() assert stats is not None # Verify frame count with av.open(str(mp4_path)) as container: stream = container.streams.video[0] total_frames = sum(1 for _ in container.decode(stream)) assert total_frames == num_frames encoder.close() def test_multi_camera_episode(self, tmp_path): """Test encoding multiple cameras simultaneously.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) video_keys = [f"{OBS_IMAGES}.laptop", f"{OBS_IMAGES}.phone"] encoder.start_episode(video_keys, tmp_path) num_frames = 15 for _ in range(num_frames): frame0 = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) frame1 = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(video_keys[0], frame0) encoder.feed_frame(video_keys[1], frame1) results = encoder.finish_episode() for key in video_keys: assert key in results mp4_path, stats = results[key] assert mp4_path.exists() assert stats is not None encoder.close() def test_sequential_episodes(self, tmp_path): """Test that multiple sequential episodes work correctly.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) video_keys = [f"{OBS_IMAGES}.cam"] for ep in range(3): encoder.start_episode(video_keys, tmp_path) num_frames = 10 + ep * 5 for _ in range(num_frames): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.cam", frame) results = encoder.finish_episode() mp4_path, stats = results[f"{OBS_IMAGES}.cam"] assert mp4_path.exists() with av.open(str(mp4_path)) as container: stream = container.streams.video[0] total_frames = sum(1 for _ in container.decode(stream)) assert total_frames == num_frames encoder.close() def test_cancel_episode(self, tmp_path): """Test that canceling an episode cleans up properly.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) video_keys = [f"{OBS_IMAGES}.cam"] encoder.start_episode(video_keys, tmp_path) for _ in range(5): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.cam", frame) encoder.cancel_episode() # Should be able to start a new episode after cancel encoder.start_episode(video_keys, tmp_path) for _ in range(5): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.cam", frame) results = encoder.finish_episode() assert f"{OBS_IMAGES}.cam" in results encoder.close() def test_feed_without_start_raises(self, tmp_path): """Test that feeding frames without starting an episode raises.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") with pytest.raises(RuntimeError, match="No active episode"): encoder.feed_frame("cam", np.zeros((64, 96, 3), dtype=np.uint8)) encoder.close() def test_finish_without_start_raises(self, tmp_path): """Test that finishing without starting raises.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") with pytest.raises(RuntimeError, match="No active episode"): encoder.finish_episode() encoder.close() def test_close_is_idempotent(self, tmp_path): """Test that close() can be called multiple times safely.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") encoder.close() encoder.close() # Should not raise def test_video_duration_matches_frame_count(self, tmp_path): """Test that encoded video duration matches num_frames / fps.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13) video_keys = [f"{OBS_IMAGES}.cam"] encoder.start_episode(video_keys, tmp_path) num_frames = 90 # 3 seconds at 30fps for _ in range(num_frames): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.cam", frame) results = encoder.finish_episode() mp4_path, _ = results[f"{OBS_IMAGES}.cam"] expected_duration = num_frames / 30.0 # 3.0 seconds with av.open(str(mp4_path)) as container: stream = container.streams.video[0] total_frames = sum(1 for _ in container.decode(stream)) if stream.duration is not None: actual_duration = float(stream.duration * stream.time_base) else: actual_duration = float(container.duration / av.time_base) assert total_frames == num_frames # Allow small tolerance for duration due to codec framing assert abs(actual_duration - expected_duration) < 0.5, ( f"Video duration {actual_duration:.2f}s != expected {expected_duration:.2f}s" ) encoder.close() def test_multi_camera_start_episode_called_once(self, tmp_path): """Test that with multiple cameras, no frames are lost due to double start_episode.""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30) video_keys = [f"{OBS_IMAGES}.cam1", f"{OBS_IMAGES}.cam2"] encoder.start_episode(video_keys, tmp_path) num_frames = 30 for _ in range(num_frames): frame0 = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) frame1 = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(video_keys[0], frame0) encoder.feed_frame(video_keys[1], frame1) results = encoder.finish_episode() # Both cameras should have all frames for key in video_keys: mp4_path, stats = results[key] assert mp4_path.exists() with av.open(str(mp4_path)) as container: stream = container.streams.video[0] total_frames = sum(1 for _ in container.decode(stream)) assert total_frames == num_frames, ( f"Camera {key}: expected {num_frames} frames, got {total_frames}" ) encoder.close() def test_encoder_threads_passed_to_thread(self, tmp_path): """Test that encoder_threads is stored and passed through to encoder threads.""" encoder = StreamingVideoEncoder( fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, encoder_threads=2 ) assert encoder.encoder_threads == 2 video_keys = [f"{OBS_IMAGES}.cam"] encoder.start_episode(video_keys, tmp_path) # Verify the thread received the encoder_threads value thread = encoder._threads[f"{OBS_IMAGES}.cam"] assert thread.encoder_threads == 2 # Feed some frames and finish to ensure it works end-to-end num_frames = 10 for _ in range(num_frames): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.cam", frame) results = encoder.finish_episode() mp4_path, stats = results[f"{OBS_IMAGES}.cam"] assert mp4_path.exists() assert stats is not None with av.open(str(mp4_path)) as container: stream = container.streams.video[0] total_frames = sum(1 for _ in container.decode(stream)) assert total_frames == num_frames encoder.close() def test_encoder_threads_none_by_default(self, tmp_path): """Test that encoder_threads defaults to None (codec auto-detect).""" encoder = StreamingVideoEncoder(fps=30, vcodec="libsvtav1", pix_fmt="yuv420p") assert encoder.encoder_threads is None encoder.close() def test_graceful_frame_dropping(self, tmp_path): """Test that full queue drops frames instead of crashing.""" encoder = StreamingVideoEncoder( fps=30, vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13, queue_maxsize=1 ) video_keys = [f"{OBS_IMAGES}.cam"] encoder.start_episode(video_keys, tmp_path) # Feed many frames quickly - with queue_maxsize=1, some will be dropped num_frames = 50 for _ in range(num_frames): frame = np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8) encoder.feed_frame(f"{OBS_IMAGES}.cam", frame) # Should not raise - frames are dropped gracefully results = encoder.finish_episode() assert f"{OBS_IMAGES}.cam" in results mp4_path, _ = results[f"{OBS_IMAGES}.cam"] assert mp4_path.exists() # Some frames should have been dropped (queue was tiny) dropped = encoder._dropped_frames.get(f"{OBS_IMAGES}.cam", 0) # We can't guarantee drops but can verify no crash occurred assert dropped >= 0 encoder.close() # ─── Integration tests with LeRobotDataset ─── class TestStreamingEncoderIntegration: def test_add_frame_save_episode_streaming(self, tmp_path): """Full integration test: add_frame -> save_episode with streaming encoding.""" from lerobot.datasets.lerobot_dataset import LeRobotDataset features = { "observation.images.cam": { "dtype": "video", "shape": (64, 96, 3), "names": ["height", "width", "channels"], }, "action": {"dtype": "float32", "shape": (6,), "names": ["j1", "j2", "j3", "j4", "j5", "j6"]}, } dataset = LeRobotDataset.create( repo_id="test/streaming", fps=30, features=features, root=tmp_path / "streaming_test", use_videos=True, streaming_encoding=True, ) assert dataset.writer._streaming_encoder is not None num_frames = 20 for _ in range(num_frames): frame = { "observation.images.cam": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "action": np.random.randn(6).astype(np.float32), "task": "test task", } dataset.add_frame(frame) dataset.save_episode() # Verify dataset metadata assert dataset.meta.total_episodes == 1 assert dataset.meta.total_frames == num_frames # Verify stats exist for the video key assert dataset.meta.stats is not None assert "observation.images.cam" in dataset.meta.stats assert "action" in dataset.meta.stats dataset.finalize() def test_streaming_disabled_creates_pngs(self, tmp_path): """Test that disabling streaming encoding falls back to PNG path.""" from lerobot.datasets.lerobot_dataset import LeRobotDataset features = { "observation.images.cam": { "dtype": "video", "shape": (64, 96, 3), "names": ["height", "width", "channels"], }, "action": {"dtype": "float32", "shape": (6,), "names": ["j1", "j2", "j3", "j4", "j5", "j6"]}, } dataset = LeRobotDataset.create( repo_id="test/no_streaming", fps=30, features=features, root=tmp_path / "no_streaming_test", use_videos=True, streaming_encoding=False, ) assert dataset.writer._streaming_encoder is None num_frames = 5 for _ in range(num_frames): frame = { "observation.images.cam": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "action": np.random.randn(6).astype(np.float32), "task": "test task", } dataset.add_frame(frame) # With streaming disabled, PNG files should be written images_dir = dataset.root / "images" assert images_dir.exists() dataset.save_episode() dataset.finalize() def test_multi_episode_streaming(self, tmp_path): """Test recording multiple episodes with streaming encoding.""" from lerobot.datasets.lerobot_dataset import LeRobotDataset features = { "observation.images.cam": { "dtype": "video", "shape": (64, 96, 3), "names": ["height", "width", "channels"], }, "action": {"dtype": "float32", "shape": (2,), "names": ["j1", "j2"]}, } dataset = LeRobotDataset.create( repo_id="test/multi_ep", fps=30, features=features, root=tmp_path / "multi_ep_test", use_videos=True, streaming_encoding=True, ) for ep in range(3): num_frames = 10 + ep * 5 for _ in range(num_frames): frame = { "observation.images.cam": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "action": np.random.randn(2).astype(np.float32), "task": f"task_{ep}", } dataset.add_frame(frame) dataset.save_episode() assert dataset.meta.total_episodes == 3 assert dataset.meta.total_frames == 10 + 15 + 20 dataset.finalize() def test_clear_episode_buffer_cancels_streaming(self, tmp_path): """Test that clearing episode buffer cancels streaming encoding.""" from lerobot.datasets.lerobot_dataset import LeRobotDataset features = { "observation.images.cam": { "dtype": "video", "shape": (64, 96, 3), "names": ["height", "width", "channels"], }, "action": {"dtype": "float32", "shape": (2,), "names": ["j1", "j2"]}, } dataset = LeRobotDataset.create( repo_id="test/cancel", fps=30, features=features, root=tmp_path / "cancel_test", use_videos=True, streaming_encoding=True, ) # Add some frames for _ in range(5): frame = { "observation.images.cam": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "action": np.random.randn(2).astype(np.float32), "task": "task", } dataset.add_frame(frame) # Cancel and re-record dataset.clear_episode_buffer() # Record a new episode for _ in range(10): frame = { "observation.images.cam": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "action": np.random.randn(2).astype(np.float32), "task": "task", } dataset.add_frame(frame) dataset.save_episode() assert dataset.meta.total_episodes == 1 assert dataset.meta.total_frames == 10 dataset.finalize() def test_multi_camera_add_frame_streaming(self, tmp_path): """Test that start_episode is called once with multiple video keys.""" from lerobot.datasets.lerobot_dataset import LeRobotDataset features = { "observation.images.cam1": { "dtype": "video", "shape": (64, 96, 3), "names": ["height", "width", "channels"], }, "observation.images.cam2": { "dtype": "video", "shape": (64, 96, 3), "names": ["height", "width", "channels"], }, "action": {"dtype": "float32", "shape": (2,), "names": ["j1", "j2"]}, } dataset = LeRobotDataset.create( repo_id="test/multi_cam", fps=30, features=features, root=tmp_path / "multi_cam_test", use_videos=True, streaming_encoding=True, ) num_frames = 15 for _ in range(num_frames): frame = { "observation.images.cam1": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "observation.images.cam2": np.random.randint(0, 255, (64, 96, 3), dtype=np.uint8), "action": np.random.randn(2).astype(np.float32), "task": "test task", } dataset.add_frame(frame) dataset.save_episode() assert dataset.meta.total_episodes == 1 assert dataset.meta.total_frames == num_frames dataset.finalize()