diff --git a/docs/source/streaming_video_encoding.mdx b/docs/source/streaming_video_encoding.mdx index 9867ab559..ec5feefd7 100644 --- a/docs/source/streaming_video_encoding.mdx +++ b/docs/source/streaming_video_encoding.mdx @@ -19,7 +19,7 @@ This makes `save_episode()` near-instant (the video is already encoded by the ti | `streaming_encoding` | `--dataset.streaming_encoding` | `bool` | `True` | Enable real-time encoding during capture | | `vcodec` | `--dataset.camera_encoder_config.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder | | `encoder_threads` | `--dataset.encoder_threads` | `int \| None` | `None` (auto) | Threads per encoder instance. `None` will leave the vcoded decide | -| `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `60` | Max buffered frames per camera (~2s at 30fps). Consumes RAM | +| `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `30` | Max buffered frames per camera (~1s at 30fps). Consumes RAM | ## 3. 
Performance Considerations diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index 57b967ac5..bbc8f6f89 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -23,6 +23,7 @@ import pyarrow as pa import pyarrow.parquet as pq from huggingface_hub import snapshot_download +from lerobot.configs import VideoEncoderConfig from lerobot.utils.constants import DEFAULT_FEATURES, HF_LEROBOT_HOME, HF_LEROBOT_HUB_CACHE from lerobot.utils.feature_utils import _validate_feature_names from lerobot.utils.utils import flatten_dict @@ -48,7 +49,7 @@ from .utils import ( is_valid_version, update_chunk_file_indices, ) -from .video_utils import VideoEncoderConfig, get_video_info +from .video_utils import get_video_info CODEBASE_VERSION = "v3.0" diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index 9aa810b3f..63ba47df7 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -36,6 +36,7 @@ import pyarrow.parquet as pq import torch from tqdm import tqdm +from lerobot.configs import VideoEncoderConfig, camera_encoder_defaults from lerobot.utils.constants import ACTION, HF_LEROBOT_HOME, OBS_IMAGE, OBS_STATE from lerobot.utils.utils import flatten_dict @@ -63,8 +64,6 @@ from .utils import ( update_chunk_file_indices, ) from .video_utils import ( - VideoEncoderConfig, - camera_encoder_defaults, encode_video_frames, get_video_info, ) @@ -107,7 +106,7 @@ def delete_episodes( output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. camera_encoder_config: Video encoder settings used when re-encoding video segments - (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). 
+ (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). """ if not episode_indices: raise ValueError("No episodes to delete") @@ -172,7 +171,7 @@ def split_dataset( split names to fractions (must sum to <= 1.0). output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. camera_encoder_config: Video encoder settings used when re-encoding video segments - (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). + (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). Examples: Split by specific episodes @@ -606,7 +605,7 @@ def _keep_episodes_from_video_with_av( is inclusive and end_frame is exclusive. fps: Frame rate of the video. camera_encoder_config: Video encoder settings - (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). + (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). """ if camera_encoder_config is None: camera_encoder_config = camera_encoder_defaults() @@ -714,7 +713,7 @@ def _copy_and_reindex_videos( dst_meta: Destination metadata object episode_mapping: Mapping from old episode indices to new indices camera_encoder_config: Video encoder settings used when re-encoding segments - (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). + (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). Returns: dict mapping episode index to its video metadata (chunk_index, file_index, timestamps) @@ -1664,7 +1663,7 @@ def convert_image_to_video_dataset( output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. camera_encoder_config: Video encoder settings - (``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`). 
+ (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). episode_indices: List of episode indices to convert (None = all episodes) num_workers: Number of threads for parallel processing (default: 4) max_episodes_per_batch: Maximum episodes per video batch to avoid memory issues (None = no limit) diff --git a/src/lerobot/datasets/dataset_writer.py b/src/lerobot/datasets/dataset_writer.py index da646d785..76b740d73 100644 --- a/src/lerobot/datasets/dataset_writer.py +++ b/src/lerobot/datasets/dataset_writer.py @@ -31,6 +31,8 @@ import PIL.Image import pyarrow.parquet as pq import torch +from lerobot.configs import VideoEncoderConfig, camera_encoder_defaults + from .compute_stats import compute_episode_stats from .dataset_metadata import LeRobotDatasetMetadata from .feature_utils import ( @@ -52,8 +54,6 @@ from .utils import ( ) from .video_utils import ( StreamingVideoEncoder, - VideoEncoderConfig, - camera_encoder_defaults, concatenate_video_files, encode_video_frames, get_video_duration_in_s, @@ -109,7 +109,7 @@ class DatasetWriter: settings, and episode persistence). root: Local dataset root directory. camera_encoder_config: Video encoder settings applied to all cameras. - ``None`` uses :func:`~lerobot.datasets.video_utils.camera_encoder_defaults`. + ``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. 
batch_encoding_size: Number of episodes to accumulate before diff --git a/src/lerobot/datasets/feature_utils.py b/src/lerobot/datasets/feature_utils.py index ce9208134..d5a550a4c 100644 --- a/src/lerobot/datasets/feature_utils.py +++ b/src/lerobot/datasets/feature_utils.py @@ -140,7 +140,6 @@ def features_equal_for_merge(features_a: dict[str, dict], features_b: dict[str, continue if _without_encoder_info_keys(fa_key) != _without_encoder_info_keys(fb_key): - raise ValueError(f"Features {fa_key} and {fb_key} are not equal") return False return True diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index 44d0c358d..d86bd3327 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -24,6 +24,7 @@ import torch.utils from huggingface_hub import HfApi, snapshot_download from huggingface_hub.errors import RevisionNotFoundError +from lerobot.configs import VideoEncoderConfig from lerobot.utils.constants import HF_LEROBOT_HUB_CACHE from .dataset_metadata import CODEBASE_VERSION, LeRobotDatasetMetadata @@ -36,7 +37,6 @@ from .utils import ( ) from .video_utils import ( StreamingVideoEncoder, - VideoEncoderConfig, get_safe_default_video_backend, ) @@ -178,7 +178,7 @@ class LeRobotDataset(torch.utils.data.Dataset): batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos. Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1. camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras - (codec, quality, etc.). When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` + (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used by the writer. encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the codec decide. 
@@ -654,7 +654,7 @@ class LeRobotDataset(torch.utils.data.Dataset): batch_encoding_size: Number of episodes to accumulate before batch-encoding videos. ``1`` means encode immediately. camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.). - When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used. + When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. metadata_buffer_size: Number of episode metadata records to buffer @@ -755,7 +755,7 @@ class LeRobotDataset(torch.utils.data.Dataset): batch_encoding_size: Number of episodes to accumulate before batch-encoding videos. camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.). - When ``None``, :func:`~lerobot.datasets.video_utils.camera_encoder_defaults` is used. + When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. image_writer_processes: Subprocesses for async image writing. 
diff --git a/src/lerobot/datasets/pyav_utils.py b/src/lerobot/datasets/pyav_utils.py index 7adeb8c1a..ad2702674 100644 --- a/src/lerobot/datasets/pyav_utils.py +++ b/src/lerobot/datasets/pyav_utils.py @@ -27,7 +27,7 @@ from typing import Any import av -from lerobot.configs.video import VideoEncoderConfig +from lerobot.configs import VideoEncoderConfig logger = logging.getLogger(__name__) diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 345eac436..3aca1d085 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -36,7 +36,7 @@ import torch from datasets.features.features import register_feature from PIL import Image -from lerobot.configs.video import ( +from lerobot.configs import ( VideoEncoderConfig, camera_encoder_defaults, ) @@ -905,7 +905,7 @@ def get_audio_info(video_path: Path | str) -> dict: def get_video_info( video_path: Path | str, - camera_encoder_config: "VideoEncoderConfig | None" = None, + camera_encoder_config: VideoEncoderConfig | None = None, ) -> dict: """Build the ``video.*`` / ``audio.*`` info dict persisted in ``info.json``. 
diff --git a/tests/datasets/test_dataset_tools.py b/tests/datasets/test_dataset_tools.py index abb380956..2e528a3c8 100644 --- a/tests/datasets/test_dataset_tools.py +++ b/tests/datasets/test_dataset_tools.py @@ -23,6 +23,7 @@ import torch pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") +from lerobot.configs import VideoEncoderConfig from lerobot.datasets.dataset_tools import ( add_features, convert_image_to_video_dataset, @@ -33,7 +34,6 @@ from lerobot.datasets.dataset_tools import ( remove_feature, split_dataset, ) -from lerobot.datasets.video_utils import VideoEncoderConfig @pytest.fixture diff --git a/tests/datasets/test_dataset_writer.py b/tests/datasets/test_dataset_writer.py index 35dc605bb..773a2ec05 100644 --- a/tests/datasets/test_dataset_writer.py +++ b/tests/datasets/test_dataset_writer.py @@ -25,10 +25,10 @@ from PIL import Image pytest.importorskip("datasets", reason="datasets is required (install lerobot[dataset])") +from lerobot.configs import VideoEncoderConfig from lerobot.datasets.dataset_writer import _encode_video_worker from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.utils import DEFAULT_IMAGE_PATH -from lerobot.datasets.video_utils import VideoEncoderConfig from tests.fixtures.constants import DEFAULT_FPS, DUMMY_REPO_ID SIMPLE_FEATURES = { diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index 36aa3f6f3..f81736589 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -29,9 +29,9 @@ from PIL import Image from safetensors.torch import load_file from torchvision.transforms import v2 +from lerobot.configs import VALID_VIDEO_CODECS, VideoEncoderConfig from lerobot.configs.default import DatasetConfig from lerobot.configs.train import TrainPipelineConfig -from lerobot.configs.video import VALID_VIDEO_CODECS, VideoEncoderConfig from lerobot.datasets import make_dataset from lerobot.datasets.feature_utils import 
get_hf_features_from_features from lerobot.datasets.image_writer import image_array_to_pil_image diff --git a/tests/datasets/test_streaming_video_encoder.py b/tests/datasets/test_streaming_video_encoder.py index 6af7ab781..a2f1e25e8 100644 --- a/tests/datasets/test_streaming_video_encoder.py +++ b/tests/datasets/test_streaming_video_encoder.py @@ -26,10 +26,10 @@ pytest.importorskip("av", reason="av is required (install lerobot[dataset])") import av # noqa: E402 +from lerobot.configs import VideoEncoderConfig from lerobot.datasets.pyav_utils import get_codec from lerobot.datasets.video_utils import ( StreamingVideoEncoder, - VideoEncoderConfig, _CameraEncoderThread, ) from lerobot.utils.constants import OBS_IMAGES diff --git a/tests/datasets/test_video_encoding.py b/tests/datasets/test_video_encoding.py index 3ac4269fc..f5508bad2 100644 --- a/tests/datasets/test_video_encoding.py +++ b/tests/datasets/test_video_encoding.py @@ -26,7 +26,7 @@ pytest.importorskip("av", reason="av is required (install lerobot[dataset])") import av # noqa: E402 -from lerobot.configs.video import VALID_VIDEO_CODECS, VideoEncoderConfig +from lerobot.configs import VALID_VIDEO_CODECS, VideoEncoderConfig from lerobot.datasets.image_writer import write_image from lerobot.datasets.lerobot_dataset import LeRobotDataset from lerobot.datasets.pyav_utils import get_codec