diff --git a/src/lerobot/configs/video.py b/src/lerobot/configs/video.py index 2a75cc839..c265b00ed 100644 --- a/src/lerobot/configs/video.py +++ b/src/lerobot/configs/video.py @@ -69,35 +69,18 @@ DEPTH_ENCODER_INFO_FIELD_NAMES: frozenset[str] = frozenset({"depth_min", "depth_ @dataclass class VideoEncoderConfig: - """Video encoder configuration. + """Video encoder configuration.""" - Attributes: - vcodec: Video encoder name. ``"auto"`` is resolved during - construction (HW encoder if available, else ``libsvtav1``). - pix_fmt: Pixel format (e.g. ``"yuv420p"``). - g: GOP size (keyframe interval). - crf: Quality level — mapped to the native quality parameter of the - codec (``crf`` for software, ``qp`` for NVENC/VAAPI, - ``q:v`` for VideoToolbox, ``global_quality`` for QSV). - preset: Speed/quality preset. Accepted type is per-codec. - fast_decode: Fast-decode tuning. For ``libsvtav1`` this is a level (0-2) - embedded in ``svtav1-params``. For ``h264`` and ``hevc`` non-zero values - set ``tune=fastdecode``. Ignored for other codecs. - video_backend: Python to be used for encoding. Only ``"pyav"`` - is currently supported. - extra_options: Free-form dictionary of additional video encoder options - (e.g. ``{"tune": "film", "profile:v": "high", "bf": 2}``). - """ - - vcodec: str = "libsvtav1" # TODO(CarolinePascal): rename to codec ? - pix_fmt: str = "yuv420p" - g: int | None = 2 - crf: int | float | None = 30 - preset: int | str | None = None - fast_decode: int = 0 + vcodec: str = "libsvtav1" # Video codec name. "auto" picks a hardware codec if available, else libsvtav1. + pix_fmt: str = "yuv420p" # Pixel format (e.g. yuv420p). + g: int | None = 2 # GOP size (keyframe interval). + crf: int | float | None = 30 # Quality level. Lower means better quality and larger files. + preset: int | str | None = None # Speed/quality preset. Accepted values are codec-specific. + fast_decode: int = 0 # Fast-decode tuning. Accepted values are codec-specific, 0 disables it. # TODO(CarolinePascal): add torchcodec support + find a way to unify the # two backends (encoding and decoding). - video_backend: str = "pyav" + video_backend: str = "pyav" # Encoding backend. Only "pyav" is currently supported. + # Extra codec options merged last, e.g. {"tune": "film"}. extra_options: dict[str, Any] = field(default_factory=dict) # Source-data channel count this encoder is expected to handle (3 for RGB, @@ -272,29 +255,18 @@ class DepthEncoderConfig(VideoEncoderConfig): """Encoder configuration for depth-map streams. Inherits the full :class:`VideoEncoderConfig` surface (codec, GOP, CRF, - preset, ``extra_options``…) and adds the four parameters of the depth - quantizer. - - Defaults flip ``vcodec`` to ``"hevc"`` (Main 12 profile) and ``pix_fmt`` - to ``"gray12le"``. - - - Attributes: - depth_min: Minimum depth in physical units (e.g. metres) represented - by quantum ``0``. - depth_max: Maximum depth represented by quantum :data:`DEPTH_QMAX`. - shift: Pre-log offset for numerical stability near zero. - use_log: ``True`` for logarithmic quantization (default; matches - sensor error profile), ``False`` for linear. + preset, ``extra_options``…) and adds the parameters of the depth quantizer. + Defaults flip ``vcodec`` to ``"hevc"`` (Main 12 profile) and ``pix_fmt`` to + ``"gray12le"``. """ vcodec: str = "hevc" pix_fmt: str = "gray12le" - depth_min: float = DEFAULT_DEPTH_MIN - depth_max: float = DEFAULT_DEPTH_MAX - shift: float = DEFAULT_DEPTH_SHIFT - use_log: bool = DEFAULT_DEPTH_USE_LOG + depth_min: float = DEFAULT_DEPTH_MIN # Minimum depth in meters, mapped to the lowest quantum. + depth_max: float = DEFAULT_DEPTH_MAX # Maximum depth in meters, mapped to the highest quantum. + shift: float = DEFAULT_DEPTH_SHIFT # Pre-log offset in meters for numerical stability near zero. + use_log: bool = DEFAULT_DEPTH_USE_LOG # Use logarithmic quantization (True) or linear (False). _DEFAULT_CHANNELS: ClassVar[int] = 1