diff --git a/docs/source/video_encoding_parameters.mdx b/docs/source/video_encoding_parameters.mdx index 72ed10458..132d25056 100644 --- a/docs/source/video_encoding_parameters.mdx +++ b/docs/source/video_encoding_parameters.mdx @@ -100,14 +100,15 @@ lerobot-record \ --dataset.depth_encoder.use_log=true ``` -| Parameter | Type | Default | Description | -| ----------- | ------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------- | -| `vcodec` | `str` | `"hevc"` | Defaults to HEVC Main 12 (a 12-bit-capable codec). For a lossless depth stream, keep HEVC and pass `extra_options={"x265-params": "lossless=1"}` (stays MP4-compatible). | -| `pix_fmt` | `str` | `"gray12le"` | Single-channel 12-bit pixel format used to carry the quantized codes. | -| `depth_min` | `float` | `0.01` | Depth in metres mapped to quantum `0`. Values below are clipped on decode. | -| `depth_max` | `float` | `10.0` | Depth in metres mapped to quantum `4095`. Values above are clipped on decode. | -| `shift` | `float` | `3.5` | Pre-log offset (metres) used in logarithmic quantization for numerical stability near zero. Must satisfy `depth_min + shift > 0`. | -| `use_log` | `bool` | `True` | If `true`, quantize in log-space (recommended for typical depth sensors). Set to `false` for uniform/linear quantization. | +| Parameter | Type | Default | Description | +| --------------- | ------- | ------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | +| `vcodec` | `str` | `"hevc"` | HEVC Main 12 (a 12-bit-capable codec, MP4-compatible). | +| `extra_options` | `dict` | `{"x265-params": "lossless=1"}` | **Depth defaults to lossless** (exact round-trip); `crf` is ignored. Pass `extra_options={}` and set `crf` for a smaller lossy stream. | +| `pix_fmt` | `str` | `"gray12le"` | Single-channel 12-bit pixel format used to carry the quantized codes. | +| `depth_min` | `float` | `0.01` | Depth in metres mapped to quantum `0`. Values below are clipped on decode. | +| `depth_max` | `float` | `10.0` | Depth in metres mapped to quantum `4095`. Values above are clipped on decode. | +| `shift` | `float` | `3.5` | Pre-log offset (metres) used in logarithmic quantization for numerical stability near zero. Must satisfy `depth_min + shift > 0`. | +| `use_log` | `bool` | `True` | If `true`, quantize in log-space (recommended for typical depth sensors). Set to `false` for uniform/linear quantization. | > [!TIP] > `depth_min`, `depth_max`, and `shift` are always interpreted in **metres**, regardless of the input depth's unit. Inputs are auto-detected: integer arrays (e.g. `uint16` millimetres straight from a RealSense) are treated as millimetres, floating arrays as metres. diff --git a/src/lerobot/configs/video.py b/src/lerobot/configs/video.py index 1929b638a..3ea834508 100644 --- a/src/lerobot/configs/video.py +++ b/src/lerobot/configs/video.py @@ -36,9 +36,7 @@ HW_VIDEO_CODECS = [ "h264_vaapi", # Linux Intel/AMD "h264_qsv", # Intel Quick Sync ] -VALID_VIDEO_CODECS: frozenset[str] = frozenset( - {"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS} -) +VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS}) # Aliases for legacy video codec names. VIDEO_CODECS_ALIASES: dict[str, str] = {"av1": "libsvtav1"} @@ -272,8 +270,9 @@ class DepthEncoderConfig(VideoEncoderConfig): ``"gray12le"``. """ - vcodec: str = "hevc" - pix_fmt: str = "gray12le" + vcodec: str = "hevc" # Video codec name. Defaults to HEVC Main 12 (a 12-bit-capable codec). + pix_fmt: str = "gray12le" # Pixel format. Defaults to 12-bit grayscale. + extra_options: dict[str, Any] = field(default_factory=lambda: {"x265-params": "lossless=1"}) depth_min: float = DEFAULT_DEPTH_MIN # Minimum depth in meters, mapped to the lowest quantum. depth_max: float = DEFAULT_DEPTH_MAX # Maximum depth in meters, mapped to the highest quantum. diff --git a/tests/datasets/test_video_encoding.py b/tests/datasets/test_video_encoding.py index e419a9c72..80819d665 100644 --- a/tests/datasets/test_video_encoding.py +++ b/tests/datasets/test_video_encoding.py @@ -730,6 +730,7 @@ class TestEncodeDepthVideoFrames: pix_fmt="gray12le", g=4, crf=25, + extra_options={}, depth_min=0.05, depth_max=8.0, shift=2.5, @@ -777,6 +778,7 @@ class TestDepthEncoderConfigPersistence: pix_fmt="gray12le", g=2, crf=30, + extra_options={}, depth_min=0.05, depth_max=8.0, shift=2.5,