fix(lossless): tuning depth encoding parameters for lossless depth storage

This commit is contained in:
CarolinePascal
2026-06-24 22:41:00 +02:00
parent 96e04e5d05
commit 24c0bf6199
3 changed files with 15 additions and 13 deletions
+9 -8
View File
@@ -100,14 +100,15 @@ lerobot-record \
--dataset.depth_encoder.use_log=true
```
| Parameter | Type | Default | Description |
| ----------- | ------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------- |
| `vcodec` | `str` | `"hevc"` | Defaults to HEVC Main 12 (a 12-bit-capable codec). For a lossless depth stream, keep HEVC and pass `extra_options={"x265-params": "lossless=1"}` (stays MP4-compatible). |
| `pix_fmt` | `str` | `"gray12le"` | Single-channel 12-bit pixel format used to carry the quantized codes. |
| `depth_min` | `float` | `0.01` | Depth in metres mapped to quantum `0`. Values below are clipped on decode. |
| `depth_max` | `float` | `10.0` | Depth in metres mapped to quantum `4095`. Values above are clipped on decode. |
| `shift` | `float` | `3.5` | Pre-log offset (metres) used in logarithmic quantization for numerical stability near zero. Must satisfy `depth_min + shift > 0`. |
| `use_log` | `bool` | `True` | If `true`, quantize in log-space (recommended for typical depth sensors). Set to `false` for uniform/linear quantization. |
| Parameter | Type | Default | Description |
| --------------- | ------- | ------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
| `vcodec` | `str` | `"hevc"` | HEVC Main 12 (a 12-bit-capable codec, MP4-compatible). |
| `extra_options` | `dict` | `{"x265-params": "lossless=1"}` | **Depth defaults to lossless** video encoding (the quantized codes round-trip exactly); `crf` is ignored. Pass `extra_options={}` and set `crf` for a smaller lossy stream. |
| `pix_fmt` | `str` | `"gray12le"` | Single-channel 12-bit pixel format used to carry the quantized codes. |
| `depth_min` | `float` | `0.01` | Depth in metres mapped to quantum `0`. Values below are clipped on decode. |
| `depth_max` | `float` | `10.0` | Depth in metres mapped to quantum `4095`. Values above are clipped on decode. |
| `shift` | `float` | `3.5` | Pre-log offset (metres) used in logarithmic quantization for numerical stability near zero. Must satisfy `depth_min + shift > 0`. |
| `use_log` | `bool` | `True` | If `true`, quantize in log-space (recommended for typical depth sensors). Set to `false` for uniform/linear quantization. |
> [!TIP]
> `depth_min`, `depth_max`, and `shift` are always interpreted in **metres**, regardless of the input depth's unit. The input unit is auto-detected from the array dtype: integer arrays (e.g. `uint16` millimetres straight from a RealSense) are treated as millimetres, floating arrays as metres.
+4 -5
View File
@@ -36,9 +36,7 @@ HW_VIDEO_CODECS = [
"h264_vaapi", # Linux Intel/AMD
"h264_qsv", # Intel Quick Sync
]
VALID_VIDEO_CODECS: frozenset[str] = frozenset(
{"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS}
)
VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS})
# Aliases for legacy video codec names.
VIDEO_CODECS_ALIASES: dict[str, str] = {"av1": "libsvtav1"}
@@ -272,8 +270,9 @@ class DepthEncoderConfig(VideoEncoderConfig):
``"gray12le"``.
"""
vcodec: str = "hevc"
pix_fmt: str = "gray12le"
vcodec: str = "hevc" # Video codec name. Defaults to HEVC Main 12 (a 12-bit-capable codec).
pix_fmt: str = "gray12le" # Pixel format. Defaults to 12-bit grayscale.
extra_options: dict[str, Any] = field(default_factory=lambda: {"x265-params": "lossless=1"})
depth_min: float = DEFAULT_DEPTH_MIN # Minimum depth in meters, mapped to the lowest quantum.
depth_max: float = DEFAULT_DEPTH_MAX # Maximum depth in meters, mapped to the highest quantum.
+2
View File
@@ -730,6 +730,7 @@ class TestEncodeDepthVideoFrames:
pix_fmt="gray12le",
g=4,
crf=25,
extra_options={},
depth_min=0.05,
depth_max=8.0,
shift=2.5,
@@ -777,6 +778,7 @@ class TestDepthEncoderConfigPersistence:
pix_fmt="gray12le",
g=2,
crf=30,
extra_options={},
depth_min=0.05,
depth_max=8.0,
shift=2.5,