mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-30 22:57:00 +00:00
fix(lossless): tuning depth encoding parameters for lossless depth storage
This commit is contained in:
@@ -100,14 +100,15 @@ lerobot-record \
|
||||
--dataset.depth_encoder.use_log=true
|
||||
```
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
| ----------- | ------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vcodec` | `str` | `"hevc"` | Defaults to HEVC Main 12 (a 12-bit-capable codec). For a lossless depth stream, keep HEVC and pass `extra_options={"x265-params": "lossless=1"}` (stays MP4-compatible). |
|
||||
| `pix_fmt` | `str` | `"gray12le"` | Single-channel 12-bit pixel format used to carry the quantized codes. |
|
||||
| `depth_min` | `float` | `0.01` | Depth in metres mapped to quantum `0`. Values below are clipped on decode. |
|
||||
| `depth_max` | `float` | `10.0` | Depth in metres mapped to quantum `4095`. Values above are clipped on decode. |
|
||||
| `shift` | `float` | `3.5` | Pre-log offset (metres) used in logarithmic quantization for numerical stability near zero. Must satisfy `depth_min + shift > 0`. |
|
||||
| `use_log` | `bool` | `True` | If `true`, quantize in log-space (recommended for typical depth sensors). Set to `false` for uniform/linear quantization. |
|
||||
| Parameter | Type | Default | Description |
|
||||
| --------------- | ------- | ------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vcodec` | `str` | `"hevc"` | HEVC Main 12 (a 12-bit-capable codec, MP4-compatible). |
|
||||
| `extra_options` | `dict` | `{"x265-params": "lossless=1"}` | **Depth defaults to lossless** (exact round-trip); `crf` is ignored. Pass `extra_options={}` and set `crf` for a smaller lossy stream. |
|
||||
| `pix_fmt` | `str` | `"gray12le"` | Single-channel 12-bit pixel format used to carry the quantized codes. |
|
||||
| `depth_min` | `float` | `0.01` | Depth in metres mapped to quantum `0`. Values below are clipped on decode. |
|
||||
| `depth_max` | `float` | `10.0` | Depth in metres mapped to quantum `4095`. Values above are clipped on decode. |
|
||||
| `shift` | `float` | `3.5` | Pre-log offset (metres) used in logarithmic quantization for numerical stability near zero. Must satisfy `depth_min + shift > 0`. |
|
||||
| `use_log` | `bool` | `True` | If `true`, quantize in log-space (recommended for typical depth sensors). Set to `false` for uniform/linear quantization. |
|
||||
|
||||
> [!TIP]
|
||||
> `depth_min`, `depth_max`, and `shift` are always interpreted in **metres**, regardless of the input depth's unit. Inputs are auto-detected: integer arrays (e.g. `uint16` millimetres straight from a RealSense) are treated as millimetres, floating arrays as metres.
|
||||
|
||||
@@ -36,9 +36,7 @@ HW_VIDEO_CODECS = [
|
||||
"h264_vaapi", # Linux Intel/AMD
|
||||
"h264_qsv", # Intel Quick Sync
|
||||
]
|
||||
VALID_VIDEO_CODECS: frozenset[str] = frozenset(
|
||||
{"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS}
|
||||
)
|
||||
VALID_VIDEO_CODECS: frozenset[str] = frozenset({"h264", "hevc", "libsvtav1", "auto", *HW_VIDEO_CODECS})
|
||||
# Aliases for legacy video codec names.
|
||||
VIDEO_CODECS_ALIASES: dict[str, str] = {"av1": "libsvtav1"}
|
||||
|
||||
@@ -272,8 +270,9 @@ class DepthEncoderConfig(VideoEncoderConfig):
|
||||
``"gray12le"``.
|
||||
"""
|
||||
|
||||
vcodec: str = "hevc"
|
||||
pix_fmt: str = "gray12le"
|
||||
vcodec: str = "hevc" # Video codec name. Defaults to HEVC Main 12 (a 12-bit-capable codec).
|
||||
pix_fmt: str = "gray12le" # Pixel format. Defaults to 12-bit grayscale.
|
||||
extra_options: dict[str, Any] = field(default_factory=lambda: {"x265-params": "lossless=1"})
|
||||
|
||||
depth_min: float = DEFAULT_DEPTH_MIN # Minimum depth in meters, mapped to the lowest quantum.
|
||||
depth_max: float = DEFAULT_DEPTH_MAX # Maximum depth in meters, mapped to the highest quantum.
|
||||
|
||||
@@ -730,6 +730,7 @@ class TestEncodeDepthVideoFrames:
|
||||
pix_fmt="gray12le",
|
||||
g=4,
|
||||
crf=25,
|
||||
extra_options={},
|
||||
depth_min=0.05,
|
||||
depth_max=8.0,
|
||||
shift=2.5,
|
||||
@@ -777,6 +778,7 @@ class TestDepthEncoderConfigPersistence:
|
||||
pix_fmt="gray12le",
|
||||
g=2,
|
||||
crf=30,
|
||||
extra_options={},
|
||||
depth_min=0.05,
|
||||
depth_max=8.0,
|
||||
shift=2.5,
|
||||
|
||||
Reference in New Issue
Block a user