mirror of
https://github.com/huggingface/lerobot.git
synced 2026-07-01 07:07:08 +00:00
feat(depth stats): enforcing all depth stats to be in millimeters (default unit) for consistency
This commit is contained in:
@@ -22,6 +22,7 @@ import numpy as np
|
||||
from lerobot.processor import RelativeActionsProcessorStep
|
||||
from lerobot.utils.constants import ACTION, OBS_STATE
|
||||
|
||||
from .depth_utils import MM_PER_METRE
|
||||
from .io_utils import load_image_as_numpy
|
||||
|
||||
DEFAULT_QUANTILES = [0.01, 0.10, 0.50, 0.90, 0.99]
|
||||
@@ -508,8 +509,8 @@ def compute_episode_stats(
|
||||
Note:
|
||||
For 'image'/'video' features, stats are computed per channel and kept with a
|
||||
leading channel axis (e.g. shape (3, 1, 1) for RGB). RGB stats are divided by
|
||||
255 to land in [0, 1]; depth maps (features flagged with ``is_depth_map``) skip
|
||||
this rescaling and remain in their stored units.
|
||||
255 to land in [0, 1]; depth maps (features flagged with ``is_depth_map``) are
|
||||
instead canonicalized to millimetres regardless of the raw frame unit.
|
||||
"""
|
||||
if quantile_list is None:
|
||||
quantile_list = DEFAULT_QUANTILES
|
||||
@@ -533,9 +534,14 @@ def compute_episode_stats(
|
||||
)
|
||||
|
||||
if features[key]["dtype"] in ["image", "video"]:
|
||||
normalization_factor = (
|
||||
255.0 if not (features[key].get("info") or {}).get("is_depth_map", False) else 1.0
|
||||
)
|
||||
if (features[key].get("info") or {}).get("is_depth_map", False):
|
||||
# Depth stats are canonically stored in millimetres; metre (float) depth is
|
||||
# scaled up, integer (millimetre) depth is left as-is.
|
||||
normalization_factor = (
|
||||
1.0 / MM_PER_METRE if np.issubdtype(ep_ft_array.dtype, np.floating) else 1.0
|
||||
)
|
||||
else:
|
||||
normalization_factor = 255.0
|
||||
ep_stats[key] = {
|
||||
k: v if k == "count" else np.squeeze(v / normalization_factor, axis=0)
|
||||
for k, v in ep_stats[key].items()
|
||||
|
||||
@@ -39,7 +39,7 @@ from lerobot.configs.video import (
|
||||
from .image_writer import squeeze_single_channel
|
||||
from .pyav_utils import write_u16_plane
|
||||
|
||||
_MM_PER_METRE = 1000.0
|
||||
MM_PER_METRE = 1000.0
|
||||
_UINT16_MAX = 65535
|
||||
|
||||
|
||||
@@ -126,12 +126,12 @@ def quantize_depth(
|
||||
|
||||
# Convert depth_min, depth_max, and shift to the resolved input unit.
|
||||
depth_min_u = (
|
||||
np.float32(depth_min) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_min * _MM_PER_METRE)
|
||||
np.float32(depth_min) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_min * MM_PER_METRE)
|
||||
)
|
||||
depth_max_u = (
|
||||
np.float32(depth_max) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_max * _MM_PER_METRE)
|
||||
np.float32(depth_max) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_max * MM_PER_METRE)
|
||||
)
|
||||
shift_u = np.float32(shift) if resolved_unit == DEPTH_METER_UNIT else np.float32(shift * _MM_PER_METRE)
|
||||
shift_u = np.float32(shift) if resolved_unit == DEPTH_METER_UNIT else np.float32(shift * MM_PER_METRE)
|
||||
|
||||
# Normalization and quantization is performed in the resolved input unit.
|
||||
if use_log:
|
||||
@@ -236,7 +236,7 @@ def dequantize_depth(
|
||||
|
||||
# mm path: round + clamp in float32, skipping the uint16 round-trip
|
||||
# when returning a tensor (torch.uint16 is poorly supported).
|
||||
buf.mul_(_MM_PER_METRE).round_().clamp_(0.0, _UINT16_MAX)
|
||||
buf.mul_(MM_PER_METRE).round_().clamp_(0.0, _UINT16_MAX)
|
||||
if output_tensor:
|
||||
return buf
|
||||
return buf.cpu().numpy().astype(np.uint16, copy=False)
|
||||
@@ -259,7 +259,7 @@ def dequantize_depth(
|
||||
if output_unit == DEPTH_METER_UNIT:
|
||||
return torch.from_numpy(buf) if output_tensor else buf
|
||||
|
||||
np.multiply(buf, _MM_PER_METRE, out=buf)
|
||||
np.multiply(buf, MM_PER_METRE, out=buf)
|
||||
np.rint(buf, out=buf)
|
||||
np.clip(buf, 0.0, _UINT16_MAX, out=buf)
|
||||
if output_tensor:
|
||||
|
||||
@@ -47,7 +47,7 @@ from lerobot.configs import (
|
||||
)
|
||||
from lerobot.utils.import_utils import get_safe_default_video_backend
|
||||
|
||||
from .depth_utils import quantize_depth
|
||||
from .depth_utils import MM_PER_METRE, quantize_depth
|
||||
from .pyav_utils import get_pix_fmt_channels
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -848,6 +848,9 @@ class _CameraEncoderThread(threading.Thread):
|
||||
# Reshape CHW to (H*W, C) for per-channel stats
|
||||
channels = img_downsampled.shape[0]
|
||||
img_for_stats = img_downsampled.transpose(1, 2, 0).reshape(-1, channels)
|
||||
# Depth stats are canonically stored in millimetres; metre (float) depth is scaled up.
|
||||
if self.is_depth and np.issubdtype(frame_data.dtype, np.floating):
|
||||
img_for_stats = img_for_stats * MM_PER_METRE
|
||||
stats_tracker.update(img_for_stats)
|
||||
|
||||
frame_count += 1
|
||||
|
||||
Reference in New Issue
Block a user