mirror of
https://github.com/huggingface/lerobot.git
synced 2026-07-03 08:07:03 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 096fdd3ea5 | |||
| 6af4eb6da4 | |||
| 052d329470 |
@@ -126,7 +126,7 @@ import time
|
||||
from lerobot.teleoperators.so_leader import SO101Leader, SO101LeaderConfig
|
||||
from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig
|
||||
from lerobot.cameras.opencv import OpenCVCameraConfig
|
||||
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data, shutdown_rerun
|
||||
from lerobot.utils.visualization_utils import init_visualization, log_visualization_data, shutdown_visualization
|
||||
|
||||
robot_config = SO101FollowerConfig(
|
||||
port="/dev/tty.usbmodem5AB90687491",
|
||||
@@ -142,7 +142,7 @@ teleop_config = SO101LeaderConfig(
|
||||
id="my_leader_arm",
|
||||
)
|
||||
|
||||
init_rerun(session_name="teleoperation")
|
||||
init_visualization("rerun", session_name="teleoperation") # pass "foxglove" to stream to Foxglove instead
|
||||
|
||||
robot = SO101Follower(robot_config)
|
||||
teleop_device = SO101Leader(teleop_config)
|
||||
@@ -158,7 +158,7 @@ while True:
|
||||
observation = robot.get_observation()
|
||||
action = teleop_device.get_action()
|
||||
robot.send_action(action)
|
||||
log_rerun_data(observation=observation, action=action)
|
||||
log_visualization_data("rerun", observation=observation, action=action)
|
||||
|
||||
elapsed_time = time.perf_counter() - start_time
|
||||
sleep_time = TIME_PER_FRAME - elapsed_time
|
||||
@@ -223,7 +223,7 @@ from lerobot.teleoperators.so_leader.config_so_leader import SO101LeaderConfig
|
||||
from lerobot.teleoperators.so_leader.so_leader import SO101Leader
|
||||
from lerobot.common.control_utils import init_keyboard_listener
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import init_rerun
|
||||
from lerobot.utils.visualization_utils import init_visualization
|
||||
from lerobot.scripts.lerobot_record import record_loop
|
||||
from lerobot.processor import make_default_processors
|
||||
|
||||
@@ -270,7 +270,7 @@ def main():
|
||||
|
||||
# Initialize the keyboard listener and rerun visualization
|
||||
_, events = init_keyboard_listener()
|
||||
init_rerun(session_name="recording")
|
||||
init_visualization("rerun", session_name="recording")
|
||||
|
||||
# Connect the robot and teleoperator
|
||||
robot.connect()
|
||||
|
||||
@@ -265,6 +265,8 @@ lerobot-dataset-viz \
|
||||
|
||||
Once executed, the tool opens `rerun.io` and displays the camera streams, robot states, and actions for the selected episode.
|
||||
|
||||
To use [Foxglove](https://foxglove.dev) instead of Rerun, install the `viz` extra and add `--display-mode foxglove`. This starts a WebSocket server (connect the Foxglove app to `ws://127.0.0.1:8765`) that serves the episode as a seekable timeline you can play/pause and scrub.
|
||||
|
||||
For advanced usage—including visualizing datasets stored on a remote server—run:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -6,12 +6,11 @@ Encoding frames into an MP4 is a full FFmpeg pipeline: choice of encoder, pixel
|
||||
|
||||
You can set these parameters from the CLI with `--dataset.rgb_encoder.<field>` (e.g. with `lerobot-record` or `lerobot-rollout`). The same block applies to every camera video stream in that run.
|
||||
|
||||
<Tip>
|
||||
Video storage must be on for `rgb_encoder` to have any effect —
|
||||
`use_videos=True` in Python APIs, or `--dataset.video=true` on the CLI (the
|
||||
recording default). With video off, inputs stay as images and `rgb_encoder` is
|
||||
ignored.
|
||||
</Tip>
|
||||
> [!TIP]
|
||||
> Video storage must be on for `rgb_encoder` to have any effect —
|
||||
> `use_videos=True` in Python APIs, or `--dataset.video=true` on the CLI (the
|
||||
> recording default). With video off, inputs stay as images and `rgb_encoder` is
|
||||
> ignored.
|
||||
|
||||
For details on **when** frames are written vs. encoded (streaming vs. post-episode), queues, and other top-level `--dataset.*` switches, see [Streaming Video Encoding](./streaming_video_encoding). For an encoding-parameter comparison and experiments, see the [video-benchmark Space](https://huggingface.co/spaces/lerobot/video-benchmark).
|
||||
|
||||
@@ -43,12 +42,10 @@ lerobot-record \
|
||||
|
||||
## Tuning parameters
|
||||
|
||||
<Tip warning={true}>
|
||||
The defaults are tuned to balance **compression ratio**, **visual quality**, and **decoding/seek speed** for typical robotics datasets. Changing them can affect both recording (CPU load, frame drops) and training (decoding throughput, image quality).
|
||||
|
||||
Only override these parameters if you have a specific reason to, and measure the impact on your pipeline before relying on the new settings.
|
||||
|
||||
</Tip>
|
||||
> [!WARNING]
|
||||
> The defaults are tuned to balance **compression ratio**, **visual quality**, and **decoding/seek speed** for typical robotics datasets. Changing them can affect both recording (CPU load, frame drops) and training (decoding throughput, image quality).
|
||||
>
|
||||
> Only override these parameters if you have a specific reason to, and measure the impact on your pipeline before relying on the new settings.
|
||||
|
||||
All flags below are prefixed with `--dataset.rgb_encoder.` on the CLI.
|
||||
|
||||
@@ -69,25 +66,92 @@ All flags below are prefixed with `--dataset.rgb_encoder.` on the CLI.
|
||||
|
||||
Depth maps (Intel RealSense, Reachy 2) are stored as their **own video streams** alongside the RGB streams. Raw depth (`uint16` millimetres or `float32` metres) can't survive an 8-bit codec, so LeRobot **quantizes** each map to a 12-bit code (`[0, 4095]`) — logarithmically by default, to match the `1/depth` error profile of depth sensors — then packs it into a high-bit-depth pixel format (`gray12le`) and encodes it with a 12-bit codec.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Raw depth (uint16 mm / float32 m)"] --> B["Clip to depth_min, depth_max"]
|
||||
B --> C["Quantize to 12-bit code 0–4095 (log or linear)"]
|
||||
C --> D["Pack into gray12le"]
|
||||
D --> E["Encode video (hevc Main 12)"]
|
||||
E --> F[("MP4 + metadata: depth_min/max, shift, use_log")]
|
||||
F -. "load time (depth_output_unit)" .-> G["Dequantize to mm or m"]
|
||||
|
||||
classDef input fill:#e3f2fd,stroke:#1565c0,color:#0d47a1;
|
||||
classDef encode fill:#ede7f6,stroke:#5e35b1,color:#311b92;
|
||||
classDef store fill:#fff8e1,stroke:#f9a825,color:#e65100;
|
||||
classDef load fill:#e8f5e9,stroke:#2e7d32,color:#1b5e20;
|
||||
|
||||
class A input;
|
||||
class B,C,D,E encode;
|
||||
class F store;
|
||||
class G load;
|
||||
```
|
||||
<div style="margin:28px 0;padding:14px 0;">
|
||||
<div style="margin:0 auto;display:flex;flex-wrap:wrap;justify-content:center;align-items:stretch;gap:6px;font-family:'Source Sans 3',ui-sans-serif,system-ui,sans-serif;font-size:14px;font-weight:600;color:#1B1B1D;">
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#DBEAFE;color:#1D4ED8;border-radius:9px;padding:8px 12px;">
|
||||
<span>Raw depth</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#3B6FD4;white-space:nowrap;">
|
||||
uint16 mm
|
||||
<br />
|
||||
float32 m
|
||||
</span>
|
||||
</span>
|
||||
<span style="display:flex;align-items:center;font-size:16px;color:#C3CBD9;">
|
||||
→
|
||||
</span>
|
||||
<div style="border:2px dashed #C4B5FD;border-radius:13px;padding:18px 12px 12px;position:relative;display:flex;align-items:stretch;gap:6px;">
|
||||
<span style="position:absolute;top:-10px;left:12px;background:#fff;padding:0 6px;font-size:11px;font-weight:700;color:#7E22CE;text-transform:uppercase;letter-spacing:0.5px;white-space:nowrap;">
|
||||
Record time
|
||||
</span>
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#F3E8FF;color:#7E22CE;border-radius:9px;padding:8px 12px;">
|
||||
<span>Clip</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#9061C2;white-space:nowrap;">
|
||||
to [depth_min,
|
||||
<br />
|
||||
depth_max]
|
||||
</span>
|
||||
</span>
|
||||
<span style="display:flex;align-items:center;font-size:16px;color:#C3CBD9;">
|
||||
→
|
||||
</span>
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#F3E8FF;color:#7E22CE;border-radius:9px;padding:8px 12px;">
|
||||
<span>Quantize</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#9061C2;white-space:nowrap;">
|
||||
12-bit codes 0–4095
|
||||
<br />
|
||||
log (default) or linear
|
||||
</span>
|
||||
</span>
|
||||
<span style="display:flex;align-items:center;font-size:16px;color:#C3CBD9;">
|
||||
→
|
||||
</span>
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#F3E8FF;color:#7E22CE;border-radius:9px;padding:8px 12px;">
|
||||
<span>Pack</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#9061C2;white-space:nowrap;">
|
||||
into gray12le
|
||||
<br />
|
||||
plane
|
||||
</span>
|
||||
</span>
|
||||
<span style="display:flex;align-items:center;font-size:16px;color:#C3CBD9;">
|
||||
→
|
||||
</span>
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#F3E8FF;color:#7E22CE;border-radius:9px;padding:8px 12px;">
|
||||
<span>Encode</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#9061C2;white-space:nowrap;">
|
||||
HEVC
|
||||
<br />
|
||||
Main 12
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
<span style="display:flex;align-items:center;font-size:16px;color:#C3CBD9;">
|
||||
→
|
||||
</span>
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#FEF3C7;color:#B45309;border-radius:9px;padding:8px 12px;">
|
||||
<span>MP4</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#C77D18;white-space:nowrap;">
|
||||
stored
|
||||
<br />
|
||||
stream
|
||||
</span>
|
||||
</span>
|
||||
<span style="display:flex;align-items:center;font-size:16px;color:#34A06B;">
|
||||
→
|
||||
</span>
|
||||
<div style="border:2px dashed #6EE7B7;border-radius:13px;padding:18px 12px 12px;position:relative;display:flex;align-items:center;gap:6px;">
|
||||
<span style="position:absolute;top:-10px;left:12px;background:#fff;padding:0 6px;font-size:11px;font-weight:700;color:#047857;text-transform:uppercase;letter-spacing:0.5px;white-space:nowrap;">
|
||||
Load time
|
||||
</span>
|
||||
<span style="display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;gap:2px;background:#D1FAE5;color:#047857;border-radius:9px;padding:8px 12px;">
|
||||
<span>Dequantize</span>
|
||||
<span style="font-size:11px;font-weight:400;color:#059669;white-space:nowrap;">
|
||||
to mm / m
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Configure the depth pipeline through a parallel **`depth_encoder`** block (`DepthEncoderConfig`). It shares every `RGBEncoderConfig` field (`vcodec`, `pix_fmt`, `crf`, …) and adds four quantizer knobs, set via `--dataset.depth_encoder.<field>`:
|
||||
|
||||
@@ -134,9 +198,6 @@ lerobot-train \
|
||||
> [!TIP]
|
||||
> This is purely a decode-time presentation choice — it does **not** alter the stored video or its metadata, so the same dataset can be read as `mm` or `m` without re-encoding. It has no effect on datasets without depth cameras.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Depth statistics in `meta/stats.json` are always computed in **millimetres**, regardless of the raw frame dtype.
|
||||
|
||||
---
|
||||
|
||||
## Persistence in dataset metadata
|
||||
@@ -171,15 +232,81 @@ After the first episode of a video stream is encoded, the encoder configuration
|
||||
|
||||
Two sources contribute to the `info` block:
|
||||
|
||||
- **Stream-derived** (read back from the encoded MP4 with PyAV): `video.height`, `video.width`, `video.codec`, `video.pix_fmt`, `video.fps`, `video.channels`, `is_depth_map`, plus `audio.*` if an audio stream is present.
|
||||
- **Encoder-derived** (taken from `RGBEncoderConfig` or `DepthEncoderConfig`): `video.g`, `video.crf`, `video.preset`, `video.fast_decode`, `video.video_backend`, `video.extra_options`.
|
||||
<div style="display:flex;flex-wrap:wrap;gap:14px;margin:20px 0;font-family:'Source Sans 3',ui-sans-serif,system-ui,sans-serif;">
|
||||
<div style="flex:1 1 280px;border:1px solid #BFDBFE;border-radius:12px;overflow:hidden;">
|
||||
<div style="background:#DBEAFE;color:#1D4ED8;font-weight:700;font-size:14px;padding:8px 14px;">
|
||||
Stream-derived
|
||||
</div>
|
||||
<div style="padding:12px 14px;">
|
||||
<div style="font-size:13px;color:#4B5563;margin-bottom:10px;">
|
||||
Read back from the encoded MP4 with PyAV.
|
||||
</div>
|
||||
<div style="display:flex;flex-wrap:wrap;gap:6px;">
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.height
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.width
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.codec
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.pix_fmt
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.fps
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.channels
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
is_depth_map
|
||||
</code>
|
||||
<code style="background:#EFF6FF;color:#1D4ED8;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
audio.*
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div style="flex:1 1 280px;border:1px solid #DDD6FE;border-radius:12px;overflow:hidden;">
|
||||
<div style="background:#F3E8FF;color:#7E22CE;font-weight:700;font-size:14px;padding:8px 14px;">
|
||||
Encoder-derived
|
||||
</div>
|
||||
<div style="padding:12px 14px;">
|
||||
<div style="font-size:13px;color:#4B5563;margin-bottom:10px;">
|
||||
Taken from <code style="font-size:12px;">RGBEncoderConfig</code> /{" "}
|
||||
<code style="font-size:12px;">DepthEncoderConfig</code>.
|
||||
</div>
|
||||
<div style="display:flex;flex-wrap:wrap;gap:6px;">
|
||||
<code style="background:#FAF5FF;color:#7E22CE;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.g
|
||||
</code>
|
||||
<code style="background:#FAF5FF;color:#7E22CE;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.crf
|
||||
</code>
|
||||
<code style="background:#FAF5FF;color:#7E22CE;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.preset
|
||||
</code>
|
||||
<code style="background:#FAF5FF;color:#7E22CE;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.fast_decode
|
||||
</code>
|
||||
<code style="background:#FAF5FF;color:#7E22CE;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.video_backend
|
||||
</code>
|
||||
<code style="background:#FAF5FF;color:#7E22CE;border-radius:6px;padding:2px 8px;font-size:12px;">
|
||||
video.extra_options
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Tip>
|
||||
This block is populated **once**, from the **first** episode. It assumes every
|
||||
episode in the dataset was encoded with the same `rgb_encoder`. Changing
|
||||
encoder settings partway through a recording is not supported — the
|
||||
`info.json` will only reflect the parameters used for the first episode.
|
||||
</Tip>
|
||||
> [!IMPORTANT]
|
||||
> This block is populated **once**, from the **first** episode. It assumes every
|
||||
> episode in the dataset was encoded with the same `rgb_encoder`. Changing
|
||||
> encoder settings partway through a recording is not supported — the
|
||||
> `info.json` will only reflect the parameters used for the first episode.
|
||||
|
||||
---
|
||||
|
||||
@@ -187,5 +314,35 @@ Two sources contribute to the `info` block:
|
||||
|
||||
When aggregating datasets with `merge_datasets`, video files are concatenated as-is (no re-encoding), and encoder fields in `info.json` are merged per-key:
|
||||
|
||||
- **Stream-derived fields must match** across sources: `video.codec`, `video.pix_fmt`, `video.height`, `video.width`, `video.fps`. Otherwise FFmpeg's concat demuxer fails.
|
||||
- **Encoder-tuning fields are merged loosely**: `video.g`, `video.crf`, `video.preset`, `video.fast_decode`, `video.extra_options`. If every source agrees, the value is kept; if not, it's set to `null` (or `{}` for `video.extra_options`) and a warning is logged.
|
||||
<div style="display:flex;flex-direction:column;gap:12px;margin:20px 0;font-family:'Source Sans 3',ui-sans-serif,system-ui,sans-serif;">
|
||||
<div style="display:flex;gap:12px;align-items:flex-start;border-left:3px solid #F87171;background:#FEF2F2;border-radius:0 10px 10px 0;padding:12px 14px;">
|
||||
<span style="flex:none;background:#FEE2E2;color:#B91C1C;font-weight:700;font-size:11px;text-transform:uppercase;letter-spacing:0.4px;border-radius:6px;padding:3px 8px;margin-top:1px;white-space:nowrap;">
|
||||
Must match
|
||||
</span>
|
||||
<span style="font-size:14px;color:#1B1B1D;">
|
||||
Stream-derived fields — <code style="font-size:12px;">video.codec</code>,{" "}
|
||||
<code style="font-size:12px;">video.pix_fmt</code>,{" "}
|
||||
<code style="font-size:12px;">video.height</code>,{" "}
|
||||
<code style="font-size:12px;">video.width</code>,{" "}
|
||||
<code style="font-size:12px;">video.fps</code> — must match across
|
||||
sources, otherwise FFmpeg's concat demuxer fails.
|
||||
</span>
|
||||
</div>
|
||||
<div style="display:flex;gap:12px;align-items:flex-start;border-left:3px solid #34D399;background:#ECFDF5;border-radius:0 10px 10px 0;padding:12px 14px;">
|
||||
<span style="flex:none;background:#D1FAE5;color:#047857;font-weight:700;font-size:11px;text-transform:uppercase;letter-spacing:0.4px;border-radius:6px;padding:3px 8px;margin-top:1px;white-space:nowrap;">
|
||||
Merged loosely
|
||||
</span>
|
||||
<span style="font-size:14px;color:#1B1B1D;">
|
||||
Encoder-tuning fields — <code style="font-size:12px;">video.g</code>,{" "}
|
||||
<code style="font-size:12px;">video.crf</code>,{" "}
|
||||
<code style="font-size:12px;">video.preset</code>,{" "}
|
||||
<code style="font-size:12px;">video.fast_decode</code>,{" "}
|
||||
<code style="font-size:12px;">video.extra_options</code>. If every source
|
||||
agrees, the value is kept; if not, it's set to{" "}
|
||||
<code style="font-size:12px;">null</code> (or{" "}
|
||||
<code style="font-size:12px;">{}</code> for{" "}
|
||||
<code style="font-size:12px;">video.extra_options</code>) and a warning is
|
||||
logged.
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -125,6 +125,7 @@ hardware = [
|
||||
]
|
||||
viz = [
|
||||
"rerun-sdk>=0.24.0,<0.34.0",
|
||||
"foxglove-sdk>=0.25.1,<0.26.0",
|
||||
]
|
||||
# ── User-facing composite extras (map to CLI scripts) ─────
|
||||
# lerobot-record, lerobot-replay, lerobot-calibrate, lerobot-teleoperate, etc.
|
||||
|
||||
@@ -22,7 +22,6 @@ import numpy as np
|
||||
from lerobot.processor import RelativeActionsProcessorStep
|
||||
from lerobot.utils.constants import ACTION, OBS_STATE
|
||||
|
||||
from .depth_utils import MM_PER_METRE
|
||||
from .io_utils import load_image_as_numpy
|
||||
|
||||
DEFAULT_QUANTILES = [0.01, 0.10, 0.50, 0.90, 0.99]
|
||||
@@ -509,8 +508,8 @@ def compute_episode_stats(
|
||||
Note:
|
||||
For 'image'/'video' features, stats are computed per channel and kept with a
|
||||
leading channel axis (e.g. shape (3, 1, 1) for RGB). RGB stats are divided by
|
||||
255 to land in [0, 1]; depth maps (features flagged with ``is_depth_map``) are
|
||||
instead canonicalized to millimetres regardless of the raw frame unit.
|
||||
255 to land in [0, 1]; depth maps (features flagged with ``is_depth_map``) skip
|
||||
this rescaling and remain in their stored units.
|
||||
"""
|
||||
if quantile_list is None:
|
||||
quantile_list = DEFAULT_QUANTILES
|
||||
@@ -534,14 +533,9 @@ def compute_episode_stats(
|
||||
)
|
||||
|
||||
if features[key]["dtype"] in ["image", "video"]:
|
||||
if (features[key].get("info") or {}).get("is_depth_map", False):
|
||||
# Depth stats are canonically stored in millimetres; metre (float) depth is
|
||||
# scaled up, integer (millimetre) depth is left as-is.
|
||||
normalization_factor = (
|
||||
1.0 / MM_PER_METRE if np.issubdtype(ep_ft_array.dtype, np.floating) else 1.0
|
||||
)
|
||||
else:
|
||||
normalization_factor = 255.0
|
||||
normalization_factor = (
|
||||
255.0 if not (features[key].get("info") or {}).get("is_depth_map", False) else 1.0
|
||||
)
|
||||
ep_stats[key] = {
|
||||
k: v if k == "count" else np.squeeze(v / normalization_factor, axis=0)
|
||||
for k, v in ep_stats[key].items()
|
||||
|
||||
@@ -39,7 +39,7 @@ from lerobot.configs.video import (
|
||||
from .image_writer import squeeze_single_channel
|
||||
from .pyav_utils import write_u16_plane
|
||||
|
||||
MM_PER_METRE = 1000.0
|
||||
_MM_PER_METRE = 1000.0
|
||||
_UINT16_MAX = 65535
|
||||
|
||||
|
||||
@@ -126,12 +126,12 @@ def quantize_depth(
|
||||
|
||||
# Convert depth_min, depth_max, and shift to the resolved input unit.
|
||||
depth_min_u = (
|
||||
np.float32(depth_min) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_min * MM_PER_METRE)
|
||||
np.float32(depth_min) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_min * _MM_PER_METRE)
|
||||
)
|
||||
depth_max_u = (
|
||||
np.float32(depth_max) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_max * MM_PER_METRE)
|
||||
np.float32(depth_max) if resolved_unit == DEPTH_METER_UNIT else np.float32(depth_max * _MM_PER_METRE)
|
||||
)
|
||||
shift_u = np.float32(shift) if resolved_unit == DEPTH_METER_UNIT else np.float32(shift * MM_PER_METRE)
|
||||
shift_u = np.float32(shift) if resolved_unit == DEPTH_METER_UNIT else np.float32(shift * _MM_PER_METRE)
|
||||
|
||||
# Normalization and quantization are performed in the resolved input unit.
|
||||
if use_log:
|
||||
@@ -236,7 +236,7 @@ def dequantize_depth(
|
||||
|
||||
# mm path: round + clamp in float32, skipping the uint16 round-trip
|
||||
# when returning a tensor (torch.uint16 is poorly supported).
|
||||
buf.mul_(MM_PER_METRE).round_().clamp_(0.0, _UINT16_MAX)
|
||||
buf.mul_(_MM_PER_METRE).round_().clamp_(0.0, _UINT16_MAX)
|
||||
if output_tensor:
|
||||
return buf
|
||||
return buf.cpu().numpy().astype(np.uint16, copy=False)
|
||||
@@ -259,7 +259,7 @@ def dequantize_depth(
|
||||
if output_unit == DEPTH_METER_UNIT:
|
||||
return torch.from_numpy(buf) if output_tensor else buf
|
||||
|
||||
np.multiply(buf, MM_PER_METRE, out=buf)
|
||||
np.multiply(buf, _MM_PER_METRE, out=buf)
|
||||
np.rint(buf, out=buf)
|
||||
np.clip(buf, 0.0, _UINT16_MAX, out=buf)
|
||||
if output_tensor:
|
||||
|
||||
@@ -47,7 +47,7 @@ from lerobot.configs import (
|
||||
)
|
||||
from lerobot.utils.import_utils import get_safe_default_video_backend
|
||||
|
||||
from .depth_utils import MM_PER_METRE, quantize_depth
|
||||
from .depth_utils import quantize_depth
|
||||
from .pyav_utils import get_pix_fmt_channels
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -848,9 +848,6 @@ class _CameraEncoderThread(threading.Thread):
|
||||
# Reshape CHW to (H*W, C) for per-channel stats
|
||||
channels = img_downsampled.shape[0]
|
||||
img_for_stats = img_downsampled.transpose(1, 2, 0).reshape(-1, channels)
|
||||
# Depth stats are canonically stored in millimetres; metre (float) depth is scaled up.
|
||||
if self.is_depth and np.issubdtype(frame_data.dtype, np.floating):
|
||||
img_for_stats = img_for_stats * MM_PER_METRE
|
||||
stats_tracker.update(img_for_stats)
|
||||
|
||||
frame_count += 1
|
||||
|
||||
@@ -226,11 +226,14 @@ class RolloutConfig:
|
||||
device: str | None = None
|
||||
task: str = ""
|
||||
display_data: bool = False
|
||||
# Display data on a remote Rerun server
|
||||
# Visualization backend used when display_data is True: "rerun" or "foxglove".
|
||||
display_mode: str = "rerun"
|
||||
# For "rerun": IP of a remote server to send to. For "foxglove": interface to bind the WebSocket
|
||||
# server to (127.0.0.1 for local only, 0.0.0.0 for all interfaces).
|
||||
display_ip: str | None = None
|
||||
# Port of the remote Rerun server
|
||||
# For "rerun": port of the remote server. For "foxglove": port to bind the WebSocket server to.
|
||||
display_port: int | None = None
|
||||
# Whether to display compressed images in Rerun
|
||||
# Whether to display compressed (JPEG) images instead of raw frames
|
||||
display_compressed_images: bool = False
|
||||
# Use vocal synthesis to read events
|
||||
play_sounds: bool = True
|
||||
|
||||
@@ -26,7 +26,7 @@ from lerobot.utils.action_interpolator import ActionInterpolator
|
||||
from lerobot.utils.constants import OBS_STR
|
||||
from lerobot.utils.feature_utils import build_dataset_frame
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.visualization_utils import log_rerun_data
|
||||
from lerobot.utils.visualization_utils import log_visualization_data
|
||||
|
||||
from ..inference import InferenceEngine
|
||||
|
||||
@@ -162,11 +162,12 @@ class RolloutStrategy(abc.ABC):
|
||||
action_dict: dict | None,
|
||||
runtime_ctx: RuntimeContext,
|
||||
) -> None:
|
||||
"""Log observation/action telemetry to Rerun if display_data is enabled."""
|
||||
"""Log observation/action telemetry to the visualization backend if display_data is enabled."""
|
||||
cfg = runtime_ctx.cfg
|
||||
if not cfg.display_data:
|
||||
return
|
||||
log_rerun_data(
|
||||
log_visualization_data(
|
||||
cfg.display_mode,
|
||||
observation=obs_processed,
|
||||
action=action_dict,
|
||||
compress_images=cfg.display_compressed_images,
|
||||
|
||||
@@ -44,7 +44,7 @@ from lerobot.utils.feature_utils import build_dataset_frame
|
||||
from lerobot.utils.keyboard_input import init_keyboard_listener
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.utils import log_say
|
||||
from lerobot.utils.visualization_utils import log_rerun_data
|
||||
from lerobot.utils.visualization_utils import log_visualization_data
|
||||
|
||||
from ..configs import EpisodicStrategyConfig
|
||||
from ..context import RolloutContext
|
||||
@@ -171,6 +171,7 @@ class EpisodicStrategy(RolloutStrategy):
|
||||
fps=fps,
|
||||
control_time_s=reset_time_s,
|
||||
display_data=cfg.display_data,
|
||||
display_mode=cfg.display_mode,
|
||||
display_compressed=display_compressed,
|
||||
)
|
||||
|
||||
@@ -259,6 +260,7 @@ class EpisodicStrategy(RolloutStrategy):
|
||||
fps: float,
|
||||
control_time_s: float,
|
||||
display_data: bool,
|
||||
display_mode: str,
|
||||
display_compressed: bool,
|
||||
) -> None:
|
||||
"""Reset-phase loop: teleop drives the robot if available, no recording."""
|
||||
@@ -288,7 +290,8 @@ class EpisodicStrategy(RolloutStrategy):
|
||||
|
||||
if display_data:
|
||||
obs_processed = processors.robot_observation_processor(obs)
|
||||
log_rerun_data(
|
||||
log_visualization_data(
|
||||
display_mode,
|
||||
observation=obs_processed,
|
||||
action=act_teleop,
|
||||
compress_images=display_compressed,
|
||||
|
||||
@@ -59,6 +59,18 @@ distant$ lerobot-dataset-viz \
|
||||
local$ rerun rerun+http://IP:GRPC_PORT/proxy
|
||||
```
|
||||
|
||||
- Visualize data in Foxglove with a seekable, scrubbable timeline:
|
||||
```
|
||||
local$ lerobot-dataset-viz \
|
||||
--repo-id lerobot/pusht \
|
||||
--episode-index 0 \
|
||||
--display-mode foxglove
|
||||
|
||||
# then open the Foxglove app and connect to ws://127.0.0.1:8765
|
||||
```
|
||||
This starts a Foxglove WebSocket server that serves the episode on demand from the on-disk dataset,
|
||||
so you can play/pause and scrub anywhere in the episode using Foxglove's playback controls.
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -73,9 +85,12 @@ import torch.utils.data
|
||||
import tqdm
|
||||
|
||||
from lerobot.datasets import LeRobotDataset
|
||||
from lerobot.utils.constants import ACTION, DONE, OBS_STATE, REWARD
|
||||
from lerobot.utils.constants import ACTION, DONE, OBS_STATE, REWARD, SUCCESS
|
||||
from lerobot.utils.utils import init_logging
|
||||
|
||||
DEFAULT_FOXGLOVE_PORT = 8765
|
||||
DEFAULT_RERUN_PORT = 9090
|
||||
|
||||
|
||||
def get_feature_names(dataset: LeRobotDataset, key: str) -> list[str]:
|
||||
"""Return per-dimension names for a feature from the dataset metadata.
|
||||
@@ -108,6 +123,12 @@ def to_hwc_uint8_numpy(chw_float32_torch: torch.Tensor) -> np.ndarray:
|
||||
return hwc_uint8_numpy
|
||||
|
||||
|
||||
def to_hwc_float32_numpy(chw_float32_torch: torch.Tensor) -> np.ndarray:
|
||||
check_chw_float32(chw_float32_torch)
|
||||
hwc_float32_numpy = chw_float32_torch.permute(1, 2, 0).numpy()
|
||||
return hwc_float32_numpy
|
||||
|
||||
|
||||
def build_blueprint_from_dataset(dataset: LeRobotDataset):
|
||||
"""Build a Rerun blueprint laying out camera images and time series for the given dataset.
|
||||
|
||||
@@ -126,32 +147,43 @@ def build_blueprint_from_dataset(dataset: LeRobotDataset):
|
||||
names = get_feature_names(dataset, key)
|
||||
styling = rr.SeriesLines(names=names)
|
||||
views.append(rrb.TimeSeriesView(origin=origin, name=origin, overrides={origin: styling}))
|
||||
for key in (DONE, REWARD, "next.success"):
|
||||
for key in (DONE, REWARD, SUCCESS):
|
||||
if key in dataset.features:
|
||||
views.append(rrb.TimeSeriesView(origin=key, name=key))
|
||||
|
||||
return rrb.Blueprint(rrb.Grid(*views))
|
||||
|
||||
|
||||
def to_hwc_uint16_numpy(chw_float32_torch: torch.Tensor) -> np.ndarray:
|
||||
check_chw_float32(chw_float32_torch)
|
||||
hwc_uint16_numpy = chw_float32_torch.round().type(torch.uint16).permute(1, 2, 0).numpy()
|
||||
return hwc_uint16_numpy
|
||||
|
||||
|
||||
def visualize_dataset(
|
||||
dataset: LeRobotDataset,
|
||||
episode_index: int,
|
||||
batch_size: int = 32,
|
||||
num_workers: int = 0,
|
||||
mode: str = "local",
|
||||
web_port: int = 9090,
|
||||
web_port: int | None = None,
|
||||
grpc_port: int = 9876,
|
||||
save: bool = False,
|
||||
output_dir: Path | None = None,
|
||||
display_compressed_images: bool = False,
|
||||
display_mode: str = "rerun",
|
||||
host: str = "127.0.0.1",
|
||||
autoplay: bool = True,
|
||||
**kwargs,
|
||||
) -> Path | None:
|
||||
if display_mode == "foxglove":
|
||||
from lerobot.utils.foxglove_visualization import serve_foxglove_dataset_playback
|
||||
|
||||
logging.info("Starting Foxglove server")
|
||||
serve_foxglove_dataset_playback(
|
||||
dataset,
|
||||
episode_index,
|
||||
host=host,
|
||||
port=web_port if web_port is not None else DEFAULT_FOXGLOVE_PORT,
|
||||
compress_images=display_compressed_images,
|
||||
autoplay=autoplay,
|
||||
)
|
||||
return None
|
||||
|
||||
if save:
|
||||
assert output_dir is not None, (
|
||||
"Set an output directory where to write .rrd files with `--output-dir path/to/directory`."
|
||||
@@ -188,14 +220,20 @@ def visualize_dataset(
|
||||
if mode == "distant":
|
||||
server_uri = rr.serve_grpc(grpc_port=grpc_port)
|
||||
logging.info(f"Connect to a Rerun Server: rerun rerun+http://IP:{grpc_port}/proxy")
|
||||
rr.serve_web_viewer(open_browser=False, web_port=web_port, connect_to=server_uri)
|
||||
rr.serve_web_viewer(
|
||||
open_browser=False,
|
||||
web_port=web_port if web_port is not None else DEFAULT_RERUN_PORT,
|
||||
connect_to=server_uri,
|
||||
)
|
||||
|
||||
logging.info("Logging to Rerun")
|
||||
|
||||
# Use the dataset's q01/q99 depth statistics for robust depth range bounds
|
||||
depth_ranges = {}
|
||||
for key in dataset.meta.depth_keys:
|
||||
stats = dataset.meta.stats[key]
|
||||
stats = (dataset.meta.stats or {}).get(key)
|
||||
if not stats:
|
||||
continue
|
||||
lo = stats["q01"] if "q01" in stats else stats["min"]
|
||||
hi = stats["q99"] if "q99" in stats else stats["max"]
|
||||
depth_ranges[key] = (float(np.asarray(lo).item()), float(np.asarray(hi).item()))
|
||||
@@ -213,11 +251,11 @@ def visualize_dataset(
|
||||
# display each camera image (or depth map)
|
||||
for key in dataset.meta.camera_keys:
|
||||
if key in dataset.meta.depth_keys:
|
||||
depth = to_hwc_uint16_numpy(batch[key][i])
|
||||
depth = to_hwc_float32_numpy(batch[key][i])
|
||||
depth_entity = rr.DepthImage(
|
||||
depth,
|
||||
colormap=rr.components.Colormap.Viridis,
|
||||
depth_range=depth_ranges[key],
|
||||
depth_range=depth_ranges.get(key),
|
||||
)
|
||||
rr.log(key, entity=depth_entity)
|
||||
else:
|
||||
@@ -239,8 +277,8 @@ def visualize_dataset(
|
||||
if REWARD in batch:
|
||||
rr.log(REWARD, rr.Scalars(batch[REWARD][i].item()))
|
||||
|
||||
if "next.success" in batch:
|
||||
rr.log("next.success", rr.Scalars(batch["next.success"][i].item()))
|
||||
if SUCCESS in batch:
|
||||
rr.log(SUCCESS, rr.Scalars(batch[SUCCESS][i].item()))
|
||||
|
||||
# save .rrd locally
|
||||
if mode == "local" and save:
|
||||
@@ -312,13 +350,11 @@ def main():
|
||||
parser.add_argument(
|
||||
"--web-port",
|
||||
type=int,
|
||||
default=9090,
|
||||
help="Web port for rerun.io when `--mode distant` is set.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ws-port",
|
||||
type=int,
|
||||
help="deprecated, please use --grpc-port instead.",
|
||||
default=None,
|
||||
help=(
|
||||
"Web/WebSocket port. For rerun `--mode distant` it is the web viewer port (default 9090); "
|
||||
"for `--display-mode foxglove` it is the server bind port (default 8765)."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--grpc-port",
|
||||
@@ -351,24 +387,56 @@ def main():
|
||||
parser.add_argument(
|
||||
"--display-compressed-images",
|
||||
action="store_true",
|
||||
help="If set, display compressed images in Rerun instead of uncompressed ones.",
|
||||
help="If set, display compressed (JPEG) images instead of uncompressed ones.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--display-mode",
|
||||
type=str,
|
||||
default="rerun",
|
||||
choices=["rerun", "foxglove"],
|
||||
help=(
|
||||
"Visualization backend. 'rerun' uses the Rerun viewer (--mode/--save/--*-port apply). "
|
||||
"'foxglove' starts a Foxglove WebSocket server that serves the episode as a seekable, "
|
||||
"scrubbable timeline; connect the Foxglove app to ws://HOST:PORT (--host/--web-port)."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--host",
|
||||
type=str,
|
||||
default="127.0.0.1",
|
||||
help=(
|
||||
"Host to bind the Foxglove WebSocket server to when `--display-mode foxglove` is set "
|
||||
"(127.0.0.1 for local only, 0.0.0.0 for all interfaces)."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-autoplay",
|
||||
dest="autoplay",
|
||||
action="store_false",
|
||||
help=(
|
||||
"For `--display-mode foxglove`: don't start playing automatically when a client "
|
||||
"connects; wait for play to be pressed in the Foxglove app instead."
|
||||
),
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.display_mode == "foxglove":
|
||||
rerun_only = ("mode", "save", "output_dir", "grpc_port", "batch_size", "num_workers")
|
||||
ignored = [name for name in rerun_only if getattr(args, name) != parser.get_default(name)]
|
||||
if ignored:
|
||||
logging.warning(
|
||||
"These flags only apply to `--display-mode rerun` and are ignored with "
|
||||
"`--display-mode foxglove`: %s.",
|
||||
", ".join(f"--{name.replace('_', '-')}" for name in ignored),
|
||||
)
|
||||
|
||||
kwargs = vars(args)
|
||||
repo_id = kwargs.pop("repo_id")
|
||||
root = kwargs.pop("root")
|
||||
tolerance_s = kwargs.pop("tolerance_s")
|
||||
|
||||
if kwargs["ws_port"] is not None:
|
||||
logging.warning(
|
||||
"--ws-port is deprecated and will be removed in future versions. Please use --grpc-port instead."
|
||||
)
|
||||
logging.warning("Setting grpc_port to ws_port value.")
|
||||
kwargs["grpc_port"] = kwargs.pop("ws_port")
|
||||
else:
|
||||
kwargs.pop("ws_port") # Always remove ws_port from kwargs
|
||||
|
||||
init_logging()
|
||||
logging.info("Loading dataset")
|
||||
dataset = LeRobotDataset(repo_id, episodes=[args.episode_index], root=root, tolerance_s=tolerance_s)
|
||||
|
||||
@@ -38,6 +38,9 @@ lerobot-record \\
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
To stream the data to Foxglove instead of Rerun, add ``--display_mode=foxglove`` (then connect the
|
||||
Foxglove app to ``ws://127.0.0.1:8765``; override the port with ``--display_port=<port>``).
|
||||
|
||||
Example recording with bimanual so100:
|
||||
```shell
|
||||
lerobot-record \\
|
||||
@@ -157,7 +160,11 @@ from lerobot.utils.utils import (
|
||||
init_logging,
|
||||
log_say,
|
||||
)
|
||||
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data
|
||||
from lerobot.utils.visualization_utils import (
|
||||
init_visualization,
|
||||
log_visualization_data,
|
||||
shutdown_visualization,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -168,11 +175,14 @@ class RecordConfig:
|
||||
teleop: TeleoperatorConfig | None = None
|
||||
# Display all cameras on screen
|
||||
display_data: bool = False
|
||||
# Display data on a remote Rerun server
|
||||
# Visualization backend used when display_data is True: "rerun" or "foxglove".
|
||||
display_mode: str = "rerun"
|
||||
# For "rerun": IP of a remote server to send to. For "foxglove": interface to bind the WebSocket
|
||||
# server to (127.0.0.1 for local only, 0.0.0.0 for all interfaces).
|
||||
display_ip: str | None = None
|
||||
# Port of the remote Rerun server
|
||||
# For "rerun": port of the remote server. For "foxglove": port to bind the WebSocket server to.
|
||||
display_port: int | None = None
|
||||
# Whether to display compressed images in Rerun
|
||||
# Whether to display compressed (JPEG) images instead of raw frames
|
||||
display_compressed_images: bool = False
|
||||
# Use vocal synthesis to read events.
|
||||
play_sounds: bool = True
|
||||
@@ -233,6 +243,7 @@ def record_loop(
|
||||
control_time_s: int | None = None,
|
||||
single_task: str | None = None,
|
||||
display_data: bool = False,
|
||||
display_mode: str = "rerun",
|
||||
display_compressed_images: bool = False,
|
||||
):
|
||||
if dataset is not None and dataset.fps != fps:
|
||||
@@ -327,8 +338,11 @@ def record_loop(
|
||||
dataset.add_frame(frame)
|
||||
|
||||
if display_data:
|
||||
log_rerun_data(
|
||||
observation=obs_processed, action=action_values, compress_images=display_compressed_images
|
||||
log_visualization_data(
|
||||
display_mode,
|
||||
observation=obs_processed,
|
||||
action=action_values,
|
||||
compress_images=display_compressed_images,
|
||||
)
|
||||
|
||||
dt_s = time.perf_counter() - start_loop_t
|
||||
@@ -354,7 +368,9 @@ def record(
|
||||
init_logging()
|
||||
logging.info(pformat(asdict(cfg)))
|
||||
if cfg.display_data:
|
||||
init_rerun(session_name="recording", ip=cfg.display_ip, port=cfg.display_port)
|
||||
init_visualization(
|
||||
cfg.display_mode, session_name="recording", ip=cfg.display_ip, port=cfg.display_port
|
||||
)
|
||||
display_compressed_images = (
|
||||
True
|
||||
if (cfg.display_data and cfg.display_ip is not None and cfg.display_port is not None)
|
||||
@@ -464,6 +480,7 @@ def record(
|
||||
control_time_s=cfg.dataset.episode_time_s,
|
||||
single_task=cfg.dataset.single_task,
|
||||
display_data=cfg.display_data,
|
||||
display_mode=cfg.display_mode,
|
||||
display_compressed_images=display_compressed_images,
|
||||
)
|
||||
|
||||
@@ -485,6 +502,7 @@ def record(
|
||||
control_time_s=cfg.dataset.reset_time_s,
|
||||
single_task=cfg.dataset.single_task,
|
||||
display_data=cfg.display_data,
|
||||
display_mode=cfg.display_mode,
|
||||
)
|
||||
|
||||
if events["rerecord_episode"]:
|
||||
@@ -510,6 +528,9 @@ def record(
|
||||
if listener is not None:
|
||||
listener.stop()
|
||||
|
||||
if cfg.display_data:
|
||||
shutdown_visualization(cfg.display_mode)
|
||||
|
||||
if cfg.dataset.push_to_hub:
|
||||
if dataset and dataset.num_episodes > 0:
|
||||
dataset.push_to_hub(tags=cfg.dataset.tags, private=cfg.dataset.private)
|
||||
|
||||
@@ -145,6 +145,9 @@ Usage examples
|
||||
--dataset.rgb_encoder.vcodec=h264 \\
|
||||
--dataset.rgb_encoder.preset=fast \\
|
||||
--dataset.rgb_encoder.extra_options={"tune": "film", "profile:v": "high", "bf": 2}
|
||||
|
||||
# Stream to Foxglove instead of Rerun:
|
||||
# add --display_mode=foxglove, then connect the Foxglove app to ws://127.0.0.1:8765.
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -190,7 +193,7 @@ from lerobot.teleoperators import ( # noqa: F401
|
||||
from lerobot.utils.import_utils import register_third_party_plugins
|
||||
from lerobot.utils.process import ProcessSignalHandler
|
||||
from lerobot.utils.utils import init_logging
|
||||
from lerobot.utils.visualization_utils import init_rerun
|
||||
from lerobot.utils.visualization_utils import init_visualization, shutdown_visualization
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -201,8 +204,13 @@ def rollout(cfg: RolloutConfig):
|
||||
init_logging()
|
||||
|
||||
if cfg.display_data:
|
||||
logger.info("Initializing Rerun visualization (ip=%s, port=%s)", cfg.display_ip, cfg.display_port)
|
||||
init_rerun(session_name="rollout", ip=cfg.display_ip, port=cfg.display_port)
|
||||
logger.info(
|
||||
"Initializing %s visualization (ip=%s, port=%s)",
|
||||
cfg.display_mode,
|
||||
cfg.display_ip,
|
||||
cfg.display_port,
|
||||
)
|
||||
init_visualization(cfg.display_mode, session_name="rollout", ip=cfg.display_ip, port=cfg.display_port)
|
||||
|
||||
signal_handler = ProcessSignalHandler(use_threads=True, display_pid=False)
|
||||
shutdown_event = signal_handler.shutdown_event
|
||||
@@ -227,6 +235,8 @@ def rollout(cfg: RolloutConfig):
|
||||
logger.info("Interrupted by user")
|
||||
finally:
|
||||
strategy.teardown(ctx)
|
||||
if cfg.display_data:
|
||||
shutdown_visualization(cfg.display_mode)
|
||||
|
||||
logger.info("Rollout finished")
|
||||
|
||||
|
||||
@@ -31,6 +31,22 @@ lerobot-teleoperate \
|
||||
--display_data=true
|
||||
```
|
||||
|
||||
To stream the data to Foxglove instead of Rerun, add ``--display_mode=foxglove``
|
||||
(then connect the Foxglove app to ``ws://127.0.0.1:8765``; override the port with ``--display_port=<port>``):
|
||||
|
||||
```shell
|
||||
lerobot-teleoperate \
|
||||
--robot.type=so101_follower \
|
||||
--robot.port=/dev/tty.usbmodem58760431541 \
|
||||
--robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
|
||||
--robot.id=black \
|
||||
--teleop.type=so101_leader \
|
||||
--teleop.port=/dev/tty.usbmodem58760431551 \
|
||||
--teleop.id=blue \
|
||||
--display_data=true \
|
||||
--display_mode=foxglove
|
||||
```
|
||||
|
||||
Example teleoperation with bimanual so100:
|
||||
|
||||
```shell
|
||||
@@ -108,7 +124,11 @@ from lerobot.teleoperators import ( # noqa: F401
|
||||
from lerobot.utils.import_utils import register_third_party_plugins
|
||||
from lerobot.utils.robot_utils import precise_sleep
|
||||
from lerobot.utils.utils import init_logging, move_cursor_up
|
||||
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data, shutdown_rerun
|
||||
from lerobot.utils.visualization_utils import (
|
||||
init_visualization,
|
||||
log_visualization_data,
|
||||
shutdown_visualization,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -121,11 +141,14 @@ class TeleoperateConfig:
|
||||
teleop_time_s: float | None = None
|
||||
# Display all cameras on screen
|
||||
display_data: bool = False
|
||||
# Display data on a remote Rerun server
|
||||
# Visualization backend used when display_data is True: "rerun" or "foxglove".
|
||||
display_mode: str = "rerun"
|
||||
# For "rerun": IP of a remote server to send to. For "foxglove": interface to bind the WebSocket
|
||||
# server to (127.0.0.1 for local only, 0.0.0.0 for all interfaces).
|
||||
display_ip: str | None = None
|
||||
# Port of the remote Rerun server
|
||||
# For "rerun": port of the remote server. For "foxglove": port to bind the WebSocket server to.
|
||||
display_port: int | None = None
|
||||
# Whether to display compressed images in Rerun
|
||||
# Whether to display compressed (JPEG) images instead of raw frames
|
||||
display_compressed_images: bool = False
|
||||
|
||||
|
||||
@@ -137,6 +160,7 @@ def teleop_loop(
|
||||
robot_action_processor: RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction],
|
||||
robot_observation_processor: RobotProcessorPipeline[RobotObservation, RobotObservation],
|
||||
display_data: bool = False,
|
||||
display_mode: str = "rerun",
|
||||
duration: float | None = None,
|
||||
display_compressed_images: bool = False,
|
||||
):
|
||||
@@ -149,8 +173,10 @@ def teleop_loop(
|
||||
teleop: The teleoperator device instance providing control actions.
|
||||
robot: The robot instance being controlled.
|
||||
fps: The target frequency for the control loop in frames per second.
|
||||
display_data: If True, fetches robot observations and displays them in the console and Rerun.
|
||||
display_compressed_images: If True, compresses images before sending them to Rerun for display.
|
||||
display_data: If True, fetches robot observations and displays them in the console and the
|
||||
visualization backend.
|
||||
display_mode: Visualization backend to use when display_data is True ("rerun" or "foxglove").
|
||||
display_compressed_images: If True, compresses images before sending them to the backend for display.
|
||||
duration: The maximum duration of the teleoperation loop in seconds. If None, the loop runs indefinitely.
|
||||
teleop_action_processor: An optional pipeline to process raw actions from the teleoperator.
|
||||
robot_action_processor: An optional pipeline to process actions before they are sent to the robot.
|
||||
@@ -187,7 +213,8 @@ def teleop_loop(
|
||||
# Process robot observation through pipeline
|
||||
obs_transition = robot_observation_processor(obs)
|
||||
|
||||
log_rerun_data(
|
||||
log_visualization_data(
|
||||
display_mode,
|
||||
observation=obs_transition,
|
||||
action=teleop_action,
|
||||
compress_images=display_compressed_images,
|
||||
@@ -215,7 +242,9 @@ def teleoperate(cfg: TeleoperateConfig):
|
||||
init_logging()
|
||||
logging.info(pformat(asdict(cfg)))
|
||||
if cfg.display_data:
|
||||
init_rerun(session_name="teleoperation", ip=cfg.display_ip, port=cfg.display_port)
|
||||
init_visualization(
|
||||
cfg.display_mode, session_name="teleoperation", ip=cfg.display_ip, port=cfg.display_port
|
||||
)
|
||||
display_compressed_images = (
|
||||
True
|
||||
if (cfg.display_data and cfg.display_ip is not None and cfg.display_port is not None)
|
||||
@@ -235,6 +264,7 @@ def teleoperate(cfg: TeleoperateConfig):
|
||||
robot=robot,
|
||||
fps=cfg.fps,
|
||||
display_data=cfg.display_data,
|
||||
display_mode=cfg.display_mode,
|
||||
duration=cfg.teleop_time_s,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
robot_action_processor=robot_action_processor,
|
||||
@@ -245,7 +275,7 @@ def teleoperate(cfg: TeleoperateConfig):
|
||||
pass
|
||||
finally:
|
||||
if cfg.display_data:
|
||||
shutdown_rerun()
|
||||
shutdown_visualization(cfg.display_mode)
|
||||
teleop.disconnect()
|
||||
robot.disconnect()
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ ACTION_TOKEN_MASK = ACTION + ".token_mask"
|
||||
REWARD = "next.reward"
|
||||
TRUNCATED = "next.truncated"
|
||||
DONE = "next.done"
|
||||
SUCCESS = "next.success"
|
||||
INFO = "info"
|
||||
|
||||
ROBOTS = "robots"
|
||||
|
||||
@@ -0,0 +1,651 @@
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Foxglove visualization backend.
|
||||
|
||||
Live control-loop streaming (:func:`log_foxglove_data`) and seekable dataset playback
|
||||
(:func:`serve_foxglove_dataset_playback`) over a Foxglove WebSocket server. Callers usually select a
|
||||
backend at runtime through the dispatch in :mod:`lerobot.utils.visualization_utils` rather than
|
||||
importing from here directly. Requires the ``viz`` extra (``pip install 'lerobot[viz]'``).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import numbers
|
||||
import time
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
|
||||
from .constants import (
|
||||
ACTION,
|
||||
ACTION_PREFIX,
|
||||
DONE,
|
||||
OBS_IMAGES,
|
||||
OBS_PREFIX,
|
||||
OBS_STATE,
|
||||
OBS_STR,
|
||||
REWARD,
|
||||
SUCCESS,
|
||||
TRUNCATED,
|
||||
)
|
||||
from .import_utils import require_package
|
||||
|
||||
# Static schema shared by all scalar topics. Each message carries a flat list of ``{label, value}``
# pairs rather than one field per feature, so the same schema fits any robot regardless of which
# observation/action features it reports. The ``label`` field name is what Foxglove looks for to name
# each series automatically, so a single filtered path plots every feature, e.g.
# ``/observation/state.scalars[:]``.
_SCALARS_SCHEMA = {
    "type": "object",
    "title": "lerobot.Scalars",
    "properties": {
        # Single top-level field: an array of ``{label, value}`` objects.
        "scalars": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    # Series name of the entry.
                    "label": {"type": "string"},
                    # Numeric sample for that series.
                    "value": {"type": "number"},
                },
            },
        }
    },
}
|
||||
|
||||
|
||||
def _is_scalar(x):
    """Return True when ``x`` is a real number or a zero-dimensional NumPy array.

    Python floats, any ``numbers.Real`` instance, and NumPy integer/floating scalars all count
    as scalars; NumPy arrays count only when they have no dimensions.
    """
    if isinstance(x, (float, numbers.Real, np.integer, np.floating)):
        return True
    return isinstance(x, np.ndarray) and x.ndim == 0
|
||||
|
||||
|
||||
def init_foxglove(host: str = "127.0.0.1", port: int | None = 8765) -> None:
    """Start the shared Foxglove WebSocket server used to visualize the control loop.

    The Foxglove app connects to it at ``ws://<host>:<port>``. When a server handle is already
    stored, the call returns without starting another one.

    Args:
        host: Host interface the WebSocket server binds to.
        port: Port the WebSocket server binds to; a falsy value (``None`` or ``0``) selects 8765.
    """

    require_package("foxglove-sdk", extra="viz", import_name="foxglove")
    import foxglove

    # Live-stream state is kept as attributes on ``log_foxglove_data``: ``.server`` holds the
    # shared WebSocket server and ``.channels`` caches one Foxglove channel per topic.
    server_running = getattr(log_foxglove_data, "server", None) is not None
    if not server_running:
        bind_port = port or 8765
        log_foxglove_data.server = foxglove.start_server(host=host, port=bind_port)
        log_foxglove_data.channels = {}
|
||||
|
||||
|
||||
def shutdown_foxglove() -> None:
    """Stop the Foxglove WebSocket server, if any, and reset the cached live-stream state.

    The stored server handle and the per-topic channel cache (both attributes on
    ``log_foxglove_data``) are cleared even when ``server.stop()`` raises. Otherwise a stale
    handle would remain set and a later ``init_foxglove()`` call would return early without
    starting a new server. Any exception raised by ``stop()`` still propagates to the caller.
    """

    server = getattr(log_foxglove_data, "server", None)
    try:
        if server is not None:
            server.stop()
    finally:
        # Always drop the handle and the channel cache so the module can be re-initialized.
        log_foxglove_data.server = None
        log_foxglove_data.channels = {}
|
||||
|
||||
|
||||
def _foxglove_safe_name(name: str) -> str:
    """Return ``name`` with every ``.`` turned into ``_`` so it forms one topic-path segment.

    Foxglove treats ``.`` as a path separator, so a dotted feature name such as
    ``observation.images.front`` would otherwise be read as nested segments rather than as the
    name of a single topic.
    """

    segments = name.split(".")
    return "_".join(segments)
|
||||
|
||||
|
||||
def _foxglove_topic(key: str, *, is_image: bool = False) -> str:
    """Return the Foxglove topic a feature ``key`` is published on.

    Scalar features share one aggregate ``/<source>/state`` topic, where the source is ``ACTION``
    for keys equal to ``ACTION`` or starting with ``ACTION_PREFIX`` and ``OBS_STR`` otherwise.
    Image features get their own ``/<OBS_STR>/images/<name>`` topic; ``<name>`` is the key with
    the first matching leading prefix (``OBS_IMAGES + "."``, then ``OBS_PREFIX``) removed and any
    remaining dots replaced via :func:`_foxglove_safe_name`.
    """

    text = str(key)

    if not is_image:
        is_action = text == ACTION or text.startswith(ACTION_PREFIX)
        source = ACTION if is_action else OBS_STR
        return f"/{source}/state"

    # Strip only the first prefix that matches; the more specific image prefix is tried first.
    candidates = (f"{OBS_IMAGES}.", OBS_PREFIX)
    matched = next((prefix for prefix in candidates if text.startswith(prefix)), "")
    short_name = text[len(matched) :]
    return f"/{OBS_STR}/images/{_foxglove_safe_name(short_name)}"
|
||||
|
||||
|
||||
def _log_foxglove_scalars(
|
||||
topic: str, values: dict[str, float], *, channels: dict | None = None, log_time: int | None = None
|
||||
) -> None:
|
||||
"""Log scalars on a typed JSON channel using the static :data:`_SCALARS_SCHEMA`.
|
||||
|
||||
``values`` is an ordered mapping of feature name to value; it is emitted as a ``scalars`` array of
|
||||
``{label, value}`` objects. Insertion order is preserved so series stay stable across messages.
|
||||
|
||||
``channels`` is the per-topic channel cache to reuse (defaults to the live-stream cache on
|
||||
:func:`log_foxglove_data`; dataset playback passes its own local cache to stay self-contained).
|
||||
``log_time`` is the message time in nanoseconds; when ``None`` the server's receive time is used.
|
||||
"""
|
||||
|
||||
if not values:
|
||||
return
|
||||
|
||||
import foxglove
|
||||
|
||||
if channels is None:
|
||||
channels = log_foxglove_data.channels
|
||||
channel = channels.get(topic)
|
||||
if channel is None:
|
||||
channel = channels[topic] = foxglove.Channel(topic, schema=_SCALARS_SCHEMA, message_encoding="json")
|
||||
msg = {"scalars": [{"label": label, "value": value} for label, value in values.items()]}
|
||||
if log_time is None:
|
||||
channel.log(msg)
|
||||
else:
|
||||
channel.log(msg, log_time=log_time)
|
||||
|
||||
|
||||
def _labeled_scalars(name: str, values, labels: list[str] | None = None) -> dict[str, float]:
|
||||
"""Expand a 1D sequence into ``{label: value}`` entries with a consistent fallback."""
|
||||
|
||||
flat = [float(v) for v in values]
|
||||
if labels is None or len(labels) != len(flat):
|
||||
labels = [f"{name}_{i}" for i in range(len(flat))]
|
||||
return dict(zip(labels, flat, strict=True))
|
||||
|
||||
|
||||
def _log_foxglove_image(
    topic: str,
    frame_id: str,
    arr: np.ndarray,
    *,
    compress_images: bool,
    channels: dict | None = None,
    log_time: int | None = None,
    depth_range: tuple[float, float] | None = None,
    raw_depth_values: bool = False,
) -> None:
    """Log an image on a cached per-topic channel.

    The encoding is chosen from the channel count and dtype: a single-channel ``float`` or ``uint16``
    frame is a depth map (``32FC1``/``16UC1``), single-channel ``uint8`` is ``mono8``, 3 => ``rgb8``
    (float input assumed in [0, 1], cast to uint8), 4 => ``rgba8``; other counts are skipped with a
    warning. When ``compress_images`` is set, ``rgb8`` is JPEG-encoded instead.

    Args:
        topic: Foxglove topic to log on.
        frame_id: Frame id stamped on the message.
        arr: Image as HWC or CHW (CHW is transposed to HWC), any dtype.
        compress_images: JPEG-encode ``rgb8`` frames; ignored for other encodings.
        channels: Per-topic channel cache to reuse (see :func:`_log_foxglove_scalars`).
        log_time: Message time in nanoseconds, also written to the header timestamp; when ``None``
            the server's receive time is used.
        depth_range: ``(lo, hi)`` clip bounds in a depth frame's own input units. Depth frames
            (``32FC1``/``16UC1``) are rescaled onto Foxglove's default display max for their encoding
            (``1.0`` / ``10000``) so they show with sensible contrast; ``depth_range`` sets the source
            range, else the frame's own min/max is used. Ignored for ``mono8``/``rgb8``/``rgba8``.
        raw_depth_values: If True, depth values are not rescaled and are logged as is.
    """

    from foxglove.channels import CompressedImageChannel, RawImageChannel
    from foxglove.messages import CompressedImage, RawImage, Timestamp

    if channels is None:
        channels = log_foxglove_data.channels
    # The header timestamp always needs a value, so fall back to the wall clock; ``log_time`` itself
    # is only forwarded to ``channel.log`` when the caller supplied one.
    time_ns = time.time_ns() if log_time is None else log_time
    timestamp = Timestamp(sec=time_ns // 1_000_000_000, nsec=time_ns % 1_000_000_000)
    log_kwargs = {} if log_time is None else {"log_time": log_time}

    # Convert CHW -> HWC when needed (mirrors log_rerun_data).
    # Only transposed when the first axis looks like a channel count and the last axis does not.
    if arr.ndim == 3 and arr.shape[0] in (1, 3, 4) and arr.shape[-1] not in (1, 3, 4):
        arr = np.transpose(arr, (1, 2, 0))
    height, width = arr.shape[0], arr.shape[1]
    n_channels = 1 if arr.ndim == 2 else arr.shape[2]

    if n_channels == 1 and arr.dtype != np.uint8:
        # Depth map: infer the encoding from the dtype.
        # Floating dtypes become 32FC1; every other non-uint8 dtype is treated as 16UC1.
        encoding, target_dtype, value_max = (
            ("32FC1", np.float32, 1.0)
            if np.issubdtype(arr.dtype, np.floating)
            else ("16UC1", np.uint16, 10000.0)
        )
        if not raw_depth_values:
            # Rescale onto the encoding's display max with respect to the given depth_range.
            lo, hi = depth_range if depth_range is not None else (float(arr.min()), float(arr.max()))
            arr = arr.clip(lo, hi).astype(np.float32)
            # A degenerate range (hi <= lo) divides by 1.0 instead of zero.
            arr = (arr - lo) / ((hi - lo) if hi > lo else 1.0) * value_max
        arr = np.ascontiguousarray(arr, dtype=target_dtype)
    else:
        # Only 3-channel float input is rescaled to 0-255; everything else is cast to uint8 as is.
        if n_channels == 3 and np.issubdtype(arr.dtype, np.floating):
            arr = (arr * 255.0).clip(0, 255)
        arr = np.ascontiguousarray(arr, dtype=np.uint8)

        if compress_images and n_channels == 3:
            # OpenCV encodes from BGR channel order, so convert from RGB before JPEG-encoding.
            buf_src = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
            _, buf = cv2.imencode(".jpg", buf_src)
            # NOTE(review): the cache is keyed by topic alone, so whichever channel type (raw or
            # compressed) was created first for a topic is reused for all later frames.
            channel = channels.get(topic)
            if channel is None:
                channel = channels[topic] = CompressedImageChannel(topic=topic)
            channel.log(
                CompressedImage(timestamp=timestamp, frame_id=frame_id, data=buf.tobytes(), format="jpeg"),
                **log_kwargs,
            )
            return

        encoding = {1: "mono8", 3: "rgb8", 4: "rgba8"}.get(n_channels)
        if encoding is None:
            logging.warning(
                "Foxglove: skipping image on topic '%s' with unsupported shape %s (%d channels); "
                "expected 1 (mono8/16UC1/32FC1), 3 (rgb8), or 4 (rgba8) channels.",
                topic,
                tuple(arr.shape),
                n_channels,
            )
            return

    # Raw path, shared by depth frames and uncompressed mono/rgb/rgba frames.
    channel = channels.get(topic)
    if channel is None:
        channel = channels[topic] = RawImageChannel(topic=topic)
    channel.log(
        RawImage(
            timestamp=timestamp,
            frame_id=frame_id,
            width=width,
            height=height,
            encoding=encoding,
            # Row stride in bytes for the contiguous HWC buffer.
            step=width * n_channels * arr.itemsize,
            data=arr.tobytes(),
        ),
        **log_kwargs,
    )
|
||||
|
||||
|
||||
def log_foxglove_data(
    observation: RobotObservation | None = None,
    action: RobotAction | None = None,
    compress_images: bool = False,
) -> None:
    """Stream one step of observation/action data to the running Foxglove WebSocket server.

    The Foxglove counterpart of ``log_rerun_data``. All messages of a call share one timestamp.

    - Scalars, and the elements of 1D observation arrays, are gathered per source and published
      as a single ``lerobot.Scalars`` JSON message (see :data:`_SCALARS_SCHEMA`) on the topic
      returned by :func:`_foxglove_topic` for ``OBS_STATE`` / ``ACTION``. Array elements are
      labeled ``{name}_{i}``.
    - Observation arrays with any other number of dimensions are handed to
      :func:`_log_foxglove_image` on their own image topic.
    - Action arrays are always flattened into scalars; actions are never logged as images.
    - ``None`` values and values that are neither scalars nor NumPy arrays are ignored.

    Args:
        observation: Optional mapping of observation feature name to value.
        action: Optional mapping of action feature name to value.
        compress_images: Forwarded to :func:`_log_foxglove_image` to request JPEG compression.

    Raises:
        RuntimeError: If no server has been started with :func:`init_foxglove`.
    """

    require_package("foxglove-sdk", extra="viz", import_name="foxglove")

    active_server = getattr(log_foxglove_data, "server", None)
    if active_server is None:
        raise RuntimeError("init_foxglove() must be called before log_foxglove_data().")

    stamp_ns = time.time_ns()

    if observation:
        obs_values: dict[str, float] = {}
        for name, value in observation.items():
            if value is None:
                continue
            # Drop the observation prefix so labels and frame ids use the short feature name.
            short = name[len(OBS_PREFIX) :] if str(name).startswith(OBS_PREFIX) else str(name)
            if _is_scalar(value):
                obs_values[short] = float(value)
                continue
            if not isinstance(value, np.ndarray):
                continue
            if value.ndim == 1:
                obs_values.update(_labeled_scalars(short, value))
                continue
            _log_foxglove_image(
                _foxglove_topic(name, is_image=True),
                short,
                value,
                compress_images=compress_images,
                log_time=stamp_ns,
            )
        _log_foxglove_scalars(_foxglove_topic(OBS_STATE), obs_values, log_time=stamp_ns)

    if action:
        act_values: dict[str, float] = {}
        for name, value in action.items():
            if value is None:
                continue
            short = name[len(ACTION_PREFIX) :] if str(name).startswith(ACTION_PREFIX) else str(name)
            if _is_scalar(value):
                act_values[short] = float(value)
                continue
            if isinstance(value, np.ndarray):
                act_values.update(_labeled_scalars(short, value.flatten()))
        _log_foxglove_scalars(_foxglove_topic(ACTION), act_values, log_time=stamp_ns)
|
||||
|
||||
|
||||
# ── Dataset playback over a Foxglove WebSocket server ─────────────────────
|
||||
# A LeRobotDataset is random-access on disk, so rather than fire-and-forget a forward stream we
|
||||
# advertise a seekable timeline and serve frames on demand for whatever time the user scrubs/plays
|
||||
# to in the Foxglove app. This relies on the SDK's PlaybackControl capability.
|
||||
|
||||
|
||||
def _feature_dim_names(feature: dict | None) -> list[str] | None:
|
||||
"""Best-effort per-dimension series labels for a 1D feature, or ``None`` to fall back to indices.
|
||||
|
||||
LeRobot records a feature's ``names`` inconsistently: a flat list (``["x", "y"]``), a category
|
||||
mapping (``{"motors": ["motor_0", "motor_1"]}``), or a name->index mapping
|
||||
(``{"delta_x": 0, "delta_y": 1}``). Each is handled, but labels are only returned when their count
|
||||
matches the feature's 1D shape, so a malformed/mismatched ``names`` can't silently mislabel series.
|
||||
"""
|
||||
|
||||
if not feature:
|
||||
return None
|
||||
shape = feature.get("shape")
|
||||
dim = shape[0] if shape and len(shape) == 1 else None
|
||||
names = feature.get("names")
|
||||
labels: list[str] | None = None
|
||||
if isinstance(names, dict):
|
||||
values = list(names.values())
|
||||
if values and all(isinstance(v, (list, tuple)) for v in values):
|
||||
labels = [str(n) for group in values for n in group]
|
||||
elif values and all(isinstance(v, int) and not isinstance(v, bool) for v in values):
|
||||
labels = [name for name, _ in sorted(names.items(), key=lambda kv: kv[1])]
|
||||
elif isinstance(names, (list, tuple)):
|
||||
labels = [str(n) for n in names]
|
||||
if labels is not None and dim is not None and len(labels) == dim:
|
||||
return labels
|
||||
return None
|
||||
|
||||
|
||||
def _frame_to_scalars(sample: dict, key: str, labels: list[str] | None = None) -> dict[str, float]:
    """Turn the feature ``key`` of a dataset frame into a flat ``{label: value}`` mapping.

    The value is converted to a NumPy array (through its ``numpy()`` method when it has one). The
    ``observation``/``action`` prefix is stripped from ``key`` to obtain the short feature name. A
    zero-dimensional value becomes a single ``{name: value}`` entry; anything else is flattened and
    handed to ``_labeled_scalars`` together with ``labels``, which decides the per-dimension names.

    Args:
        sample: One dataset frame, keyed by feature name.
        key: Feature to extract.
        labels: Optional per-dimension labels forwarded to ``_labeled_scalars``.

    Returns:
        The scalar mapping, or an empty dict when ``key`` is absent or ``None`` in ``sample``.
    """
    value = sample.get(key)
    if value is None:
        return {}

    data = value.numpy() if hasattr(value, "numpy") else np.asarray(value)

    # Strip the first matching namespace prefix only (observation takes precedence over action).
    short_name = key
    for prefix in (OBS_PREFIX, ACTION_PREFIX):
        if key.startswith(prefix):
            short_name = key[len(prefix) :]
            break

    if data.ndim != 0:
        return _labeled_scalars(short_name, data.flatten(), labels)
    return {short_name: float(data)}
|
||||
|
||||
|
||||
def serve_foxglove_dataset_playback(
    dataset,
    episode_index: int,
    *,
    host: str = "127.0.0.1",
    port: int = 8765,
    compress_images: bool = False,
    autoplay: bool = True,
) -> None:
    """Serve a single dataset episode to Foxglove as a seekable, scrubbable timeline.

    Starts a Foxglove WebSocket server advertising the ``PlaybackControl`` capability over the
    episode's time range. The Foxglove app drives play/pause/seek/speed; a background thread and a
    ``ServerListener`` read frames from the on-disk ``dataset`` on demand and log them stamped at
    their dataset timestamps, so the user can scrub anywhere in the episode. Blocks until interrupted
    (Ctrl-C) or until the playback thread exits; the server is always stopped on the way out.

    Args:
        dataset: A ``LeRobotDataset`` loaded for the single episode to visualize.
        episode_index: Index of the episode being visualized (used only for the session name).
        host: Host interface to bind the WebSocket server to.
        port: Port to bind the WebSocket server to.
        compress_images: Whether to JPEG-compress camera frames before logging.
        autoplay: If True, start playing automatically as soon as a client connects, instead of
            waiting for the user to press play in the Foxglove app.

    Raises:
        ValueError: If the episode contains no frames.
    """

    require_package("foxglove-sdk", extra="viz", import_name="foxglove")
    import bisect
    import threading

    import foxglove
    from foxglove.websocket import (
        Capability,
        PlaybackCommand,
        PlaybackControlRequest,
        PlaybackState,
        PlaybackStatus,
        ServerListener,
    )

    # Per-frame timestamps in nanoseconds (read straight from the table, no video decode).
    times_ns = [int(round(float(t) * 1e9)) for t in dataset.hf_dataset["timestamp"]]
    n_frames = len(times_ns)
    if n_frames == 0:
        raise ValueError("Cannot visualize an empty episode.")
    first_ns, last_ns = times_ns[0], times_ns[-1]
    camera_keys = list(dataset.meta.camera_keys)

    # Dataset-wide q01/q99 depth bounds (fallback min/max) used to normalize depth to [0, 1].
    depth_ranges: dict[str, tuple[float, float]] = {}
    for key in dataset.meta.depth_keys:
        stats = (dataset.meta.stats or {}).get(key)
        if not stats:
            continue
        lo = stats["q01"] if "q01" in stats else stats["min"]
        hi = stats["q99"] if "q99" in stats else stats["max"]
        depth_ranges[key] = (float(np.asarray(lo).item()), float(np.asarray(hi).item()))

    # Per-dimension series labels from the dataset metadata (e.g. joint names), computed once.
    scalar_labels = {
        OBS_STATE: _feature_dim_names(dataset.meta.features.get(OBS_STATE)),
        ACTION: _feature_dim_names(dataset.meta.features.get(ACTION)),
    }

    # Local channel cache so the playback server is self-contained and doesn't touch the live-stream cache.
    channels: dict = {}

    def emit_frame(i: int) -> None:
        """Log every channel for frame ``i`` stamped at its dataset timestamp."""
        sample = dataset[i]
        log_time = times_ns[i]
        for key in camera_keys:
            arr = sample.get(key)
            if arr is None:
                continue
            arr = arr.numpy() if hasattr(arr, "numpy") else np.asarray(arr)
            _log_foxglove_image(
                _foxglove_topic(key, is_image=True),
                key,
                arr,
                compress_images=compress_images,
                channels=channels,
                log_time=log_time,
                depth_range=depth_ranges.get(key),
                raw_depth_values=True,
            )
        _log_foxglove_scalars(
            _foxglove_topic(OBS_STATE),
            _frame_to_scalars(sample, OBS_STATE, scalar_labels[OBS_STATE]),
            channels=channels,
            log_time=log_time,
        )
        _log_foxglove_scalars(
            _foxglove_topic(ACTION),
            _frame_to_scalars(sample, ACTION, scalar_labels[ACTION]),
            channels=channels,
            log_time=log_time,
        )
        episode_scalars = {}
        for feat, label in (
            (DONE, "done"),
            (TRUNCATED, "truncated"),
            (REWARD, "reward"),
            (SUCCESS, "success"),
        ):
            v = sample.get(feat)
            if v is not None:
                episode_scalars[label] = float(v)
        _log_foxglove_scalars("/episode/state", episode_scalars, channels=channels, log_time=log_time)

    lock = threading.Lock()
    stop_event = threading.Event()
    # Shared playback state, guarded by ``lock``. ``seek_idx`` is a one-shot request set by the
    # listener and serviced by the playback loop, which is the *only* thread that emits frames (so
    # concurrent random access into the on-disk dataset / video decoder never overlaps).
    state = {
        "status": PlaybackStatus.Paused,
        "cursor": first_ns,
        "speed": 1.0,
        "last_idx": -1,
        "seek_idx": None,
    }

    def index_at(t_ns: int) -> int:
        """Index of the last frame whose timestamp is <= ``t_ns``, clamped to the valid range."""
        return max(0, min(n_frames - 1, bisect.bisect_right(times_ns, t_ns) - 1))

    # One-shot latch so autoplay fires only on the first client subscription.
    autoplay_started = threading.Event()

    class _PlaybackListener(ServerListener):
        def on_subscribe(self, client, channel):
            # Start playing automatically once a client actually connects (subscribes). Using the
            # subscribe hook, rather than starting in Playing up front, means the timeline doesn't
            # advance before anyone is watching. Fires once; the user can still pause/seek after.
            if not autoplay:
                return
            with lock:
                if autoplay_started.is_set() or state["status"] != PlaybackStatus.Paused:
                    return
                autoplay_started.set()
                state["status"] = PlaybackStatus.Playing
                cursor, speed = state["cursor"], state["speed"]
            server.broadcast_playback_state(PlaybackState(PlaybackStatus.Playing, cursor, speed, False, ""))

        def on_playback_control_request(self, req: PlaybackControlRequest):
            # Only mutate state here; the playback loop performs all frame emission.
            with lock:
                did_seek = False
                if req.seek_time is not None:
                    cursor = max(first_ns, min(last_ns, req.seek_time))
                    state["cursor"] = cursor
                    state["last_idx"] = state["seek_idx"] = index_at(cursor)
                    did_seek = True
                if req.playback_speed and req.playback_speed > 0:
                    state["speed"] = req.playback_speed
                if req.playback_command == PlaybackCommand.Play:
                    # Restarting from the end replays from the beginning.
                    if state["cursor"] >= last_ns:
                        state["cursor"] = first_ns
                        state["last_idx"] = state["seek_idx"] = 0
                        did_seek = True
                    state["status"] = PlaybackStatus.Playing
                elif req.playback_command == PlaybackCommand.Pause:
                    state["status"] = PlaybackStatus.Paused
                status, cursor, speed = state["status"], state["cursor"], state["speed"]
            request_id = req.request_id or ""
            return PlaybackState(status, cursor, speed, did_seek, request_id)

    server = foxglove.start_server(
        name=f"{dataset.repo_id}/episode_{episode_index}",
        host=host,
        port=port,
        capabilities=[Capability.PlaybackControl, Capability.Time],
        server_listener=_PlaybackListener(),
        playback_time_range=(first_ns, last_ns),
    )

    def playback_loop() -> None:
        # Cap how far the cursor may advance in a single tick. A slow frame decode (or any stall)
        # would otherwise make ``dt`` huge and produce one enormous catch-up batch; clamping it makes
        # playback trail wall-clock under a slow decoder while each tick emits a bounded frame range.
        max_tick_dt_s = 0.25
        prev = time.monotonic()
        try:
            while not stop_event.is_set():
                time.sleep(1.0 / 60.0)
                ended = False
                speed = 1.0
                with lock:
                    now = time.monotonic()
                    dt = min(now - prev, max_tick_dt_s)
                    prev = now
                    # A queued seek is always serviced, even while paused, so scrubbing updates the view.
                    work = []
                    seek_idx = state["seek_idx"]
                    if seek_idx is not None:
                        state["seek_idx"] = None
                        work.append(seek_idx)
                    if state["status"] == PlaybackStatus.Playing:
                        cursor = state["cursor"] + int(dt * 1e9 * state["speed"])
                        start_idx = state["last_idx"] + 1
                        if cursor >= last_ns:
                            cursor, target, ended = last_ns, n_frames - 1, True
                        else:
                            target = index_at(cursor)
                        state["cursor"] = cursor
                        work.extend(range(start_idx, target + 1))
                        # cursor only grows while playing (seeks reset last_idx in the listener), so
                        # target >= last_idx here; a plain assignment is correct and clearer than max().
                        state["last_idx"] = target
                        if ended:
                            state["status"] = PlaybackStatus.Ended
                    # Nothing to emit and nothing to announce. The ``ended`` check matters: when the
                    # last frame was already emitted (e.g. a single-frame episode), the end of playback
                    # must still be broadcast or clients keep showing "Playing".
                    if not work and not ended:
                        continue
                    cursor, speed = state["cursor"], state["speed"]
                # Emit outside the lock; this is the only thread that calls emit_frame. Re-check
                # stop_event between frames so shutdown stays responsive even mid-batch.
                for i in work:
                    if stop_event.is_set():
                        break
                    emit_frame(i)
                server.broadcast_time(cursor)
                if ended:
                    server.broadcast_playback_state(
                        PlaybackState(PlaybackStatus.Ended, cursor, speed, False, "")
                    )
        finally:
            # If this thread dies (e.g. a frame fails to decode), wake the main thread so the
            # function returns and stops the server instead of blocking forever with dead playback.
            stop_event.set()

    thread = threading.Thread(target=playback_loop, name="foxglove-playback", daemon=True)

    # Everything after the server has started runs under try/finally so that a failure while
    # emitting the first frame (or starting the thread) cannot leak the running server.
    try:
        # Emit the first frame so channels are advertised (done before the loop starts, so emission
        # stays single-threaded). Late-connecting clients re-receive frames once they seek/play.
        emit_frame(0)
        with lock:
            state["last_idx"] = 0
        server.broadcast_time(first_ns)
        server.broadcast_playback_state(PlaybackState(PlaybackStatus.Paused, first_ns, 1.0, True, ""))

        thread.start()

        print(f"Foxglove server running. Connect the Foxglove app to ws://{host}:{port}")
        print("Use the playback controls in Foxglove to play/pause and scrub the episode. Ctrl-C to exit.")
        while not stop_event.is_set():
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("Ctrl-C received. Exiting.")
    finally:
        stop_event.set()
        # join() raises on a thread that was never started, so only join a live one.
        if thread.is_alive():
            thread.join(timeout=2.0)
        server.stop()
        channels.clear()
|
||||
@@ -0,0 +1,184 @@
|
||||
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Rerun visualization backend.
|
||||
|
||||
Live control-loop streaming to the Rerun viewer (:func:`log_rerun_data`). Callers usually select a
|
||||
backend at runtime through the dispatch in :mod:`lerobot.utils.visualization_utils` rather than
|
||||
importing from here directly. Requires the ``viz`` extra (``pip install 'lerobot[viz]'``).
|
||||
"""
|
||||
|
||||
import numbers
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
|
||||
from .constants import ACTION, ACTION_PREFIX, OBS_PREFIX, OBS_STR
|
||||
from .import_utils import require_package
|
||||
|
||||
|
||||
def _is_scalar(x):
|
||||
return isinstance(x, (float | numbers.Real | np.integer | np.floating)) or (
|
||||
isinstance(x, np.ndarray) and x.ndim == 0
|
||||
)
|
||||
|
||||
|
||||
def init_rerun(
    session_name: str = "lerobot_control_loop", ip: str | None = None, port: int | None = None
) -> None:
    """Start a Rerun session for visualizing the control loop.

    Connects to a remote Rerun server over gRPC when both ``ip`` and ``port`` are given; otherwise
    spawns a local viewer whose memory limit comes from ``LEROBOT_RERUN_MEMORY_LIMIT`` (default
    ``"10%"``). ``RERUN_FLUSH_NUM_BYTES`` is set to ``"8000"`` unless it is already defined.

    Args:
        session_name: Name of the Rerun session.
        ip: Optional IP for connecting to a Rerun server.
        port: Optional port for connecting to a Rerun server.
    """

    require_package("rerun-sdk", extra="viz", import_name="rerun")
    import rerun as rr

    # A new session needs a fresh blueprint, so forget the one cached on `log_rerun_data`.
    log_rerun_data.blueprint = None

    # Keep a user-provided flush size; only fill in the default when it is unset.
    os.environ.setdefault("RERUN_FLUSH_NUM_BYTES", "8000")
    rr.init(session_name)

    if not (ip and port):
        rr.spawn(memory_limit=os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%"))
        return
    rr.connect_grpc(url=f"rerun+http://{ip}:{port}/proxy")
|
||||
|
||||
|
||||
def shutdown_rerun() -> None:
    """Shut down the Rerun SDK by calling ``rerun.rerun_shutdown()``."""

    require_package("rerun-sdk", extra="viz", import_name="rerun")
    from rerun import rerun_shutdown

    rerun_shutdown()
|
||||
|
||||
|
||||
def _build_blueprint(observation_paths: set[str], action_paths: set[str], image_paths: set[str]):
    """Assemble a Rerun blueprint with one grid cell per view.

    Every image path gets its own 2D spatial view (sorted by path). All observation scalar paths
    share one time-series view named ``"observation"`` and all action scalar paths share one named
    ``"action"``; a time-series view is omitted when its path set is empty.
    """

    # Safe + zero-overhead: `log_rerun_data` already ran the `require_package` guard and imported rerun.
    import rerun.blueprint as rrb

    views = []
    for path in sorted(image_paths):
        views.append(rrb.Spatial2DView(origin=path, name=path))

    for view_name, paths in (("observation", observation_paths), ("action", action_paths)):
        if paths:
            views.append(rrb.TimeSeriesView(name=view_name, contents=sorted(paths)))

    return rrb.Blueprint(rrb.Grid(*views))
|
||||
|
||||
|
||||
def _ensure_blueprint(observation_paths: set[str], action_paths: set[str], image_paths: set[str]) -> None:
    """Send the blueprint the first time any path has been logged, then cache it.

    The blueprint is cached on ``log_rerun_data.blueprint``; once it is set, later calls do nothing.
    Calls where all three path sets are empty also do nothing, so the blueprint is built from the
    first call that actually logged data.
    """
    already_sent = getattr(log_rerun_data, "blueprint", None) is not None
    nothing_logged = not (observation_paths or action_paths or image_paths)
    if already_sent or nothing_logged:
        return

    # Safe + zero-overhead: `log_rerun_data` already ran the `require_package` guard and imported rerun.
    import rerun as rr

    layout = _build_blueprint(observation_paths, action_paths, image_paths)
    log_rerun_data.blueprint = layout
    rr.send_blueprint(layout)
|
||||
|
||||
|
||||
def log_rerun_data(
    observation: RobotObservation | None = None,
    action: RobotAction | None = None,
    compress_images: bool = False,
) -> None:
    """
    Log one step of observation and action data to Rerun.

    Entries whose value is ``None`` are skipped, as are values that are neither scalars nor NumPy
    arrays. Keys that do not already start with the observation/action prefix are namespaced under
    it. Values are logged as follows:
        - Scalars (including zero-dimensional arrays) become a single `rr.Scalars` value.
        - 1D observation arrays become one `rr.Scalars` batch under a single entity path.
        - Other observation arrays are treated as images: 3D channel-first arrays (1, 3 or 4
          channels) are transposed to HWC; a trailing dimension of 1 is logged as `rr.DepthImage`,
          anything else as `rr.Image` (compressed when ``compress_images`` is set). Images are
          logged with ``static=True``.
        - Action arrays of any dimensionality are flattened into one `rr.Scalars` batch.

    After logging, the collected entity paths are passed to ``_ensure_blueprint``, which sends the
    view layout the first time there is something to lay out.

    Args:
        observation: An optional dictionary containing observation data to log.
        action: An optional dictionary containing action data to log.
        compress_images: Whether to compress images before logging to save bandwidth & memory in exchange for cpu and quality.
    """

    require_package("rerun-sdk", extra="viz", import_name="rerun")
    import rerun as rr

    observation_paths: set[str] = set()
    action_paths: set[str] = set()
    image_paths: set[str] = set()

    for name, value in (observation or {}).items():
        if value is None:
            continue
        path = name if str(name).startswith(OBS_PREFIX) else f"{OBS_STR}.{name}"

        if _is_scalar(value):
            rr.log(path, rr.Scalars(float(value)))
            observation_paths.add(path)
            continue
        if not isinstance(value, np.ndarray):
            continue

        frame = value
        # Convert CHW -> HWC when needed
        channels_first = (
            frame.ndim == 3 and frame.shape[0] in (1, 3, 4) and frame.shape[-1] not in (1, 3, 4)
        )
        if channels_first:
            frame = np.transpose(frame, (1, 2, 0))

        if frame.ndim == 1:
            rr.log(path, rr.Scalars(frame.astype(float)))
            observation_paths.add(path)
            continue

        if frame.shape[-1] == 1:
            img_entity = rr.DepthImage(frame, colormap=rr.components.Colormap.Viridis)
        elif compress_images:
            img_entity = rr.Image(frame).compress()
        else:
            img_entity = rr.Image(frame)
        rr.log(path, entity=img_entity, static=True)
        image_paths.add(path)

    for name, value in (action or {}).items():
        if value is None:
            continue
        path = name if str(name).startswith(ACTION_PREFIX) else f"{ACTION}.{name}"

        if _is_scalar(value):
            payload = float(value)
        elif isinstance(value, np.ndarray):
            # Flatten any (incl. higher-dimensional) array into a single batched Scalars
            payload = value.reshape(-1).astype(float)
        else:
            continue
        rr.log(path, rr.Scalars(payload))
        action_paths.add(path)

    _ensure_blueprint(observation_paths, action_paths, image_paths)
|
||||
@@ -12,166 +12,68 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numbers
|
||||
import os
|
||||
"""Backend-agnostic visualization dispatch.
|
||||
|
||||
import numpy as np
|
||||
Selects a visualization backend at runtime via a display-mode string (e.g. a ``--display_mode`` CLI
|
||||
flag) so callers never branch on the backend. The concrete implementations live in
|
||||
:mod:`lerobot.utils.rerun_visualization` and :mod:`lerobot.utils.foxglove_visualization`; importing
|
||||
this module does not import ``rerun`` or ``foxglove`` (each backend imports its SDK lazily behind a
|
||||
``require_package`` guard).
|
||||
"""
|
||||
|
||||
from lerobot.types import RobotAction, RobotObservation
|
||||
|
||||
from .constants import ACTION, ACTION_PREFIX, OBS_PREFIX, OBS_STR
|
||||
from .import_utils import require_package
|
||||
from .foxglove_visualization import init_foxglove, log_foxglove_data, shutdown_foxglove
|
||||
from .rerun_visualization import init_rerun, log_rerun_data, shutdown_rerun
|
||||
|
||||
# Visualization backends selectable at runtime via a display-mode string (e.g. a --display_mode flag).
|
||||
VISUALIZATION_MODES = ("rerun", "foxglove")
|
||||
|
||||
|
||||
def init_rerun(
|
||||
session_name: str = "lerobot_control_loop", ip: str | None = None, port: int | None = None
|
||||
def init_visualization(
|
||||
display_mode: str,
|
||||
*,
|
||||
session_name: str = "lerobot_control_loop",
|
||||
ip: str | None = None,
|
||||
port: int | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the Rerun SDK for visualizing the control loop.
|
||||
"""Initializes the visualization backend selected by ``display_mode``.
|
||||
|
||||
Args:
|
||||
session_name: Name of the Rerun session.
|
||||
ip: Optional IP for connecting to a Rerun server.
|
||||
port: Optional port for connecting to a Rerun server.
|
||||
For ``"rerun"``, ``ip``/``port`` point at an optional remote Rerun server. For ``"foxglove"``,
|
||||
``ip`` is the interface to bind the WebSocket server to (``127.0.0.1`` for local only, ``0.0.0.0``
|
||||
for all interfaces) and ``port`` is its port.
|
||||
"""
|
||||
|
||||
require_package("rerun-sdk", extra="viz", import_name="rerun")
|
||||
import rerun as rr
|
||||
|
||||
log_rerun_data.blueprint = None # Reset blueprint cache for new session
|
||||
|
||||
batch_size = os.getenv("RERUN_FLUSH_NUM_BYTES", "8000")
|
||||
os.environ["RERUN_FLUSH_NUM_BYTES"] = batch_size
|
||||
rr.init(session_name)
|
||||
memory_limit = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%")
|
||||
if ip and port:
|
||||
rr.connect_grpc(url=f"rerun+http://{ip}:{port}/proxy")
|
||||
if display_mode == "rerun":
|
||||
init_rerun(session_name=session_name, ip=ip, port=port)
|
||||
elif display_mode == "foxglove":
|
||||
init_foxglove(host=ip or "127.0.0.1", port=port)
|
||||
else:
|
||||
rr.spawn(memory_limit=memory_limit)
|
||||
raise ValueError(f"Unknown display_mode '{display_mode}'. Expected one of {VISUALIZATION_MODES}.")
|
||||
|
||||
|
||||
def shutdown_rerun() -> None:
|
||||
"""Shuts down the Rerun SDK gracefully."""
|
||||
|
||||
require_package("rerun-sdk", extra="viz", import_name="rerun")
|
||||
import rerun as rr
|
||||
|
||||
rr.rerun_shutdown()
|
||||
|
||||
|
||||
def _is_scalar(x):
|
||||
return isinstance(x, (float | numbers.Real | np.integer | np.floating)) or (
|
||||
isinstance(x, np.ndarray) and x.ndim == 0
|
||||
)
|
||||
|
||||
|
||||
def _build_blueprint(observation_paths: set[str], action_paths: set[str], image_paths: set[str]):
|
||||
"""Build a Rerun blueprint laying out camera images, observation and action scalars in separate views.
|
||||
|
||||
Camera images, observation and action scalars are arranged in a grid.
|
||||
"""
|
||||
|
||||
# Safe + zero-overhead: `log_rerun_data` already ran the `require_package` guard and imported rerun.
|
||||
import rerun.blueprint as rrb
|
||||
|
||||
views = [rrb.Spatial2DView(origin=path, name=path) for path in sorted(image_paths)]
|
||||
|
||||
if observation_paths:
|
||||
views.append(rrb.TimeSeriesView(name="observation", contents=sorted(observation_paths)))
|
||||
if action_paths:
|
||||
views.append(rrb.TimeSeriesView(name="action", contents=sorted(action_paths)))
|
||||
|
||||
return rrb.Blueprint(rrb.Grid(*views))
|
||||
|
||||
|
||||
def _ensure_blueprint(observation_paths: set[str], action_paths: set[str], image_paths: set[str]) -> None:
|
||||
"""Build and send the blueprint once, from the first observation and action data."""
|
||||
if getattr(log_rerun_data, "blueprint", None) is not None:
|
||||
return
|
||||
|
||||
if not (observation_paths or action_paths or image_paths):
|
||||
return
|
||||
|
||||
# Safe + zero-overhead: `log_rerun_data` already ran the `require_package` guard and imported rerun.
|
||||
import rerun as rr
|
||||
|
||||
blueprint = _build_blueprint(observation_paths, action_paths, image_paths)
|
||||
log_rerun_data.blueprint = blueprint
|
||||
rr.send_blueprint(blueprint)
|
||||
|
||||
|
||||
def log_rerun_data(
|
||||
def log_visualization_data(
|
||||
display_mode: str,
|
||||
observation: RobotObservation | None = None,
|
||||
action: RobotAction | None = None,
|
||||
compress_images: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Logs observation and action data to Rerun for real-time visualization.
|
||||
"""Logs observation/action data to the backend selected by ``display_mode``."""
|
||||
|
||||
This function iterates through the provided observation and action dictionaries and sends their contents
|
||||
to the Rerun viewer. It handles different data types appropriately:
|
||||
- Scalars values (floats, ints) are logged as `rr.Scalars`.
|
||||
- 3D NumPy arrays that resemble images (e.g., with 1, 3, or 4 channels first) are transposed
|
||||
from CHW to HWC format, (optionally) compressed to JPEG and logged as `rr.Image` or `rr.EncodedImage`.
|
||||
- 1D NumPy arrays are logged as a single `rr.Scalars` batch under one entity path, so that every
|
||||
dimension shares the same view instead of being split across one view per element.
|
||||
- Multi-dimensional **action** arrays are flattened and logged as a single `rr.Scalars` batch.
|
||||
if display_mode == "rerun":
|
||||
log_rerun_data(observation=observation, action=action, compress_images=compress_images)
|
||||
elif display_mode == "foxglove":
|
||||
log_foxglove_data(observation=observation, action=action, compress_images=compress_images)
|
||||
else:
|
||||
raise ValueError(f"Unknown display_mode '{display_mode}'. Expected one of {VISUALIZATION_MODES}.")
|
||||
|
||||
Keys are automatically namespaced with "observation." or "action." if not already present.
|
||||
|
||||
On the first call, a blueprint is built and sent so observation and action scalars get separate
|
||||
time-series views and each image gets its own spatial view.
|
||||
def shutdown_visualization(display_mode: str) -> None:
|
||||
"""Shuts down the backend selected by ``display_mode``."""
|
||||
|
||||
Args:
|
||||
observation: An optional dictionary containing observation data to log.
|
||||
action: An optional dictionary containing action data to log.
|
||||
compress_images: Whether to compress images before logging to save bandwidth & memory in exchange for cpu and quality.
|
||||
"""
|
||||
|
||||
require_package("rerun-sdk", extra="viz", import_name="rerun")
|
||||
import rerun as rr
|
||||
|
||||
observation_paths: set[str] = set()
|
||||
action_paths: set[str] = set()
|
||||
image_paths: set[str] = set()
|
||||
|
||||
if observation:
|
||||
for k, v in observation.items():
|
||||
if v is None:
|
||||
continue
|
||||
key = k if str(k).startswith(OBS_PREFIX) else f"{OBS_STR}.{k}"
|
||||
|
||||
if _is_scalar(v):
|
||||
rr.log(key, rr.Scalars(float(v)))
|
||||
observation_paths.add(key)
|
||||
elif isinstance(v, np.ndarray):
|
||||
arr = v
|
||||
# Convert CHW -> HWC when needed
|
||||
if arr.ndim == 3 and arr.shape[0] in (1, 3, 4) and arr.shape[-1] not in (1, 3, 4):
|
||||
arr = np.transpose(arr, (1, 2, 0))
|
||||
if arr.ndim == 1:
|
||||
rr.log(key, rr.Scalars(arr.astype(float)))
|
||||
observation_paths.add(key)
|
||||
else:
|
||||
if arr.shape[-1] == 1:
|
||||
img_entity = rr.DepthImage(arr, colormap=rr.components.Colormap.Viridis)
|
||||
else:
|
||||
img_entity = rr.Image(arr).compress() if compress_images else rr.Image(arr)
|
||||
rr.log(key, entity=img_entity, static=True)
|
||||
image_paths.add(key)
|
||||
|
||||
if action:
|
||||
for k, v in action.items():
|
||||
if v is None:
|
||||
continue
|
||||
key = k if str(k).startswith(ACTION_PREFIX) else f"{ACTION}.{k}"
|
||||
|
||||
if _is_scalar(v):
|
||||
rr.log(key, rr.Scalars(float(v)))
|
||||
action_paths.add(key)
|
||||
elif isinstance(v, np.ndarray):
|
||||
# Flatten any (incl. higher-dimensional) array into a single batched Scalars
|
||||
rr.log(key, rr.Scalars(v.reshape(-1).astype(float)))
|
||||
action_paths.add(key)
|
||||
|
||||
_ensure_blueprint(observation_paths, action_paths, image_paths)
|
||||
if display_mode == "rerun":
|
||||
shutdown_rerun()
|
||||
elif display_mode == "foxglove":
|
||||
shutdown_foxglove()
|
||||
else:
|
||||
raise ValueError(f"Unknown display_mode '{display_mode}'. Expected one of {VISUALIZATION_MODES}.")
|
||||
|
||||
@@ -245,44 +245,3 @@ class TestFeatureFileRouting:
|
||||
|
||||
dataset.save_episode()
|
||||
dataset.finalize()
|
||||
|
||||
|
||||
# ── 5. Depth stats unit canonicalization (millimetres) ────────────────
|
||||
|
||||
|
||||
class TestDepthStatsUnit:
|
||||
"""Depth stats are always stored in millimetres, regardless of raw frame dtype."""
|
||||
|
||||
NUM_FRAMES = 4
|
||||
|
||||
@pytest.mark.parametrize("use_videos", [False, True])
|
||||
def test_stats_canonicalized_to_mm(self, tmp_path, features_factory, use_videos):
|
||||
"""Float (metre) and integer (millimetre) depth over the same physical range
|
||||
yield identical millimetre-scale stats."""
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
|
||||
def _record(depth_dtype, root):
|
||||
features = features_factory(
|
||||
camera_features=DUMMY_CAMERA_FEATURES_WITH_DEPTH, use_videos=use_videos
|
||||
)
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id=DUMMY_REPO_ID,
|
||||
fps=DEFAULT_FPS,
|
||||
features=features,
|
||||
root=root,
|
||||
use_videos=use_videos,
|
||||
streaming_encoding=use_videos,
|
||||
)
|
||||
add_frames(dataset, num_frames=self.NUM_FRAMES, depth_dtype=depth_dtype)
|
||||
dataset.save_episode()
|
||||
dataset.finalize()
|
||||
return np.asarray(dataset.meta.stats[DEPTH_KEY]["mean"]).reshape(-1)
|
||||
|
||||
# add_frames ramps float depth over 0.1–10 m and integer depth over 100–10000 mm
|
||||
# (the same physical range), so canonicalized stats must match.
|
||||
mean_m = _record(np.float32, tmp_path / "ds_m")
|
||||
mean_mm = _record(np.uint16, tmp_path / "ds_mm")
|
||||
|
||||
# Float (metre) input is scaled to millimetres, not left in the single-digit metre range.
|
||||
assert mean_m.item() > 50.0
|
||||
np.testing.assert_allclose(mean_m, mean_mm, rtol=0.05)
|
||||
|
||||
Vendored
+7
-12
@@ -49,18 +49,16 @@ from tests.fixtures.constants import (
|
||||
)
|
||||
|
||||
|
||||
def add_frames(dataset: LeRobotDataset, num_frames: int, depth_dtype: np.dtype = np.uint16) -> None:
|
||||
def add_frames(dataset: LeRobotDataset, num_frames: int) -> None:
|
||||
"""Append ``num_frames`` synthetic frames to ``dataset``.
|
||||
|
||||
Generates per-feature payloads from ``dataset.meta``: depth ramps (``depth_dtype``,
|
||||
default ``uint16`` millimetres; pass ``np.float32`` for metres) for keys in
|
||||
``dataset.meta.depth_keys``, uint8 random noise for video/image keys, and float32
|
||||
zeros for everything else. ``DEFAULT_FEATURES`` (timestamp, frame_index, ...) are
|
||||
auto-populated by ``add_frame`` and skipped here.
|
||||
Generates per-feature payloads from ``dataset.meta``: uint16 depth ramps for
|
||||
keys in ``dataset.meta.depth_keys``, uint8 random noise for video/image keys,
|
||||
and float32 zeros for everything else. ``DEFAULT_FEATURES`` (timestamp,
|
||||
frame_index, ...) are auto-populated by ``add_frame`` and skipped here.
|
||||
"""
|
||||
video_keys = dataset.meta.video_keys
|
||||
depth_keys = dataset.meta.depth_keys
|
||||
depth_is_float = np.issubdtype(depth_dtype, np.floating)
|
||||
# Smooth gradient base reused per (H, W) to keep depth frames cheap to
|
||||
# encode (HEVC Main 12 hates white noise).
|
||||
_depth_base_cache: dict[tuple[int, int], np.ndarray] = {}
|
||||
@@ -72,14 +70,11 @@ def add_frames(dataset: LeRobotDataset, num_frames: int, depth_dtype: np.dtype =
|
||||
shape = ft["shape"]
|
||||
if key in depth_keys:
|
||||
h, w, _ = shape
|
||||
# Float depth is expressed in metres, integer depth in millimetres.
|
||||
lo, hi = (0.1, 10.0) if depth_is_float else (100.0, 10_000.0)
|
||||
base = _depth_base_cache.setdefault(
|
||||
(h, w),
|
||||
np.linspace(lo, hi, h * w, dtype=np.float32).reshape(h, w, 1),
|
||||
np.linspace(100.0, 10_000.0, h * w, dtype=np.float32).reshape(h, w, 1),
|
||||
)
|
||||
step = (0.05 if depth_is_float else 50.0) * i
|
||||
frame[key] = (base + step).clip(0, 65535).astype(depth_dtype)
|
||||
frame[key] = (base + 50.0 * i).clip(0, 65535).astype(np.uint16)
|
||||
elif key in video_keys:
|
||||
frame[key] = np.random.randint(0, 256, shape, dtype=np.uint8)
|
||||
else:
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for the Foxglove backend's pure helpers.
|
||||
|
||||
These cover topic naming, series labelling and feature-name parsing. They import
|
||||
``foxglove_visualization`` directly and need NO ``foxglove`` extra: the SDK is imported lazily inside
|
||||
the functions that talk to the server, so the helpers below run in the base test tier.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from lerobot.utils import foxglove_visualization as fv
|
||||
from lerobot.utils.constants import ACTION, OBS_STATE
|
||||
|
||||
|
||||
def test_foxglove_safe_name_collapses_dots():
|
||||
assert fv._foxglove_safe_name("observation.images.front") == "observation_images_front"
|
||||
assert fv._foxglove_safe_name("plain") == "plain"
|
||||
|
||||
|
||||
def test_foxglove_topic_image_strips_prefix_without_doubling_images():
|
||||
# Fully-qualified camera key -> single clean segment (no doubled "images").
|
||||
assert fv._foxglove_topic("observation.images.front", is_image=True) == "/observation/images/front"
|
||||
# A nested camera name keeps its structure via safe-name collapsing.
|
||||
assert (
|
||||
fv._foxglove_topic("observation.images.wrist.left", is_image=True) == "/observation/images/wrist_left"
|
||||
)
|
||||
# Bare camera name (as real robots emit).
|
||||
assert fv._foxglove_topic("front", is_image=True) == "/observation/images/front"
|
||||
|
||||
|
||||
def test_foxglove_topic_scalar_sources():
|
||||
assert fv._foxglove_topic(OBS_STATE) == "/observation/state"
|
||||
assert fv._foxglove_topic("observation.environment_state") == "/observation/state"
|
||||
assert fv._foxglove_topic(ACTION) == "/action/state"
|
||||
assert fv._foxglove_topic("action.delta") == "/action/state"
|
||||
|
||||
|
||||
def test_labeled_scalars_uses_labels_then_index_fallback():
|
||||
assert fv._labeled_scalars("state", np.array([1.0, 2.0, 3.0])) == {
|
||||
"state_0": 1.0,
|
||||
"state_1": 2.0,
|
||||
"state_2": 3.0,
|
||||
}
|
||||
assert fv._labeled_scalars("state", [1.0, 2.0], ["pan", "lift"]) == {"pan": 1.0, "lift": 2.0}
|
||||
# Wrong-length labels fall back to index naming (never silently mislabels).
|
||||
assert fv._labeled_scalars("q", [1.0, 2.0], ["only_one"]) == {"q_0": 1.0, "q_1": 2.0}
|
||||
|
||||
|
||||
def test_frame_to_scalars_matches_live_labeling_and_handles_scalar():
|
||||
frame = {OBS_STATE: np.array([1.0, 2.0])}
|
||||
# No metadata -> {short_name}_{i}, identical to the live-stream fallback.
|
||||
assert fv._frame_to_scalars(frame, OBS_STATE) == fv._labeled_scalars("state", np.array([1.0, 2.0]))
|
||||
assert fv._frame_to_scalars(frame, OBS_STATE) == {"state_0": 1.0, "state_1": 2.0}
|
||||
# Metadata labels are honored.
|
||||
assert fv._frame_to_scalars(frame, OBS_STATE, ["pan", "lift"]) == {"pan": 1.0, "lift": 2.0}
|
||||
# A 0-d scalar becomes a single entry named by the short feature name.
|
||||
assert fv._frame_to_scalars({ACTION: np.array(5.0)}, ACTION) == {"action": 5.0}
|
||||
# A missing feature yields an empty mapping.
|
||||
assert fv._frame_to_scalars({}, OBS_STATE) == {}
|
||||
|
||||
|
||||
def test_feature_dim_names_formats():
|
||||
# Flat list of names.
|
||||
assert fv._feature_dim_names({"shape": [2], "names": ["x", "y"]}) == ["x", "y"]
|
||||
# Category mapping (dict of lists).
|
||||
assert fv._feature_dim_names({"shape": [2], "names": {"motors": ["m0", "m1"]}}) == ["m0", "m1"]
|
||||
# name -> index mapping (returned sorted by index).
|
||||
assert fv._feature_dim_names({"shape": [2], "names": {"delta_x": 0, "delta_y": 1}}) == [
|
||||
"delta_x",
|
||||
"delta_y",
|
||||
]
|
||||
# Bool values must NOT be treated as an index map (bool is a subclass of int).
|
||||
assert fv._feature_dim_names({"shape": [2], "names": {"a": True, "b": False}}) is None
|
||||
# Mismatched length -> None (won't silently mislabel).
|
||||
assert fv._feature_dim_names({"shape": [3], "names": ["x", "y"]}) is None
|
||||
# Missing / absent names -> None.
|
||||
assert fv._feature_dim_names(None) is None
|
||||
assert fv._feature_dim_names({"shape": [2]}) is None
|
||||
|
||||
|
||||
def test_is_scalar():
|
||||
assert fv._is_scalar(1.0)
|
||||
assert fv._is_scalar(np.float32(2.0))
|
||||
assert fv._is_scalar(np.array(3.0)) # 0-d array
|
||||
assert not fv._is_scalar(np.array([1.0, 2.0]))
|
||||
assert not fv._is_scalar("x")
|
||||
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
pytest.importorskip("rerun", reason="rerun-sdk is required (install lerobot[viz])")
|
||||
|
||||
from lerobot.types import TransitionKey
|
||||
from lerobot.utils.constants import OBS_STATE
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_rerun(monkeypatch):
|
||||
"""
|
||||
Provide a mock `rerun` module (and `rerun.blueprint` submodule) so tests don't
|
||||
depend on the real library. Also reload the module-under-test so it binds to
|
||||
this mock `rr`.
|
||||
"""
|
||||
calls = []
|
||||
blueprints = []
|
||||
|
||||
class DummyScalar:
|
||||
def __init__(self, value):
|
||||
# Scalars may be built from a single float or from a 1D array batch.
|
||||
self.value = value
|
||||
|
||||
class DummyImage:
|
||||
def __init__(self, arr):
|
||||
self.arr = arr
|
||||
|
||||
def compress(self, *a, **k):
|
||||
return self
|
||||
|
||||
class DummyDepthImage:
|
||||
def __init__(self, arr, colormap=None):
|
||||
self.arr = arr
|
||||
self.colormap = colormap
|
||||
|
||||
def dummy_log(key, obj=None, **kwargs):
|
||||
# Accept either positional `obj` or keyword `entity` and record remaining kwargs.
|
||||
if obj is None and "entity" in kwargs:
|
||||
obj = kwargs.pop("entity")
|
||||
calls.append((key, obj, kwargs))
|
||||
|
||||
def dummy_send_blueprint(blueprint, *a, **k):
|
||||
blueprints.append(blueprint)
|
||||
|
||||
# Mock the `rerun.blueprint` submodule used to build the layout.
|
||||
dummy_rrb = SimpleNamespace(
|
||||
Spatial2DView=lambda origin=None, name=None: SimpleNamespace(
|
||||
kind="Spatial2DView", origin=origin, name=name
|
||||
),
|
||||
TimeSeriesView=lambda name=None, contents=None: SimpleNamespace(
|
||||
kind="TimeSeriesView", name=name, contents=contents
|
||||
),
|
||||
Grid=lambda *views: SimpleNamespace(kind="Grid", views=list(views)),
|
||||
Blueprint=lambda root: SimpleNamespace(kind="Blueprint", root=root),
|
||||
)
|
||||
|
||||
dummy_rr = SimpleNamespace(
|
||||
__name__="rerun",
|
||||
__package__="rerun",
|
||||
__spec__=SimpleNamespace(name="rerun", submodule_search_locations=None),
|
||||
Scalars=DummyScalar,
|
||||
Image=DummyImage,
|
||||
DepthImage=DummyDepthImage,
|
||||
components=SimpleNamespace(Colormap=SimpleNamespace(Viridis="viridis")),
|
||||
log=dummy_log,
|
||||
send_blueprint=dummy_send_blueprint,
|
||||
init=lambda *a, **k: None,
|
||||
spawn=lambda *a, **k: None,
|
||||
blueprint=dummy_rrb,
|
||||
)
|
||||
|
||||
# Inject fake modules into sys.modules (both `rerun` and `rerun.blueprint`).
|
||||
monkeypatch.setitem(sys.modules, "rerun", dummy_rr)
|
||||
monkeypatch.setitem(sys.modules, "rerun.blueprint", dummy_rrb)
|
||||
|
||||
# Now import and reload the module under test, to bind to our rerun mock
|
||||
import lerobot.utils.rerun_visualization as rv
|
||||
|
||||
importlib.reload(rv)
|
||||
|
||||
# Expose the reloaded module, the call recorder and the captured blueprints
|
||||
yield rv, calls, blueprints
|
||||
|
||||
|
||||
def _keys(calls):
|
||||
"""Helper to extract just the keys logged to rr.log"""
|
||||
return [k for (k, _obj, _kw) in calls]
|
||||
|
||||
|
||||
def _obj_for(calls, key):
|
||||
"""Find the first object logged under a given key."""
|
||||
for k, obj, _kw in calls:
|
||||
if k == key:
|
||||
return obj
|
||||
raise KeyError(f"Key {key} not found in calls: {calls}")
|
||||
|
||||
|
||||
def _kwargs_for(calls, key):
|
||||
for k, _obj, kw in calls:
|
||||
if k == key:
|
||||
return kw
|
||||
raise KeyError(f"Key {key} not found in calls: {calls}")
|
||||
|
||||
|
||||
def _views_by_kind(blueprint, kind):
|
||||
"""Return the views of a given kind from the (single) blueprint's grid."""
|
||||
return [v for v in blueprint.root.views if v.kind == kind]
|
||||
|
||||
|
||||
def test_log_rerun_data_envtransition_scalars_and_image(mock_rerun):
|
||||
rv, calls, blueprints = mock_rerun
|
||||
|
||||
# Build EnvTransition dict
|
||||
obs = {
|
||||
f"{OBS_STATE}.temperature": np.float32(25.0),
|
||||
# CHW image should be converted to HWC for rr.Image
|
||||
"observation.camera": np.zeros((3, 10, 20), dtype=np.uint8),
|
||||
}
|
||||
act = {
|
||||
"action.throttle": 0.7,
|
||||
# 1D array should be logged as a single Scalars batch under one entity path
|
||||
"action.vector": np.array([1.0, 2.0], dtype=np.float32),
|
||||
}
|
||||
transition = {
|
||||
TransitionKey.OBSERVATION: obs,
|
||||
TransitionKey.ACTION: act,
|
||||
}
|
||||
|
||||
# Extract observation and action data from transition like in the real call sites
|
||||
obs_data = transition.get(TransitionKey.OBSERVATION, {})
|
||||
action_data = transition.get(TransitionKey.ACTION, {})
|
||||
rv.log_rerun_data(observation=obs_data, action=action_data)
|
||||
|
||||
# We expect:
|
||||
# - observation.state.temperature -> Scalars
|
||||
# - observation.camera -> Image (HWC) with static=True
|
||||
# - action.throttle -> Scalars
|
||||
# - action.vector -> single Scalars batch (no per-element suffix)
|
||||
expected_keys = {
|
||||
f"{OBS_STATE}.temperature",
|
||||
"observation.camera",
|
||||
"action.throttle",
|
||||
"action.vector",
|
||||
}
|
||||
assert set(_keys(calls)) == expected_keys
|
||||
|
||||
# Check scalar types and values
|
||||
temp_obj = _obj_for(calls, f"{OBS_STATE}.temperature")
|
||||
assert type(temp_obj).__name__ == "DummyScalar"
|
||||
assert float(temp_obj.value) == pytest.approx(25.0)
|
||||
|
||||
throttle_obj = _obj_for(calls, "action.throttle")
|
||||
assert type(throttle_obj).__name__ == "DummyScalar"
|
||||
assert float(throttle_obj.value) == pytest.approx(0.7)
|
||||
|
||||
# 1D vector logged as a single batched Scalars under one entity path
|
||||
vec = _obj_for(calls, "action.vector")
|
||||
assert type(vec).__name__ == "DummyScalar"
|
||||
np.testing.assert_allclose(np.asarray(vec.value), [1.0, 2.0])
|
||||
|
||||
# Check image handling: CHW -> HWC
|
||||
img_obj = _obj_for(calls, "observation.camera")
|
||||
assert type(img_obj).__name__ == "DummyImage"
|
||||
assert img_obj.arr.shape == (10, 20, 3) # transposed
|
||||
assert _kwargs_for(calls, "observation.camera").get("static", False) is True # static=True for images
|
||||
|
||||
# A blueprint should have been built and sent exactly once, and cached on the function.
|
||||
assert len(blueprints) == 1
|
||||
assert rv.log_rerun_data.blueprint is blueprints[0]
|
||||
|
||||
bp = blueprints[0]
|
||||
# One spatial view per image path
|
||||
spatial_views = _views_by_kind(bp, "Spatial2DView")
|
||||
assert {v.origin for v in spatial_views} == {"observation.camera"}
|
||||
|
||||
# One time-series view each for observation and action scalars
|
||||
ts_views = {v.name: v for v in _views_by_kind(bp, "TimeSeriesView")}
|
||||
assert set(ts_views) == {"observation", "action"}
|
||||
assert ts_views["observation"].contents == [f"{OBS_STATE}.temperature"]
|
||||
assert ts_views["action"].contents == ["action.throttle", "action.vector"]
|
||||
|
||||
|
||||
def test_log_rerun_data_plain_list_ordering_and_prefixes(mock_rerun):
|
||||
rv, calls, blueprints = mock_rerun
|
||||
|
||||
# First dict without prefixes treated as observation
|
||||
# Second dict without prefixes treated as action
|
||||
obs_plain = {
|
||||
"temp": 1.5,
|
||||
# Already HWC image => should stay as-is
|
||||
"img": np.zeros((5, 6, 3), dtype=np.uint8),
|
||||
"none": None, # should be skipped
|
||||
}
|
||||
act_plain = {
|
||||
"throttle": 0.3,
|
||||
"vec": np.array([9, 8, 7], dtype=np.float32),
|
||||
}
|
||||
|
||||
# Extract observation and action data from list like the old function logic did
|
||||
# First dict was treated as observation, second as action
|
||||
rv.log_rerun_data(observation=obs_plain, action=act_plain)
|
||||
|
||||
# Expected keys with auto-prefixes. The 1D vector is a single batched Scalars.
|
||||
expected = {
|
||||
"observation.temp",
|
||||
"observation.img",
|
||||
"action.throttle",
|
||||
"action.vec",
|
||||
}
|
||||
logged = set(_keys(calls))
|
||||
assert logged == expected
|
||||
|
||||
# Scalars
|
||||
t = _obj_for(calls, "observation.temp")
|
||||
assert type(t).__name__ == "DummyScalar"
|
||||
assert float(t.value) == pytest.approx(1.5)
|
||||
|
||||
throttle = _obj_for(calls, "action.throttle")
|
||||
assert type(throttle).__name__ == "DummyScalar"
|
||||
assert float(throttle.value) == pytest.approx(0.3)
|
||||
|
||||
# Image stays HWC
|
||||
img = _obj_for(calls, "observation.img")
|
||||
assert type(img).__name__ == "DummyImage"
|
||||
assert img.arr.shape == (5, 6, 3)
|
||||
assert _kwargs_for(calls, "observation.img").get("static", False) is True
|
||||
|
||||
# Vector logged as a single batched Scalars under one entity path
|
||||
vec = _obj_for(calls, "action.vec")
|
||||
assert type(vec).__name__ == "DummyScalar"
|
||||
np.testing.assert_allclose(np.asarray(vec.value), [9, 8, 7])
|
||||
|
||||
# Blueprint sent once with the expected view layout
|
||||
assert len(blueprints) == 1
|
||||
bp = blueprints[0]
|
||||
spatial_views = _views_by_kind(bp, "Spatial2DView")
|
||||
assert {v.origin for v in spatial_views} == {"observation.img"}
|
||||
ts_views = {v.name: v for v in _views_by_kind(bp, "TimeSeriesView")}
|
||||
assert ts_views["observation"].contents == ["observation.temp"]
|
||||
assert ts_views["action"].contents == ["action.throttle", "action.vec"]
|
||||
|
||||
|
||||
def test_log_rerun_data_kwargs_only(mock_rerun):
|
||||
rv, calls, blueprints = mock_rerun
|
||||
|
||||
rv.log_rerun_data(
|
||||
observation={"observation.temp": 10.0, "observation.gray": np.zeros((8, 8, 1), dtype=np.uint8)},
|
||||
action={"action.a": 1.0},
|
||||
)
|
||||
|
||||
keys = set(_keys(calls))
|
||||
assert "observation.temp" in keys
|
||||
assert "observation.gray" in keys
|
||||
assert "action.a" in keys
|
||||
|
||||
temp = _obj_for(calls, "observation.temp")
|
||||
assert type(temp).__name__ == "DummyScalar"
|
||||
assert float(temp.value) == pytest.approx(10.0)
|
||||
|
||||
img = _obj_for(calls, "observation.gray")
|
||||
assert type(img).__name__ == "DummyDepthImage" # single-channel -> DepthImage
|
||||
assert img.arr.shape == (8, 8, 1) # remains HWC
|
||||
assert _kwargs_for(calls, "observation.gray").get("static", False) is True
|
||||
|
||||
a = _obj_for(calls, "action.a")
|
||||
assert type(a).__name__ == "DummyScalar"
|
||||
assert float(a.value) == pytest.approx(1.0)
|
||||
|
||||
# Blueprint sent once, with a spatial view for the image and time-series views for scalars
|
||||
assert len(blueprints) == 1
|
||||
bp = blueprints[0]
|
||||
assert {v.origin for v in _views_by_kind(bp, "Spatial2DView")} == {"observation.gray"}
|
||||
ts_views = {v.name: v for v in _views_by_kind(bp, "TimeSeriesView")}
|
||||
assert ts_views["observation"].contents == ["observation.temp"]
|
||||
assert ts_views["action"].contents == ["action.a"]
|
||||
|
||||
|
||||
def test_log_rerun_data_blueprint_sent_only_once(mock_rerun):
|
||||
"""The blueprint is built from the first call and not resent on subsequent calls."""
|
||||
rv, calls, blueprints = mock_rerun
|
||||
|
||||
rv.log_rerun_data(observation={"temp": 1.0}, action={"a": 2.0})
|
||||
assert len(blueprints) == 1
|
||||
first_blueprint = rv.log_rerun_data.blueprint
|
||||
|
||||
rv.log_rerun_data(observation={"temp": 3.0}, action={"a": 4.0})
|
||||
# Still only one blueprint, and the cached one is unchanged.
|
||||
assert len(blueprints) == 1
|
||||
assert rv.log_rerun_data.blueprint is first_blueprint
|
||||
@@ -14,297 +14,23 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
"""Tests for the backend-agnostic visualization dispatch.
|
||||
|
||||
These exercise the display-mode routing/validation only; they need neither ``rerun`` nor
|
||||
``foxglove`` installed since the unknown-mode branch raises before touching any backend. Backend
|
||||
behavior is covered in ``test_rerun_visualization.py`` and ``test_foxglove_visualization.py``.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
pytest.importorskip("rerun", reason="rerun-sdk is required (install lerobot[viz])")
|
||||
|
||||
from lerobot.types import TransitionKey
|
||||
from lerobot.utils.constants import OBS_STATE
|
||||
from lerobot.utils import visualization_utils as vu
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_rerun(monkeypatch):
|
||||
"""
|
||||
Provide a mock `rerun` module (and `rerun.blueprint` submodule) so tests don't
|
||||
depend on the real library. Also reload the module-under-test so it binds to
|
||||
this mock `rr`.
|
||||
"""
|
||||
calls = []
|
||||
blueprints = []
|
||||
|
||||
class DummyScalar:
|
||||
def __init__(self, value):
|
||||
# Scalars may be built from a single float or from a 1D array batch.
|
||||
self.value = value
|
||||
|
||||
class DummyImage:
|
||||
def __init__(self, arr):
|
||||
self.arr = arr
|
||||
|
||||
def compress(self, *a, **k):
|
||||
return self
|
||||
|
||||
class DummyDepthImage:
|
||||
def __init__(self, arr, colormap=None):
|
||||
self.arr = arr
|
||||
self.colormap = colormap
|
||||
|
||||
def dummy_log(key, obj=None, **kwargs):
|
||||
# Accept either positional `obj` or keyword `entity` and record remaining kwargs.
|
||||
if obj is None and "entity" in kwargs:
|
||||
obj = kwargs.pop("entity")
|
||||
calls.append((key, obj, kwargs))
|
||||
|
||||
def dummy_send_blueprint(blueprint, *a, **k):
|
||||
blueprints.append(blueprint)
|
||||
|
||||
# Mock the `rerun.blueprint` submodule used to build the layout.
|
||||
dummy_rrb = SimpleNamespace(
|
||||
Spatial2DView=lambda origin=None, name=None: SimpleNamespace(
|
||||
kind="Spatial2DView", origin=origin, name=name
|
||||
),
|
||||
TimeSeriesView=lambda name=None, contents=None: SimpleNamespace(
|
||||
kind="TimeSeriesView", name=name, contents=contents
|
||||
),
|
||||
Grid=lambda *views: SimpleNamespace(kind="Grid", views=list(views)),
|
||||
Blueprint=lambda root: SimpleNamespace(kind="Blueprint", root=root),
|
||||
)
|
||||
|
||||
dummy_rr = SimpleNamespace(
|
||||
__name__="rerun",
|
||||
__package__="rerun",
|
||||
__spec__=SimpleNamespace(name="rerun", submodule_search_locations=None),
|
||||
Scalars=DummyScalar,
|
||||
Image=DummyImage,
|
||||
DepthImage=DummyDepthImage,
|
||||
components=SimpleNamespace(Colormap=SimpleNamespace(Viridis="viridis")),
|
||||
log=dummy_log,
|
||||
send_blueprint=dummy_send_blueprint,
|
||||
init=lambda *a, **k: None,
|
||||
spawn=lambda *a, **k: None,
|
||||
blueprint=dummy_rrb,
|
||||
)
|
||||
|
||||
# Inject fake modules into sys.modules (both `rerun` and `rerun.blueprint`).
|
||||
monkeypatch.setitem(sys.modules, "rerun", dummy_rr)
|
||||
monkeypatch.setitem(sys.modules, "rerun.blueprint", dummy_rrb)
|
||||
|
||||
# Now import and reload the module under test, to bind to our rerun mock
|
||||
import lerobot.utils.visualization_utils as vu
|
||||
|
||||
importlib.reload(vu)
|
||||
|
||||
# Expose the reloaded module, the call recorder and the captured blueprints
|
||||
yield vu, calls, blueprints
|
||||
def test_visualization_modes():
|
||||
assert vu.VISUALIZATION_MODES == ("rerun", "foxglove")
|
||||
|
||||
|
||||
def _keys(calls):
|
||||
"""Helper to extract just the keys logged to rr.log"""
|
||||
return [k for (k, _obj, _kw) in calls]
|
||||
|
||||
|
||||
def _obj_for(calls, key):
|
||||
"""Find the first object logged under a given key."""
|
||||
for k, obj, _kw in calls:
|
||||
if k == key:
|
||||
return obj
|
||||
raise KeyError(f"Key {key} not found in calls: {calls}")
|
||||
|
||||
|
||||
def _kwargs_for(calls, key):
|
||||
for k, _obj, kw in calls:
|
||||
if k == key:
|
||||
return kw
|
||||
raise KeyError(f"Key {key} not found in calls: {calls}")
|
||||
|
||||
|
||||
def _views_by_kind(blueprint, kind):
|
||||
"""Return the views of a given kind from the (single) blueprint's grid."""
|
||||
return [v for v in blueprint.root.views if v.kind == kind]
|
||||
|
||||
|
||||
def test_log_rerun_data_envtransition_scalars_and_image(mock_rerun):
|
||||
vu, calls, blueprints = mock_rerun
|
||||
|
||||
# Build EnvTransition dict
|
||||
obs = {
|
||||
f"{OBS_STATE}.temperature": np.float32(25.0),
|
||||
# CHW image should be converted to HWC for rr.Image
|
||||
"observation.camera": np.zeros((3, 10, 20), dtype=np.uint8),
|
||||
}
|
||||
act = {
|
||||
"action.throttle": 0.7,
|
||||
# 1D array should be logged as a single Scalars batch under one entity path
|
||||
"action.vector": np.array([1.0, 2.0], dtype=np.float32),
|
||||
}
|
||||
transition = {
|
||||
TransitionKey.OBSERVATION: obs,
|
||||
TransitionKey.ACTION: act,
|
||||
}
|
||||
|
||||
# Extract observation and action data from transition like in the real call sites
|
||||
obs_data = transition.get(TransitionKey.OBSERVATION, {})
|
||||
action_data = transition.get(TransitionKey.ACTION, {})
|
||||
vu.log_rerun_data(observation=obs_data, action=action_data)
|
||||
|
||||
# We expect:
|
||||
# - observation.state.temperature -> Scalars
|
||||
# - observation.camera -> Image (HWC) with static=True
|
||||
# - action.throttle -> Scalars
|
||||
# - action.vector -> single Scalars batch (no per-element suffix)
|
||||
expected_keys = {
|
||||
f"{OBS_STATE}.temperature",
|
||||
"observation.camera",
|
||||
"action.throttle",
|
||||
"action.vector",
|
||||
}
|
||||
assert set(_keys(calls)) == expected_keys
|
||||
|
||||
# Check scalar types and values
|
||||
temp_obj = _obj_for(calls, f"{OBS_STATE}.temperature")
|
||||
assert type(temp_obj).__name__ == "DummyScalar"
|
||||
assert float(temp_obj.value) == pytest.approx(25.0)
|
||||
|
||||
throttle_obj = _obj_for(calls, "action.throttle")
|
||||
assert type(throttle_obj).__name__ == "DummyScalar"
|
||||
assert float(throttle_obj.value) == pytest.approx(0.7)
|
||||
|
||||
# 1D vector logged as a single batched Scalars under one entity path
|
||||
vec = _obj_for(calls, "action.vector")
|
||||
assert type(vec).__name__ == "DummyScalar"
|
||||
np.testing.assert_allclose(np.asarray(vec.value), [1.0, 2.0])
|
||||
|
||||
# Check image handling: CHW -> HWC
|
||||
img_obj = _obj_for(calls, "observation.camera")
|
||||
assert type(img_obj).__name__ == "DummyImage"
|
||||
assert img_obj.arr.shape == (10, 20, 3) # transposed
|
||||
assert _kwargs_for(calls, "observation.camera").get("static", False) is True # static=True for images
|
||||
|
||||
# A blueprint should have been built and sent exactly once, and cached on the function.
|
||||
assert len(blueprints) == 1
|
||||
assert vu.log_rerun_data.blueprint is blueprints[0]
|
||||
|
||||
bp = blueprints[0]
|
||||
# One spatial view per image path
|
||||
spatial_views = _views_by_kind(bp, "Spatial2DView")
|
||||
assert {v.origin for v in spatial_views} == {"observation.camera"}
|
||||
|
||||
# One time-series view each for observation and action scalars
|
||||
ts_views = {v.name: v for v in _views_by_kind(bp, "TimeSeriesView")}
|
||||
assert set(ts_views) == {"observation", "action"}
|
||||
assert ts_views["observation"].contents == [f"{OBS_STATE}.temperature"]
|
||||
assert ts_views["action"].contents == ["action.throttle", "action.vector"]
|
||||
|
||||
|
||||
def test_log_rerun_data_plain_list_ordering_and_prefixes(mock_rerun):
|
||||
vu, calls, blueprints = mock_rerun
|
||||
|
||||
# First dict without prefixes treated as observation
|
||||
# Second dict without prefixes treated as action
|
||||
obs_plain = {
|
||||
"temp": 1.5,
|
||||
# Already HWC image => should stay as-is
|
||||
"img": np.zeros((5, 6, 3), dtype=np.uint8),
|
||||
"none": None, # should be skipped
|
||||
}
|
||||
act_plain = {
|
||||
"throttle": 0.3,
|
||||
"vec": np.array([9, 8, 7], dtype=np.float32),
|
||||
}
|
||||
|
||||
# Extract observation and action data from list like the old function logic did
|
||||
# First dict was treated as observation, second as action
|
||||
vu.log_rerun_data(observation=obs_plain, action=act_plain)
|
||||
|
||||
# Expected keys with auto-prefixes. The 1D vector is a single batched Scalars.
|
||||
expected = {
|
||||
"observation.temp",
|
||||
"observation.img",
|
||||
"action.throttle",
|
||||
"action.vec",
|
||||
}
|
||||
logged = set(_keys(calls))
|
||||
assert logged == expected
|
||||
|
||||
# Scalars
|
||||
t = _obj_for(calls, "observation.temp")
|
||||
assert type(t).__name__ == "DummyScalar"
|
||||
assert float(t.value) == pytest.approx(1.5)
|
||||
|
||||
throttle = _obj_for(calls, "action.throttle")
|
||||
assert type(throttle).__name__ == "DummyScalar"
|
||||
assert float(throttle.value) == pytest.approx(0.3)
|
||||
|
||||
# Image stays HWC
|
||||
img = _obj_for(calls, "observation.img")
|
||||
assert type(img).__name__ == "DummyImage"
|
||||
assert img.arr.shape == (5, 6, 3)
|
||||
assert _kwargs_for(calls, "observation.img").get("static", False) is True
|
||||
|
||||
# Vector logged as a single batched Scalars under one entity path
|
||||
vec = _obj_for(calls, "action.vec")
|
||||
assert type(vec).__name__ == "DummyScalar"
|
||||
np.testing.assert_allclose(np.asarray(vec.value), [9, 8, 7])
|
||||
|
||||
# Blueprint sent once with the expected view layout
|
||||
assert len(blueprints) == 1
|
||||
bp = blueprints[0]
|
||||
spatial_views = _views_by_kind(bp, "Spatial2DView")
|
||||
assert {v.origin for v in spatial_views} == {"observation.img"}
|
||||
ts_views = {v.name: v for v in _views_by_kind(bp, "TimeSeriesView")}
|
||||
assert ts_views["observation"].contents == ["observation.temp"]
|
||||
assert ts_views["action"].contents == ["action.throttle", "action.vec"]
|
||||
|
||||
|
||||
def test_log_rerun_data_kwargs_only(mock_rerun):
|
||||
vu, calls, blueprints = mock_rerun
|
||||
|
||||
vu.log_rerun_data(
|
||||
observation={"observation.temp": 10.0, "observation.gray": np.zeros((8, 8, 1), dtype=np.uint8)},
|
||||
action={"action.a": 1.0},
|
||||
)
|
||||
|
||||
keys = set(_keys(calls))
|
||||
assert "observation.temp" in keys
|
||||
assert "observation.gray" in keys
|
||||
assert "action.a" in keys
|
||||
|
||||
temp = _obj_for(calls, "observation.temp")
|
||||
assert type(temp).__name__ == "DummyScalar"
|
||||
assert float(temp.value) == pytest.approx(10.0)
|
||||
|
||||
img = _obj_for(calls, "observation.gray")
|
||||
assert type(img).__name__ == "DummyDepthImage" # single-channel -> DepthImage
|
||||
assert img.arr.shape == (8, 8, 1) # remains HWC
|
||||
assert _kwargs_for(calls, "observation.gray").get("static", False) is True
|
||||
|
||||
a = _obj_for(calls, "action.a")
|
||||
assert type(a).__name__ == "DummyScalar"
|
||||
assert float(a.value) == pytest.approx(1.0)
|
||||
|
||||
# Blueprint sent once, with a spatial view for the image and time-series views for scalars
|
||||
assert len(blueprints) == 1
|
||||
bp = blueprints[0]
|
||||
assert {v.origin for v in _views_by_kind(bp, "Spatial2DView")} == {"observation.gray"}
|
||||
ts_views = {v.name: v for v in _views_by_kind(bp, "TimeSeriesView")}
|
||||
assert ts_views["observation"].contents == ["observation.temp"]
|
||||
assert ts_views["action"].contents == ["action.a"]
|
||||
|
||||
|
||||
def test_log_rerun_data_blueprint_sent_only_once(mock_rerun):
|
||||
"""The blueprint is built from the first call and not resent on subsequent calls."""
|
||||
vu, calls, blueprints = mock_rerun
|
||||
|
||||
vu.log_rerun_data(observation={"temp": 1.0}, action={"a": 2.0})
|
||||
assert len(blueprints) == 1
|
||||
first_blueprint = vu.log_rerun_data.blueprint
|
||||
|
||||
vu.log_rerun_data(observation={"temp": 3.0}, action={"a": 4.0})
|
||||
# Still only one blueprint, and the cached one is unchanged.
|
||||
assert len(blueprints) == 1
|
||||
assert vu.log_rerun_data.blueprint is first_blueprint
|
||||
@pytest.mark.parametrize("func", ["init_visualization", "log_visualization_data", "shutdown_visualization"])
|
||||
def test_dispatch_rejects_unknown_mode(func):
|
||||
with pytest.raises(ValueError, match="Unknown display_mode"):
|
||||
getattr(vu, func)("bogus")
|
||||
|
||||
@@ -1550,6 +1550,26 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/47/c99d5268f354002ce80f8d029cd9d7d872969da1de8b93d32de4dc56d6f4/fonttools-4.63.0-py3-none-any.whl", hash = "sha256:445af2eab030a16b9171ea8bdda7ebf7d96bda2df88ee182a464252f6e05e20d", size = 1164562, upload-time = "2026-05-14T12:04:29.092Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foxglove-sdk"
|
||||
version = "0.25.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c1/a7/86a252782ea0d9baf1357369ad1bbf1ed644768702b0266a3fa3a05361d0/foxglove_sdk-0.25.1.tar.gz", hash = "sha256:8230f3c32ea3ab715818687377491594ec9c7e58e6b0ed8ed91aadf937ce706b", size = 547778, upload-time = "2026-06-02T03:13:18.942Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/58/15/59f02e8201b8da09ce05d8774820c29efc9149862b70ee6b3a27968e791a/foxglove_sdk-0.25.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:5af9f9a691eefbe6e0a47875ff2f7d0fc36607f0920e8690bbdc2dfd4fb22451", size = 17911538, upload-time = "2026-06-02T03:13:12.493Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/27/ed/16d809fab24cbfdf97c15c9cdd80eabfeb447ca545ede426950d62bac848/foxglove_sdk-0.25.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:3e908bd87d1926a05c785779d8252db6b87eef685f284ec1cf46ee501645d08e", size = 16452309, upload-time = "2026-06-02T03:13:10.607Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/c3/f95874935a3436841487df1f0202de4d20eabc0adb6b79c94c531bbe7eb3/foxglove_sdk-0.25.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:968e32c8668d172f6b546c8e7af658ed35a21ec165adc3bacf53a04dda159f12", size = 2355680, upload-time = "2026-06-02T02:34:01.668Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/38/da/ad22d8d6e3fedde9fc0c49aa8b20394e5e0bc44ab3fba564c77a64ddc7e2/foxglove_sdk-0.25.1-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f75374fedafe259c40b19bc645589d9453708eab679a5b07c603035f936d29a", size = 2274075, upload-time = "2026-06-02T02:34:07.212Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/fa/1254adb5e72eff507695473e9c82d0e90395b61463e5353762250db30d3d/foxglove_sdk-0.25.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b6d3af517a00342bf7b08a4a65b043f3eafaa197138752b6fbd704fb91043fa", size = 2282160, upload-time = "2026-06-02T02:34:08.812Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/e4/2b22ef06ba4058494c7aa35974d138f8f1ae4cf5273f77d69c9dc3a99b45/foxglove_sdk-0.25.1-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:aed27c0f03a45fd6abdd566498bfee2672391602bcff32c827b8e3a6d8f67ab1", size = 22685338, upload-time = "2026-06-02T02:34:04.688Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/7c/58324c99b80eef0b674c8d4f5c2e07c66fd1480a27a8f0d4d79371805111/foxglove_sdk-0.25.1-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:419dd8308e3f91e2ae487b727f1bf1804642990876163b2a353db4a1b1de1425", size = 19326096, upload-time = "2026-06-02T02:34:10.939Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fe/9c/3452d92959e05fc6b1c1e5f032605d55623aeb6704357d20408f8781bc84/foxglove_sdk-0.25.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0fcb36e628ab3d9043e193f12ad4dbbb955fe18616aac7ef5bca82c52910f108", size = 2539020, upload-time = "2026-06-02T03:13:14.365Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/af/57fa58525d3acb5c5480a6f0ef86450b1a0ccae2b21248edb1376073ce55/foxglove_sdk-0.25.1-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:7909fd9f94935935dd8813702d84ffdbfebeb3866673c618ce35e8cfedd03029", size = 2550999, upload-time = "2026-06-02T03:13:15.715Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/90/78/f74bb167186c965d475ff360fa6eb7441d5ac6c6239d60f542f63984f849/foxglove_sdk-0.25.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:69d5966213b5212b8841b4004fe582db924a74f1610d8452ad890f6931702926", size = 2560166, upload-time = "2026-06-02T03:13:17.254Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/83/1c4c6d04fbd4784fe44fb2da021db1adf1f03a371f1e5679a383c1173235/foxglove_sdk-0.25.1-cp310-abi3-win32.whl", hash = "sha256:2a1121a5c74590ff6e61628c4a46dc57d392d290b4beeb29d6852933da56224a", size = 1618124, upload-time = "2026-06-02T03:13:20.158Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/4d/bdb9e252a41a951eb53908ac9cb965b7480c3ba649174f5398d4fcf0ca1d/foxglove_sdk-0.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:6ed3ad0d3e72cd7875e7e293709c5ff90494fe14f1b48a336baffc313a7272cc", size = 16588452, upload-time = "2026-06-02T03:13:21.636Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fqdn"
|
||||
version = "1.5.1"
|
||||
@@ -2811,6 +2831,7 @@ all = [
|
||||
{ name = "faker" },
|
||||
{ name = "fastapi" },
|
||||
{ name = "feetech-servo-sdk" },
|
||||
{ name = "foxglove-sdk" },
|
||||
{ name = "grpcio" },
|
||||
{ name = "grpcio-tools" },
|
||||
{ name = "gym-aloha" },
|
||||
@@ -2895,6 +2916,7 @@ core-scripts = [
|
||||
{ name = "av" },
|
||||
{ name = "datasets" },
|
||||
{ name = "deepdiff" },
|
||||
{ name = "foxglove-sdk" },
|
||||
{ name = "jsonlines" },
|
||||
{ name = "pandas" },
|
||||
{ name = "pyarrow" },
|
||||
@@ -2917,6 +2939,7 @@ dataset = [
|
||||
dataset-viz = [
|
||||
{ name = "av" },
|
||||
{ name = "datasets" },
|
||||
{ name = "foxglove-sdk" },
|
||||
{ name = "jsonlines" },
|
||||
{ name = "pandas" },
|
||||
{ name = "pyarrow" },
|
||||
@@ -3187,6 +3210,7 @@ video-benchmark = [
|
||||
{ name = "scikit-image" },
|
||||
]
|
||||
viz = [
|
||||
{ name = "foxglove-sdk" },
|
||||
{ name = "rerun-sdk" },
|
||||
]
|
||||
vla-jepa = [
|
||||
@@ -3226,6 +3250,7 @@ requires-dist = [
|
||||
{ name = "fastapi", marker = "extra == 'phone'", specifier = "<1.0" },
|
||||
{ name = "feetech-servo-sdk", marker = "extra == 'feetech'", specifier = ">=1.0.0,<2.0.0" },
|
||||
{ name = "flash-attn", marker = "sys_platform != 'darwin' and extra == 'groot'", specifier = ">=2.5.9,<3.0.0" },
|
||||
{ name = "foxglove-sdk", marker = "extra == 'viz'", specifier = ">=0.25.1,<0.26.0" },
|
||||
{ name = "grpcio", marker = "extra == 'grpcio-dep'", specifier = ">=1.73.1,<2.0.0" },
|
||||
{ name = "grpcio", marker = "extra == 'reachy2'", specifier = "<=1.73.1" },
|
||||
{ name = "grpcio-tools", marker = "extra == 'dev'", specifier = ">=1.73.1,<2.0.0" },
|
||||
|
||||
Reference in New Issue
Block a user