Add Foxglove display mode for teleoperate

Add a --display_mode flag (rerun|foxglove) to lerobot-teleoperate. When set
to foxglove, stream observations/actions over a Foxglove WebSocket server:
images as RawImage/CompressedImage, scalars as typed JSON channels with
schemas generated from the feature names (sanitized so paths don't need
quoting). Adds a `foxglove` extra.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Roman Shtylman
2026-05-28 19:22:00 -07:00
committed by CarolinePascal
parent 2f2b567951
commit 3062bf0548
3 changed files with 274 additions and 9 deletions
+3
View File
@@ -126,6 +126,9 @@ hardware = [
viz = [
"rerun-sdk>=0.24.0,<0.34.0",
]
foxglove = [
"foxglove-sdk>=0.24.0,<1.0.0",
]
# ── User-facing composite extras (map to CLI scripts) ─────
# lerobot-record, lerobot-replay, lerobot-calibrate, lerobot-teleoperate, etc.
core_scripts = ["lerobot[dataset]", "lerobot[hardware]", "lerobot[viz]"]
+39 -9
View File
@@ -31,6 +31,22 @@ lerobot-teleoperate \
--display_data=true
```
To stream the data to Foxglove instead of Rerun, add ``--display_mode=foxglove`` (then connect the
Foxglove app to ``ws://127.0.0.1:8765``; override the port with ``--display_port=<port>``):
```shell
lerobot-teleoperate \
--robot.type=so101_follower \
--robot.port=/dev/tty.usbmodem58760431541 \
--robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \
--robot.id=black \
--teleop.type=so101_leader \
--teleop.port=/dev/tty.usbmodem58760431551 \
--teleop.id=blue \
--display_data=true \
--display_mode=foxglove
```
Example teleoperation with bimanual so100:
```shell
@@ -108,7 +124,11 @@ from lerobot.teleoperators import ( # noqa: F401
from lerobot.utils.import_utils import register_third_party_plugins
from lerobot.utils.robot_utils import precise_sleep
from lerobot.utils.utils import init_logging, move_cursor_up
from lerobot.utils.visualization_utils import init_rerun, log_rerun_data, shutdown_rerun
from lerobot.utils.visualization_utils import (
init_visualization,
log_visualization_data,
shutdown_visualization,
)
@dataclass
@@ -121,11 +141,14 @@ class TeleoperateConfig:
teleop_time_s: float | None = None
# Display all cameras on screen
display_data: bool = False
# Display data on a remote Rerun server
# Visualization backend used when display_data is True: "rerun" or "foxglove".
# "foxglove" starts a WebSocket server (default ws://127.0.0.1:8765) to stream data to the Foxglove app.
display_mode: str = "rerun"
# For "rerun": IP of a remote Rerun server to connect to. Unused by "foxglove".
display_ip: str | None = None
# Port of the remote Rerun server
# For "rerun": port of the remote Rerun server. For "foxglove": port to bind the WebSocket server to.
display_port: int | None = None
# Whether to display compressed images in Rerun
# Whether to display compressed (JPEG) images instead of raw frames
display_compressed_images: bool = False
@@ -137,6 +160,7 @@ def teleop_loop(
robot_action_processor: RobotProcessorPipeline[tuple[RobotAction, RobotObservation], RobotAction],
robot_observation_processor: RobotProcessorPipeline[RobotObservation, RobotObservation],
display_data: bool = False,
display_mode: str = "rerun",
duration: float | None = None,
display_compressed_images: bool = False,
):
@@ -149,8 +173,10 @@ def teleop_loop(
teleop: The teleoperator device instance providing control actions.
robot: The robot instance being controlled.
fps: The target frequency for the control loop in frames per second.
display_data: If True, fetches robot observations and displays them in the console and Rerun.
display_compressed_images: If True, compresses images before sending them to Rerun for display.
display_data: If True, fetches robot observations and displays them in the console and the
visualization backend.
display_mode: Visualization backend to use when display_data is True ("rerun" or "foxglove").
display_compressed_images: If True, compresses images before sending them to the backend for display.
duration: The maximum duration of the teleoperation loop in seconds. If None, the loop runs indefinitely.
teleop_action_processor: An optional pipeline to process raw actions from the teleoperator.
robot_action_processor: An optional pipeline to process actions before they are sent to the robot.
@@ -187,7 +213,8 @@ def teleop_loop(
# Process robot observation through pipeline
obs_transition = robot_observation_processor(obs)
log_rerun_data(
log_visualization_data(
display_mode,
observation=obs_transition,
action=teleop_action,
compress_images=display_compressed_images,
@@ -215,7 +242,9 @@ def teleoperate(cfg: TeleoperateConfig):
init_logging()
logging.info(pformat(asdict(cfg)))
if cfg.display_data:
init_rerun(session_name="teleoperation", ip=cfg.display_ip, port=cfg.display_port)
init_visualization(
cfg.display_mode, session_name="teleoperation", ip=cfg.display_ip, port=cfg.display_port
)
display_compressed_images = (
True
if (cfg.display_data and cfg.display_ip is not None and cfg.display_port is not None)
@@ -235,6 +264,7 @@ def teleoperate(cfg: TeleoperateConfig):
robot=robot,
fps=cfg.fps,
display_data=cfg.display_data,
display_mode=cfg.display_mode,
duration=cfg.teleop_time_s,
teleop_action_processor=teleop_action_processor,
robot_action_processor=robot_action_processor,
@@ -245,7 +275,7 @@ def teleoperate(cfg: TeleoperateConfig):
pass
finally:
if cfg.display_data:
shutdown_rerun()
shutdown_visualization(cfg.display_mode)
teleop.disconnect()
robot.disconnect()
+232
View File
@@ -14,6 +14,7 @@
import numbers
import os
import time
import numpy as np
@@ -22,6 +23,12 @@ from lerobot.types import RobotAction, RobotObservation
from .constants import ACTION, ACTION_PREFIX, OBS_PREFIX, OBS_STR
from .import_utils import require_package
# Module-level Foxglove state.
# `_foxglove_server` holds the single WebSocket server created by init_foxglove();
# it is reset to None by shutdown_foxglove().
# `_foxglove_channels` caches every channel created by this module, keyed by topic.
# Both the image channels and the scalar JSON channels live here (the Foxglove SDK
# requires reusing one channel per topic). shutdown_foxglove() clears it.
_foxglove_server = None
_foxglove_channels: dict = {}
def init_rerun(
session_name: str = "lerobot_control_loop", ip: str | None = None, port: int | None = None
@@ -59,6 +66,37 @@ def shutdown_rerun() -> None:
rr.rerun_shutdown()
def init_foxglove(host: str = "127.0.0.1", port: int | None = 8765) -> None:
    """
    Starts a Foxglove WebSocket server for visualizing the control loop.

    Connect to it from the Foxglove app at ``ws://<host>:<port>``. Calling this
    more than once is a no-op while a server is already running.

    Args:
        host: Host interface to bind the WebSocket server to.
        port: Port to bind the WebSocket server to. ``None`` selects the default (8765);
            any integer, including ``0``, is forwarded to the SDK unchanged.
    """
    require_package("foxglove-sdk", extra="foxglove", import_name="foxglove")
    import foxglove

    global _foxglove_server
    if _foxglove_server is not None:
        return

    # Only ``None`` means "use the default". A truthiness test (``port or 8765``) would also
    # silently replace an explicitly requested port 0 with 8765.
    bind_port = 8765 if port is None else port
    _foxglove_server = foxglove.start_server(host=host, port=bind_port)
def shutdown_foxglove() -> None:
    """Stops the Foxglove WebSocket server and clears cached channels.

    Does nothing to the server when none is running. The module state (server handle and
    channel cache) is always reset, even if stopping the server raises, so a later
    :func:`init_foxglove` starts a fresh server instead of returning early because a stale
    handle is still set. Any exception from ``stop()`` is propagated to the caller.
    """
    global _foxglove_server
    server = _foxglove_server
    try:
        if server is not None:
            server.stop()
    finally:
        # Reset unconditionally: a failed stop must not leave a dead server registered.
        _foxglove_server = None
        _foxglove_channels.clear()
def _is_scalar(x):
return isinstance(x, (float | numbers.Real | np.integer | np.floating)) or (
isinstance(x, np.ndarray) and x.ndim == 0
@@ -100,6 +138,41 @@ def _ensure_blueprint(observation_paths: set[str], action_paths: set[str], image
rr.send_blueprint(blueprint)
def _foxglove_safe_name(name: str) -> str:
"""Make a feature name usable as an unquoted Foxglove message path / topic segment.
Foxglove message paths treat ``.`` as a field separator, so ``shoulder_pan.pos`` would have to be
written as ``"shoulder_pan.pos"`` when plotting. Replacing ``.`` with ``_`` avoids the quoting.
"""
return name.replace(".", "_")
def _log_foxglove_scalars(topic: str, schema_name: str, values: dict[str, float]) -> None:
"""Log a flat dict of scalars on a typed JSON channel, building the schema on first use.
The schema is derived from the keys of the first message (stable for a given robot/session) so
Foxglove offers message-path autocomplete. ``additionalProperties`` keeps it permissive if a later
message carries extra keys.
"""
if not values:
return
import foxglove
channel = _foxglove_channels.get(topic)
if channel is None:
schema = {
"type": "object",
"title": schema_name,
"properties": {name: {"type": "number"} for name in values},
"additionalProperties": {"type": "number"},
}
channel = _foxglove_channels[topic] = foxglove.Channel(topic, schema=schema, message_encoding="json")
channel.log(values)
def log_rerun_data(
observation: RobotObservation | None = None,
action: RobotAction | None = None,
@@ -175,3 +248,162 @@ def log_rerun_data(
action_paths.add(key)
_ensure_blueprint(observation_paths, action_paths, image_paths)
def log_foxglove_data(
    observation: RobotObservation | None = None,
    action: RobotAction | None = None,
    compress_images: bool = False,
) -> None:
    """
    Logs observation and action data to a Foxglove WebSocket server for real-time visualization.

    Mirrors :func:`log_rerun_data` but emits Foxglove messages over the server started by
    :func:`init_foxglove`. Data is mapped as follows:

    - Scalars (and elements of 1D observation arrays / flattened action arrays) are accumulated
      per source and logged on the ``/observation/state`` and ``/action/state`` topics as typed
      JSON messages. Each topic gets a schema generated from its field names so Foxglove provides
      message-path autocomplete. Field names are sanitized (``.`` -> ``_``) so they don't need
      quoting when plotting.
    - 2D and 3D observation arrays are treated as images: 3D arrays are transposed from CHW to
      HWC when needed, cast to ``uint8``, and logged on a per-source topic (e.g.
      ``/observation/images/front``) as a ``RawImage``, or as a JPEG ``CompressedImage`` when
      ``compress_images`` is True and the image has 3 channels.
    - ``None`` values, arrays of any other rank, images whose channel count is not 1, 3 or 4,
      and values of other types are skipped.

    Args:
        observation: An optional dictionary containing observation data to log.
        action: An optional dictionary containing action data to log.
        compress_images: Whether to JPEG-compress images before logging to save bandwidth in exchange
            for CPU and quality.

    Raises:
        RuntimeError: If :func:`init_foxglove` has not been called first.
    """
    require_package("foxglove-sdk", extra="foxglove", import_name="foxglove")
    from foxglove.channels import CompressedImageChannel, RawImageChannel
    from foxglove.messages import CompressedImage, RawImage, Timestamp

    if _foxglove_server is None:
        raise RuntimeError("init_foxglove() must be called before log_foxglove_data().")

    # One wall-clock timestamp is shared by every image logged in this call.
    sec, nsec = divmod(time.time_ns(), 1_000_000_000)
    timestamp = Timestamp(sec=sec, nsec=nsec)

    def channel_for(topic: str, channel_cls):
        # Reuse the cached channel for this topic, creating it on first use.
        channel = _foxglove_channels.get(topic)
        if channel is None:
            channel = _foxglove_channels[topic] = channel_cls(topic=topic)
        return channel

    def field_name(raw_key, prefix: str) -> str:
        # Work on str(raw_key) throughout so prefix detection and stripping always agree.
        return _foxglove_safe_name(str(raw_key).removeprefix(prefix))

    def log_image(topic: str, frame_id: str, arr: np.ndarray) -> None:
        # Convert CHW -> HWC when needed (mirrors log_rerun_data).
        if arr.ndim == 3 and arr.shape[0] in (1, 3, 4) and arr.shape[-1] not in (1, 3, 4):
            arr = np.transpose(arr, (1, 2, 0))

        # Only HW and HWC layouts can be described by width/height/step; for higher ranks the
        # byte count would not match the declared image size.
        if arr.ndim not in (2, 3):
            return

        height, width = arr.shape[0], arr.shape[1]
        channels = 1 if arr.ndim == 2 else arr.shape[2]
        encoding = {1: "mono8", 3: "rgb8", 4: "rgba8"}.get(channels)
        if encoding is None:
            return

        # Normalize dtype/layout once so the JPEG and raw paths see the same pixel buffer.
        arr = np.ascontiguousarray(arr, dtype=np.uint8)

        if compress_images and channels == 3:
            import cv2

            # Camera frames are RGB; cv2.imencode assumes BGR, so swap to keep colors correct.
            ok, buf = cv2.imencode(".jpg", cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
            if not ok:
                # Skip the frame instead of publishing an invalid JPEG payload.
                return
            channel_for(topic, CompressedImageChannel).log(
                CompressedImage(timestamp=timestamp, frame_id=frame_id, data=buf.tobytes(), format="jpeg")
            )
            return

        channel_for(topic, RawImageChannel).log(
            RawImage(
                timestamp=timestamp,
                frame_id=frame_id,
                width=width,
                height=height,
                encoding=encoding,
                step=width * channels,  # bytes per row for 8-bit pixels
                data=arr.tobytes(),
            )
        )

    if observation:
        obs_scalars: dict[str, float] = {}
        for k, v in observation.items():
            if v is None:
                continue
            key = field_name(k, OBS_PREFIX)
            if _is_scalar(v):
                obs_scalars[key] = float(v)
            elif isinstance(v, np.ndarray):
                if v.ndim == 1:
                    for i, vi in enumerate(v):
                        obs_scalars[f"{key}_{i}"] = float(vi)
                else:
                    log_image(f"/{OBS_STR}/images/{key}", key, v)
        _log_foxglove_scalars(f"/{OBS_STR}/state", "lerobot.Observation", obs_scalars)

    if action:
        action_scalars: dict[str, float] = {}
        for k, v in action.items():
            if v is None:
                continue
            key = field_name(k, ACTION_PREFIX)
            if _is_scalar(v):
                action_scalars[key] = float(v)
            elif isinstance(v, np.ndarray):
                # Actions are never images: arrays of any shape become indexed scalars.
                for i, vi in enumerate(v.ravel()):
                    action_scalars[f"{key}_{i}"] = float(vi)
        _log_foxglove_scalars(f"/{ACTION}/state", "lerobot.Action", action_scalars)
# ── Backend-agnostic dispatch ─────────────────────────────────────────────
# These let callers select a visualization backend at runtime via a string
# (e.g. a `--display_mode` CLI flag) without branching on the backend everywhere.
# The tuple below lists the accepted `display_mode` strings; the dispatch functions
# quote it in the ValueError they raise for any other value.
VISUALIZATION_MODES = ("rerun", "foxglove")
def init_visualization(
display_mode: str,
*,
session_name: str = "lerobot_control_loop",
ip: str | None = None,
port: int | None = None,
) -> None:
"""Initializes the visualization backend selected by ``display_mode``.
For ``"rerun"``, ``ip``/``port`` point at an optional remote Rerun server. For ``"foxglove"``,
``port`` is the local WebSocket server port (``ip`` is ignored; the server binds locally).
"""
if display_mode == "rerun":
init_rerun(session_name=session_name, ip=ip, port=port)
elif display_mode == "foxglove":
init_foxglove(port=port)
else:
raise ValueError(f"Unknown display_mode '{display_mode}'. Expected one of {VISUALIZATION_MODES}.")
def log_visualization_data(
    display_mode: str,
    observation: RobotObservation | None = None,
    action: RobotAction | None = None,
    compress_images: bool = False,
) -> None:
    """Forward one observation/action sample to the backend named by ``display_mode``.

    Args:
        display_mode: ``"rerun"`` or ``"foxglove"``.
        observation: Optional observation dictionary, passed through unchanged.
        action: Optional action dictionary, passed through unchanged.
        compress_images: Passed through to the backend's logging function.

    Raises:
        ValueError: If ``display_mode`` is not a known backend.
    """
    if display_mode == "rerun":
        log_rerun_data(observation=observation, action=action, compress_images=compress_images)
        return

    if display_mode == "foxglove":
        log_foxglove_data(observation=observation, action=action, compress_images=compress_images)
        return

    raise ValueError(f"Unknown display_mode '{display_mode}'. Expected one of {VISUALIZATION_MODES}.")
def shutdown_visualization(display_mode: str) -> None:
"""Shuts down the backend selected by ``display_mode``."""
if display_mode == "rerun":
shutdown_rerun()
elif display_mode == "foxglove":
shutdown_foxglove()
else:
raise ValueError(f"Unknown display_mode '{display_mode}'. Expected one of {VISUALIZATION_MODES}.")