From af0294198a2eeb6f2c150046fc1976e7b55687c5 Mon Sep 17 00:00:00 2001
From: CarolinePascal <caroline8.pascal@gmail.com>
Date: Fri, 9 May 2025 18:00:30 +0200
Subject: [PATCH] [skip ci] feat(visualize audio): adding audio recordings
 visualization in rerun

---
 examples/lekiwi/teleoperate.py             |  2 +-
 src/lerobot/scripts/lerobot_record.py      |  7 ++
 src/lerobot/scripts/lerobot_teleoperate.py |  6 ++
 src/lerobot/utils/visualization_utils.py   | 85 +++++++++++++++++++++-
 4 files changed, 97 insertions(+), 3 deletions(-)

diff --git a/examples/lekiwi/teleoperate.py b/examples/lekiwi/teleoperate.py
index feb3cbb01..102bd31f5 100644
--- a/examples/lekiwi/teleoperate.py
+++ b/examples/lekiwi/teleoperate.py
@@ -43,7 +43,7 @@ def main():
     keyboard.connect()
 
     # Init rerun viewer
-    init_rerun(session_name="lekiwi_teleop")
+    init_rerun(session_name="lekiwi_teleop", robot=robot, reset_time=True)
 
     if not robot.is_connected or not leader_arm.is_connected or not keyboard.is_connected:
         raise ValueError("Robot or teleop is not connected!")
diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py
index 684b299af..af0ce34d9 100644
--- a/src/lerobot/scripts/lerobot_record.py
+++ b/src/lerobot/scripts/lerobot_record.py
@@ -285,6 +285,13 @@ def record_loop(
     display_data: bool = False,
     display_compressed_images: bool = False,
 ):
+    if display_data:
+        init_rerun(
+            session_name="recording",
+            robot=robot,
+            reset_time=True,
+        )
+
     if dataset is not None and dataset.fps != fps:
         raise ValueError(f"The dataset fps should be equal to requested fps ({dataset.fps} != {fps}).")
 
diff --git a/src/lerobot/scripts/lerobot_teleoperate.py b/src/lerobot/scripts/lerobot_teleoperate.py
index 99a99b4df..2115a5fc2 100644
--- a/src/lerobot/scripts/lerobot_teleoperate.py
+++ b/src/lerobot/scripts/lerobot_teleoperate.py
@@ -143,6 +143,12 @@ def teleop_loop(
         robot_action_processor: An optional pipeline to process actions before they are sent to the robot.
         robot_observation_processor: An optional pipeline to process raw observations from the robot.
     """
+    if display_data:
+        init_rerun(
+            session_name="teleoperation",
+            robot=robot,
+            reset_time=True,
+        )
 
     display_len = max(len(key) for key in robot.action_features)
 
diff --git a/src/lerobot/utils/visualization_utils.py b/src/lerobot/utils/visualization_utils.py
index 31ca8d247..05a30df3e 100644
--- a/src/lerobot/utils/visualization_utils.py
+++ b/src/lerobot/utils/visualization_utils.py
@@ -14,17 +14,24 @@
 
 import numbers
 import os
+from uuid import uuid4
 
 import numpy as np
 import rerun as rr
 
+from lerobot.datasets.utils import DEFAULT_AUDIO_CHUNK_DURATION
 from lerobot.processor import RobotAction, RobotObservation
+from lerobot.robots import Robot
 
 from .constants import ACTION, ACTION_PREFIX, OBS_PREFIX, OBS_STR
 
 
 def init_rerun(
-    session_name: str = "lerobot_control_loop", ip: str | None = None, port: int | None = None
+    session_name: str = "lerobot_control_loop",
+    ip: str | None = None,
+    port: int | None = None,
+    robot: Robot | None = None,
+    reset_time: bool = False,
 ) -> None:
     """
     Initializes the Rerun SDK for visualizing the control loop.
@@ -33,16 +40,25 @@ def init_rerun(
         session_name: Name of the Rerun session.
         ip: Optional IP for connecting to a Rerun server.
         port: Optional port for connecting to a Rerun server.
+        robot: A Robot object. If provided, Rerun will be initialized with a blueprint that includes the object's cameras and microphones.
+        reset_time: Whether to reset the timer "episode_time" to 0.
     """
     batch_size = os.getenv("RERUN_FLUSH_NUM_BYTES", "8000")
     os.environ["RERUN_FLUSH_NUM_BYTES"] = batch_size
-    rr.init(session_name)
+    rr.init(
+        application_id=session_name,
+        recording_id=uuid4(),
+        default_blueprint=build_rerun_blueprint(robot) if robot is not None else None,
+    )
     memory_limit = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%")
     if ip and port:
         rr.connect_grpc(url=f"rerun+http://{ip}:{port}/proxy")
     else:
         rr.spawn(memory_limit=memory_limit)
 
+    if reset_time:
+        rr.set_time_seconds("episode_time", seconds=0.0)
+
 
 def _is_scalar(x):
     return isinstance(x, (float | numbers.Real | np.integer | np.floating)) or (
@@ -50,10 +66,47 @@ def _is_scalar(x):
     )
 
 
+def build_rerun_blueprint(robot: Robot) -> rr.blueprint.Grid:
+    """
+    Builds a Rerun blueprint arranging the robot's observations and actions in a grid of views:
+    -   A time series view rooted at "states_actions", always present.
+    -   A time series view rooted at "audio", added only when `robot.microphones` is truthy.
+    -   One spatial 2D view per name in `robot.cameras`, rooted at that camera name.
+
+    Args:
+        robot: A Robot object exposing `cameras` and `microphones`.
+    Returns:
+        A Rerun grid blueprint containing the views listed above.
+    """
+    # NOTE(review): 1D arrays are logged as "<key>_<i>" by log_rerun_data; confirm those paths
+    # live under "states_actions/", otherwise this view stays empty.
+    contents = [
+        rr.blueprint.TimeSeriesView(
+            origin="states_actions",
+            plot_legend=rr.blueprint.PlotLegend(visible=True),
+        )
+    ]
+    if robot.microphones:
+        # Audio chunks are logged under "audio/<key>", so the view is rooted at "audio":
+        # a view rooted at "microphones" would not contain any of the logged entities.
+        contents.append(
+            rr.blueprint.TimeSeriesView(
+                origin="audio",
+                plot_legend=rr.blueprint.PlotLegend(visible=True),
+            )
+        )
+    if robot.cameras:
+        # NOTE(review): views are rooted at the bare camera name; confirm images are logged there.
+        contents += [rr.blueprint.Spatial2DView(origin=camera_name) for camera_name in robot.cameras]
+
+    return rr.blueprint.Grid(contents)
+
+
 def log_rerun_data(
     observation: RobotObservation | None = None,
     action: RobotAction | None = None,
     compress_images: bool = False,
+    log_time: float | None = None,
 ) -> None:
     """
     Logs observation and action data to Rerun for real-time visualization.
@@ -72,7 +125,12 @@ def log_rerun_data(
         observation: An optional dictionary containing observation data to log.
         action: An optional dictionary containing action data to log.
         compress_images: Whether to compress images before logging to save bandwidth & memory in exchange for cpu and quality.
+        log_time: The time to log the data in the "episode_time" timeline.
+                  If None, the current time is used in Rerun's default timeline.
     """
+    if log_time is not None:
+        rr.set_time_seconds("episode_time", seconds=log_time)
+
     if observation:
         for k, v in observation.items():
             if v is None:
@@ -86,9 +144,32 @@ def log_rerun_data(
                 # Convert CHW -> HWC when needed
                 if arr.ndim == 3 and arr.shape[0] in (1, 3, 4) and arr.shape[-1] not in (1, 3, 4):
                     arr = np.transpose(arr, (1, 2, 0))
+                # Convert channel x samples -> samples x channel when needed
+                elif arr.ndim == 2 and arr.shape[0] < arr.shape[1]:
+                    arr = np.transpose(arr, (1, 0))
+
                 if arr.ndim == 1:
                     for i, vi in enumerate(arr):
                         rr.log(f"{key}_{i}", rr.Scalars(float(vi)))
+                elif arr.ndim == 2:
+                    # Audio chunk, expected as samples x channels after the conversion above:
+                    # timestamp every sample so that the chunk ends at log_time. Without a log_time,
+                    # the chunk is anchored at 0.0 rather than failing on `None + array`.
+                    chunk_end = 0.0 if log_time is None else log_time
+                    sample_offsets = np.linspace(
+                        -DEFAULT_AUDIO_CHUNK_DURATION,
+                        0,
+                        len(arr),
+                        endpoint=False,
+                    )
+                    rr.send_columns(
+                        "audio/" + key,
+                        indexes=[rr.TimeSecondsColumn("episode_time", times=chunk_end + sample_offsets)],
+                        # Send the transposed array so that each row matches one entry of the time column.
+                        columns=rr.Scalars.columns(scalars=arr),
+                    )
+                # 3D arrays (images) fall through to the branch below so that `compress_images` is
+                # honoured for them too.
                 else:
                     img_entity = rr.Image(arr).compress() if compress_images else rr.Image(arr)
                     rr.log(key, entity=img_entity, static=True)