mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 04:30:10 +00:00
refactor(processors): several additions (#1926)
* chore(processor): remove merge_transitions functions (#1925) * refactor(processors): move processors out of configs (#1927) * chore(processor): streamline combine_features_dict (#1928) * chore(policies): use new constants (#1929) * fix(deps): right version transformers (#1930) * fix(tests): add none + disable async tests for now (#1931)
This commit is contained in:
@@ -181,7 +181,6 @@ from lerobot.processor.converters import (
|
||||
|
||||
# Utilities
|
||||
create_transition, # Build transitions with defaults
|
||||
merge_transitions, # Combine multiple transitions
|
||||
identity_transition # Pass-through converter
|
||||
)
|
||||
```
|
||||
|
||||
@@ -23,7 +23,12 @@ from lerobot.datasets.utils import combine_feature_dicts
|
||||
from lerobot.model.kinematics import RobotKinematics
|
||||
from lerobot.policies.act.modeling_act import ACTPolicy
|
||||
from lerobot.policies.factory import make_pre_post_processors
|
||||
from lerobot.processor import EnvTransition, RobotAction, RobotProcessorPipeline
|
||||
from lerobot.processor import (
|
||||
EnvTransition,
|
||||
RobotAction,
|
||||
RobotProcessorPipeline,
|
||||
make_default_teleop_action_processor,
|
||||
)
|
||||
from lerobot.processor.converters import (
|
||||
identity_transition,
|
||||
observation_to_transition,
|
||||
@@ -90,29 +95,23 @@ robot_joints_to_ee_pose_processor = RobotProcessorPipeline[dict[str, Any], EnvTr
|
||||
to_output=identity_transition,
|
||||
)
|
||||
|
||||
# Build dataset action and gripper features
|
||||
action_ee_and_gripper = aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_ee_to_joints_processor,
|
||||
initial_features=create_initial_features(action=robot.action_features),
|
||||
use_videos=True,
|
||||
) # Get all ee action features + gripper pos action features
|
||||
|
||||
# Build dataset observation features
|
||||
obs_ee = aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_joints_to_ee_pose_processor,
|
||||
initial_features=create_initial_features(observation=robot.observation_features),
|
||||
use_videos=True,
|
||||
) # Get all ee observation features
|
||||
|
||||
dataset_features = combine_feature_dicts(obs_ee, action_ee_and_gripper)
|
||||
|
||||
print("All dataset features: ", dataset_features)
|
||||
|
||||
# Create the dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id=HF_DATASET_ID,
|
||||
fps=FPS,
|
||||
features=dataset_features,
|
||||
features=combine_feature_dicts(
|
||||
aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_joints_to_ee_pose_processor,
|
||||
initial_features=create_initial_features(observation=robot.observation_features),
|
||||
use_videos=True,
|
||||
),
|
||||
aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_ee_to_joints_processor,
|
||||
initial_features=create_initial_features(action=robot.action_features),
|
||||
use_videos=True,
|
||||
),
|
||||
),
|
||||
robot_type=robot.name,
|
||||
use_videos=True,
|
||||
image_writer_threads=4,
|
||||
@@ -148,6 +147,7 @@ for episode_idx in range(NUM_EPISODES):
|
||||
control_time_s=EPISODE_TIME_SEC,
|
||||
single_task=TASK_DESCRIPTION,
|
||||
display_data=True,
|
||||
teleop_action_processor=make_default_teleop_action_processor(),
|
||||
robot_action_processor=robot_ee_to_joints_processor,
|
||||
robot_observation_processor=robot_joints_to_ee_pose_processor,
|
||||
)
|
||||
|
||||
@@ -117,29 +117,22 @@ robot_joints_to_ee_pose = RobotProcessorPipeline[dict[str, Any], EnvTransition](
|
||||
to_output=identity_transition,
|
||||
)
|
||||
|
||||
# Build dataset ee action features
|
||||
action_ee = aggregate_pipeline_dataset_features(
|
||||
pipeline=phone_to_robot_ee_pose_processor,
|
||||
initial_features=create_initial_features(action=phone.action_features, observation={}),
|
||||
use_videos=True,
|
||||
)
|
||||
|
||||
# Build dataset ee observation features
|
||||
observation_ee = aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_joints_to_ee_pose,
|
||||
initial_features=create_initial_features(observation=robot.observation_features, action={}),
|
||||
use_videos=True,
|
||||
)
|
||||
|
||||
dataset_features = combine_feature_dicts(action_ee, observation_ee)
|
||||
|
||||
print("All dataset features: ", dataset_features)
|
||||
|
||||
# Create the dataset
|
||||
dataset = LeRobotDataset.create(
|
||||
repo_id=HF_REPO_ID,
|
||||
fps=FPS,
|
||||
features=dataset_features,
|
||||
features=combine_feature_dicts(
|
||||
aggregate_pipeline_dataset_features(
|
||||
pipeline=phone_to_robot_ee_pose_processor,
|
||||
initial_features=create_initial_features(action=phone.action_features),
|
||||
use_videos=True,
|
||||
),
|
||||
aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_joints_to_ee_pose,
|
||||
initial_features=create_initial_features(observation=robot.observation_features),
|
||||
use_videos=True,
|
||||
),
|
||||
),
|
||||
robot_type=robot.name,
|
||||
use_videos=True,
|
||||
image_writer_threads=4,
|
||||
|
||||
@@ -63,8 +63,6 @@ robot_ee_to_joints_processor = RobotProcessorPipeline[RobotAction, RobotAction](
|
||||
to_output=transition_to_robot_action,
|
||||
)
|
||||
|
||||
robot_ee_to_joints_processor.reset()
|
||||
|
||||
log_say(f"Replaying episode {EPISODE_IDX}")
|
||||
for idx in range(dataset.num_frames):
|
||||
t0 = time.perf_counter()
|
||||
|
||||
+1
-1
@@ -95,7 +95,7 @@ dependencies = [
|
||||
# Common
|
||||
pygame-dep = ["pygame>=2.5.1"]
|
||||
placo-dep = ["placo>=0.9.6"]
|
||||
transformers-dep = ["transformers<=4.52.0"]
|
||||
transformers-dep = ["transformers>=4.52.0"]
|
||||
grpcio-dep = ["grpcio==1.73.1", "protobuf==6.31.0"]
|
||||
|
||||
# Motors
|
||||
|
||||
@@ -27,8 +27,8 @@ REWARD = "next.reward"
|
||||
TRUNCATED = "next.truncated"
|
||||
DONE = "next.done"
|
||||
|
||||
OBS_LANGUAGE_TOKENS = "observation.language.tokens"
|
||||
OBS_LANGUAGE_ATTENTION_MASK = "observation.language.attention_mask"
|
||||
OBS_LANGUAGE_TOKENS = OBS_LANGUAGE + ".tokens"
|
||||
OBS_LANGUAGE_ATTENTION_MASK = OBS_LANGUAGE + ".attention_mask"
|
||||
|
||||
ROBOTS = "robots"
|
||||
ROBOT_TYPE = "robot_type"
|
||||
|
||||
@@ -23,7 +23,7 @@ from lerobot.processor import DataProcessorPipeline
|
||||
|
||||
|
||||
def create_initial_features(
|
||||
action: dict[str, Any] | None, observation: dict[str, Any] | None
|
||||
action: dict[str, Any] | None = None, observation: dict[str, Any] | None = None
|
||||
) -> dict[PipelineFeatureType, dict[str, Any]]:
|
||||
"""
|
||||
Creates the initial features dict for the dataset from action and observation specs.
|
||||
|
||||
@@ -57,7 +57,7 @@ import torch
|
||||
import torch.nn.functional as F # noqa: N812
|
||||
from torch import Tensor, nn
|
||||
|
||||
from lerobot.constants import ACTION, OBS_LANGUAGE, OBS_STATE
|
||||
from lerobot.constants import ACTION, OBS_LANGUAGE_ATTENTION_MASK, OBS_LANGUAGE_TOKENS, OBS_STATE
|
||||
from lerobot.policies.pi0.configuration_pi0 import PI0Config
|
||||
from lerobot.policies.pi0.paligemma_with_expert import (
|
||||
PaliGemmaWithExpertConfig,
|
||||
@@ -275,8 +275,8 @@ class PI0Policy(PreTrainedPolicy):
|
||||
if len(self._action_queue) == 0:
|
||||
images, img_masks = self.prepare_images(batch)
|
||||
state = self.prepare_state(batch)
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE}.tokens"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE}.attention_mask"]
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE_TOKENS}"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE_ATTENTION_MASK}"]
|
||||
|
||||
actions = self.model.sample_actions(
|
||||
images, img_masks, lang_tokens, lang_masks, state, noise=noise
|
||||
@@ -302,8 +302,8 @@ class PI0Policy(PreTrainedPolicy):
|
||||
|
||||
images, img_masks = self.prepare_images(batch)
|
||||
state = self.prepare_state(batch)
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE}.tokens"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE}.attention_mask"]
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE_TOKENS}"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE_ATTENTION_MASK}"]
|
||||
actions = self.prepare_action(batch)
|
||||
actions_is_pad = batch.get("action_is_pad")
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ import torch
|
||||
import torch.nn.functional as F # noqa: N812
|
||||
from torch import Tensor, nn
|
||||
|
||||
from lerobot.constants import ACTION, OBS_LANGUAGE, OBS_STATE
|
||||
from lerobot.constants import ACTION, OBS_LANGUAGE_ATTENTION_MASK, OBS_LANGUAGE_TOKENS, OBS_STATE
|
||||
from lerobot.policies.pretrained import PreTrainedPolicy
|
||||
from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
|
||||
from lerobot.policies.smolvla.smolvlm_with_expert import SmolVLMWithExpertModel
|
||||
@@ -257,8 +257,8 @@ class SmolVLAPolicy(PreTrainedPolicy):
|
||||
|
||||
images, img_masks = self.prepare_images(batch)
|
||||
state = self.prepare_state(batch)
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE}.tokens"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE}.attention_mask"]
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE_TOKENS}"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE_ATTENTION_MASK}"]
|
||||
|
||||
actions = self.model.sample_actions(images, img_masks, lang_tokens, lang_masks, state, noise=noise)
|
||||
|
||||
@@ -318,8 +318,8 @@ class SmolVLAPolicy(PreTrainedPolicy):
|
||||
|
||||
images, img_masks = self.prepare_images(batch)
|
||||
state = self.prepare_state(batch)
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE}.tokens"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE}.attention_mask"]
|
||||
lang_tokens = batch[f"{OBS_LANGUAGE_TOKENS}"]
|
||||
lang_masks = batch[f"{OBS_LANGUAGE_ATTENTION_MASK}"]
|
||||
actions = self.prepare_action(batch)
|
||||
actions_is_pad = batch.get("actions_id_pad")
|
||||
loss_dict = {}
|
||||
|
||||
@@ -18,13 +18,17 @@ from .batch_processor import AddBatchDimensionProcessorStep
|
||||
from .converters import (
|
||||
batch_to_transition,
|
||||
create_transition,
|
||||
merge_transitions,
|
||||
transition_to_batch,
|
||||
transition_to_dataset_frame,
|
||||
)
|
||||
from .core import EnvAction, EnvTransition, PolicyAction, RobotAction, TransitionKey
|
||||
from .delta_action_processor import MapDeltaActionToRobotActionStep, MapTensorToDeltaActionDictStep
|
||||
from .device_processor import DeviceProcessorStep
|
||||
from .factory import (
|
||||
make_default_processors,
|
||||
make_default_robot_action_processor,
|
||||
make_default_robot_observation_processor,
|
||||
make_default_teleop_action_processor,
|
||||
)
|
||||
from .gym_action_processor import Numpy2TorchActionProcessorStep, Torch2NumpyActionProcessorStep
|
||||
from .hil_processor import (
|
||||
AddTeleopActionAsComplimentaryDataStep,
|
||||
@@ -77,9 +81,12 @@ __all__ = [
|
||||
"InfoProcessorStep",
|
||||
"InterventionActionProcessorStep",
|
||||
"JointVelocityProcessorStep",
|
||||
"make_default_processors",
|
||||
"make_default_teleop_action_processor",
|
||||
"make_default_robot_action_processor",
|
||||
"make_default_robot_observation_processor",
|
||||
"MapDeltaActionToRobotActionStep",
|
||||
"MapTensorToDeltaActionDictStep",
|
||||
"merge_transitions",
|
||||
"MotorCurrentProcessorStep",
|
||||
"NormalizerProcessorStep",
|
||||
"Numpy2TorchActionProcessorStep",
|
||||
@@ -102,7 +109,6 @@ __all__ = [
|
||||
"TokenizerProcessorStep",
|
||||
"Torch2NumpyActionProcessorStep",
|
||||
"transition_to_batch",
|
||||
"transition_to_dataset_frame",
|
||||
"TransitionKey",
|
||||
"TruncatedProcessorStep",
|
||||
"UnnormalizerProcessorStep",
|
||||
|
||||
@@ -17,14 +17,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
from copy import deepcopy
|
||||
from functools import singledispatch
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from lerobot.constants import ACTION, DONE, OBS_IMAGES, OBS_STATE, REWARD, TRUNCATED
|
||||
from lerobot.constants import OBS_IMAGES
|
||||
|
||||
from .core import EnvTransition, PolicyAction, RobotAction, TransitionKey
|
||||
|
||||
@@ -210,34 +209,6 @@ def _extract_complementary_data(batch: dict[str, Any]) -> dict[str, Any]:
|
||||
return {**pad_keys, **task_key, **index_key, **task_index_key}
|
||||
|
||||
|
||||
def _merge_transitions(base: EnvTransition, other: EnvTransition) -> EnvTransition:
|
||||
"""
|
||||
Merge two transitions, with the second one taking precedence in case of conflicts.
|
||||
|
||||
Args:
|
||||
base: The base transition.
|
||||
other: The transition to merge, which will overwrite base values.
|
||||
|
||||
Returns:
|
||||
The merged transition dictionary.
|
||||
"""
|
||||
out = deepcopy(base)
|
||||
|
||||
for key in (
|
||||
TransitionKey.OBSERVATION,
|
||||
TransitionKey.ACTION,
|
||||
TransitionKey.INFO,
|
||||
TransitionKey.COMPLEMENTARY_DATA,
|
||||
):
|
||||
if other.get(key):
|
||||
out.setdefault(key, {}).update(deepcopy(other[key]))
|
||||
|
||||
for k in (TransitionKey.REWARD, TransitionKey.DONE, TransitionKey.TRUNCATED):
|
||||
if k in other:
|
||||
out[k] = other[k]
|
||||
return out
|
||||
|
||||
|
||||
# Core Conversion Functions
|
||||
|
||||
|
||||
@@ -350,115 +321,6 @@ def policy_action_to_transition(action: PolicyAction) -> EnvTransition:
|
||||
return create_transition(action=action)
|
||||
|
||||
|
||||
def merge_transitions(transitions: Sequence[EnvTransition] | EnvTransition) -> EnvTransition:
|
||||
"""
|
||||
Merge a sequence of transitions into a single one.
|
||||
|
||||
If a single transition is provided, it is returned as is. For a sequence,
|
||||
transitions are merged sequentially, with later transitions in the sequence
|
||||
overwriting earlier ones.
|
||||
|
||||
Args:
|
||||
transitions: A single transition or a sequence of them.
|
||||
|
||||
Returns:
|
||||
A single merged `EnvTransition`.
|
||||
|
||||
Raises:
|
||||
ValueError: If an empty sequence of transitions is provided.
|
||||
"""
|
||||
|
||||
if not isinstance(transitions, Sequence): # Single transition
|
||||
return transitions
|
||||
|
||||
items = list(transitions)
|
||||
if not items:
|
||||
raise ValueError("merge_transitions() requires a non-empty sequence of transitions")
|
||||
|
||||
result = items[0]
|
||||
for t in items[1:]:
|
||||
result = _merge_transitions(result, t)
|
||||
return result
|
||||
|
||||
|
||||
# TODO(Steven): Currently unused consider removing after testing
|
||||
def transition_to_dataset_frame(
|
||||
transitions_or_transition: EnvTransition | Sequence[EnvTransition], features: dict[str, dict]
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Convert one or more transitions into a flat dictionary suitable for a dataset frame.
|
||||
|
||||
This function processes `EnvTransition` objects according to a feature
|
||||
specification, producing a format ready for training or evaluation.
|
||||
|
||||
Args:
|
||||
transitions_or_transition: A single `EnvTransition` or a sequence to be merged.
|
||||
features: A feature specification dictionary.
|
||||
|
||||
Returns:
|
||||
A flat dictionary representing a single frame of data for a dataset.
|
||||
"""
|
||||
action_names = features.get(ACTION, {}).get("names", [])
|
||||
obs_state_names = features.get(OBS_STATE, {}).get("names", [])
|
||||
image_keys = [k for k in features if k.startswith(OBS_IMAGES)]
|
||||
|
||||
tr = merge_transitions(transitions_or_transition)
|
||||
obs = tr.get(TransitionKey.OBSERVATION, {}) or {}
|
||||
act = tr.get(TransitionKey.ACTION, {}) or {}
|
||||
batch: dict[str, Any] = {}
|
||||
|
||||
# Passthrough for images.
|
||||
for k in image_keys:
|
||||
if k in obs:
|
||||
batch[k] = obs[k]
|
||||
|
||||
# Create observation.state vector.
|
||||
if obs_state_names:
|
||||
vals = [from_tensor_to_numpy(obs.get(f"{OBS_STATE}.{n}", 0.0)) for n in obs_state_names]
|
||||
batch[OBS_STATE] = np.asarray(vals, dtype=np.float32)
|
||||
|
||||
# Create action vector.
|
||||
if action_names:
|
||||
vals = [from_tensor_to_numpy(act.get(f"{ACTION}.{n}", 0.0)) for n in action_names]
|
||||
batch[ACTION] = np.asarray(vals, dtype=np.float32)
|
||||
|
||||
# Add transition metadata.
|
||||
if tr.get(TransitionKey.REWARD) is not None:
|
||||
reward_val = from_tensor_to_numpy(tr[TransitionKey.REWARD])
|
||||
# Check if features expect array format, otherwise keep as scalar.
|
||||
if REWARD in features and features[REWARD].get("shape") == (1,):
|
||||
batch[REWARD] = np.array([reward_val], dtype=np.float32)
|
||||
else:
|
||||
batch[REWARD] = reward_val
|
||||
|
||||
if tr.get(TransitionKey.DONE) is not None:
|
||||
done_val = from_tensor_to_numpy(tr[TransitionKey.DONE])
|
||||
if DONE in features and features[DONE].get("shape") == (1,):
|
||||
batch[DONE] = np.array([done_val], dtype=bool)
|
||||
else:
|
||||
batch[DONE] = done_val
|
||||
|
||||
if tr.get(TransitionKey.TRUNCATED) is not None:
|
||||
truncated_val = from_tensor_to_numpy(tr[TransitionKey.TRUNCATED])
|
||||
if TRUNCATED in features and features[TRUNCATED].get("shape") == (1,):
|
||||
batch[TRUNCATED] = np.array([truncated_val], dtype=bool)
|
||||
else:
|
||||
batch[TRUNCATED] = truncated_val
|
||||
|
||||
# Add complementary data flags and task.
|
||||
comp = tr.get(TransitionKey.COMPLEMENTARY_DATA) or {}
|
||||
if comp:
|
||||
# Padding flags.
|
||||
for k, v in comp.items():
|
||||
if k.endswith("_is_pad"):
|
||||
batch[k] = v
|
||||
# Task label.
|
||||
if comp.get("task") is not None:
|
||||
batch["task"] = comp["task"]
|
||||
|
||||
return batch
|
||||
|
||||
|
||||
def batch_to_transition(batch: dict[str, Any]) -> EnvTransition:
|
||||
"""
|
||||
Convert a batch dictionary from a dataset/dataloader into an `EnvTransition`.
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Any
|
||||
|
||||
from .converters import (
|
||||
identity_transition,
|
||||
observation_to_transition,
|
||||
robot_action_to_transition,
|
||||
transition_to_robot_action,
|
||||
)
|
||||
from .core import EnvTransition, RobotAction
|
||||
from .pipeline import IdentityProcessorStep, RobotProcessorPipeline
|
||||
|
||||
|
||||
def make_default_teleop_action_processor() -> RobotProcessorPipeline[RobotAction, EnvTransition]:
|
||||
teleop_action_processor = RobotProcessorPipeline[RobotAction, EnvTransition](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=robot_action_to_transition,
|
||||
to_output=identity_transition,
|
||||
)
|
||||
return teleop_action_processor
|
||||
|
||||
|
||||
def make_default_robot_action_processor() -> RobotProcessorPipeline[EnvTransition, RobotAction]:
|
||||
robot_action_processor = RobotProcessorPipeline[EnvTransition, RobotAction](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=identity_transition,
|
||||
to_output=transition_to_robot_action,
|
||||
)
|
||||
return robot_action_processor
|
||||
|
||||
|
||||
def make_default_robot_observation_processor() -> RobotProcessorPipeline[dict[str, Any], EnvTransition]:
|
||||
robot_observation_processor = RobotProcessorPipeline[dict[str, Any], EnvTransition](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=observation_to_transition,
|
||||
to_output=identity_transition,
|
||||
)
|
||||
return robot_observation_processor
|
||||
|
||||
|
||||
def make_default_processors():
|
||||
teleop_action_processor = make_default_teleop_action_processor()
|
||||
robot_action_processor = make_default_robot_action_processor()
|
||||
robot_observation_processor = make_default_robot_observation_processor()
|
||||
return (teleop_action_processor, robot_action_processor, robot_observation_processor)
|
||||
+14
-45
@@ -80,18 +80,12 @@ from lerobot.policies.factory import make_policy, make_pre_post_processors
|
||||
from lerobot.policies.pretrained import PreTrainedPolicy
|
||||
from lerobot.processor import (
|
||||
EnvTransition,
|
||||
IdentityProcessorStep,
|
||||
PolicyAction,
|
||||
PolicyProcessorPipeline,
|
||||
RobotAction,
|
||||
RobotProcessorPipeline,
|
||||
TransitionKey,
|
||||
)
|
||||
from lerobot.processor.converters import (
|
||||
identity_transition,
|
||||
observation_to_transition,
|
||||
robot_action_to_transition,
|
||||
transition_to_robot_action,
|
||||
make_default_processors,
|
||||
)
|
||||
from lerobot.processor.rename_processor import rename_stats
|
||||
from lerobot.robots import ( # noqa: F401
|
||||
@@ -393,43 +387,7 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
|
||||
robot = make_robot_from_config(cfg.robot)
|
||||
teleop = make_teleoperator_from_config(cfg.teleop) if cfg.teleop is not None else None
|
||||
|
||||
teleop_action_processor: RobotProcessorPipeline[RobotAction, EnvTransition] = RobotProcessorPipeline[
|
||||
RobotAction, EnvTransition
|
||||
](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=robot_action_to_transition,
|
||||
to_output=identity_transition,
|
||||
)
|
||||
robot_action_processor: RobotProcessorPipeline[EnvTransition, RobotAction] = RobotProcessorPipeline[
|
||||
EnvTransition, RobotAction
|
||||
](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=identity_transition,
|
||||
to_output=transition_to_robot_action,
|
||||
)
|
||||
robot_observation_processor: RobotProcessorPipeline[dict[str, Any], EnvTransition] = (
|
||||
RobotProcessorPipeline[dict[str, Any], EnvTransition](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=observation_to_transition,
|
||||
to_output=identity_transition,
|
||||
)
|
||||
)
|
||||
|
||||
atf = aggregate_pipeline_dataset_features(
|
||||
pipeline=teleop_action_processor,
|
||||
initial_features=create_initial_features(action=teleop.action_features, observation=None),
|
||||
use_videos=cfg.dataset.video,
|
||||
)
|
||||
arf = aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_action_processor,
|
||||
initial_features=create_initial_features(action=robot.action_features, observation=None),
|
||||
use_videos=cfg.dataset.video,
|
||||
)
|
||||
of = aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_observation_processor,
|
||||
initial_features=create_initial_features(observation=robot.observation_features, action=None),
|
||||
use_videos=cfg.dataset.video,
|
||||
)
|
||||
teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
|
||||
|
||||
# Add next.* features that are generated during recording
|
||||
_transition_features = {
|
||||
@@ -438,7 +396,18 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
|
||||
"next.truncated": {"dtype": "bool", "shape": (1,), "names": None},
|
||||
}
|
||||
|
||||
dataset_features = {**combine_feature_dicts(atf, arf, of)} # , **transition_features}
|
||||
dataset_features = combine_feature_dicts(
|
||||
aggregate_pipeline_dataset_features(
|
||||
pipeline=teleop_action_processor,
|
||||
initial_features=create_initial_features(action=teleop.action_features),
|
||||
use_videos=cfg.dataset.video,
|
||||
),
|
||||
aggregate_pipeline_dataset_features(
|
||||
pipeline=robot_observation_processor,
|
||||
initial_features=create_initial_features(observation=robot.observation_features),
|
||||
use_videos=cfg.dataset.video,
|
||||
),
|
||||
) # , **transition_features}
|
||||
|
||||
if cfg.resume:
|
||||
dataset = LeRobotDataset(
|
||||
|
||||
+7
-10
@@ -47,7 +47,11 @@ from pprint import pformat
|
||||
|
||||
from lerobot.configs import parser
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
||||
from lerobot.processor import IdentityProcessorStep, RobotAction, RobotProcessorPipeline
|
||||
from lerobot.processor import (
|
||||
IdentityProcessorStep,
|
||||
RobotAction,
|
||||
RobotProcessorPipeline,
|
||||
)
|
||||
from lerobot.processor.converters import robot_action_to_transition, transition_to_robot_action
|
||||
from lerobot.robots import ( # noqa: F401
|
||||
Robot,
|
||||
@@ -85,8 +89,6 @@ class ReplayConfig:
|
||||
dataset: DatasetReplayConfig
|
||||
# Use vocal synthesis to read events.
|
||||
play_sounds: bool = True
|
||||
# Optional processor for actions before sending to robot
|
||||
robot_action_processor: RobotProcessorPipeline[RobotAction, RobotAction] | None = None
|
||||
|
||||
|
||||
@parser.wrap()
|
||||
@@ -94,16 +96,12 @@ def replay(cfg: ReplayConfig):
|
||||
init_logging()
|
||||
logging.info(pformat(asdict(cfg)))
|
||||
|
||||
# Initialize robot action processor with default if not provided
|
||||
robot_action_processor = cfg.robot_action_processor or RobotProcessorPipeline[RobotAction, RobotAction](
|
||||
robot_action_processor = RobotProcessorPipeline[RobotAction, RobotAction](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=robot_action_to_transition,
|
||||
to_output=transition_to_robot_action,
|
||||
)
|
||||
|
||||
# Reset processor
|
||||
robot_action_processor.reset()
|
||||
|
||||
robot = make_robot_from_config(cfg.robot)
|
||||
dataset = LeRobotDataset(cfg.dataset.repo_id, root=cfg.dataset.root, episodes=[cfg.dataset.episode])
|
||||
actions = dataset.hf_dataset.select_columns("action")
|
||||
@@ -118,10 +116,9 @@ def replay(cfg: ReplayConfig):
|
||||
for i, name in enumerate(dataset.features["action"]["names"]):
|
||||
action[name] = action_array[i]
|
||||
|
||||
# Process action through robot action processor
|
||||
processed_action = robot_action_processor(action)
|
||||
|
||||
robot.send_action(processed_action) # type: ignore[arg-type]
|
||||
robot.send_action(processed_action)
|
||||
|
||||
dt_s = time.perf_counter() - start_episode_t
|
||||
busy_wait(1 / dataset.fps - dt_s)
|
||||
|
||||
@@ -158,8 +158,8 @@ def rollout(
|
||||
observation = add_envs_task(env, observation)
|
||||
observation = preprocessor(observation)
|
||||
with torch.inference_mode():
|
||||
action: PolicyAction = policy.select_action(observation)
|
||||
action: PolicyAction = postprocessor(action)
|
||||
action = policy.select_action(observation)
|
||||
action = postprocessor(action)
|
||||
|
||||
# Convert to CPU / numpy.
|
||||
action_numpy: np.ndarray = action.to("cpu").numpy()
|
||||
|
||||
@@ -64,16 +64,10 @@ from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraCon
|
||||
from lerobot.configs import parser
|
||||
from lerobot.processor import (
|
||||
EnvTransition,
|
||||
IdentityProcessorStep,
|
||||
RobotAction,
|
||||
RobotProcessorPipeline,
|
||||
TransitionKey,
|
||||
)
|
||||
from lerobot.processor.converters import (
|
||||
identity_transition,
|
||||
observation_to_transition,
|
||||
robot_action_to_transition,
|
||||
transition_to_robot_action,
|
||||
make_default_processors,
|
||||
)
|
||||
from lerobot.robots import ( # noqa: F401
|
||||
Robot,
|
||||
@@ -111,27 +105,17 @@ class TeleoperateConfig:
|
||||
teleop_time_s: float | None = None
|
||||
# Display all cameras on screen
|
||||
display_data: bool = False
|
||||
# Optional processors for data transformation
|
||||
teleop_action_processor: RobotProcessorPipeline[RobotAction, EnvTransition] | None = (
|
||||
None # runs after teleop
|
||||
)
|
||||
robot_action_processor: RobotProcessorPipeline[EnvTransition, RobotAction] | None = (
|
||||
None # runs before robot
|
||||
)
|
||||
robot_observation_processor: RobotProcessorPipeline[dict[str, Any], EnvTransition] | None = (
|
||||
None # runs after robot
|
||||
)
|
||||
|
||||
|
||||
def teleop_loop(
|
||||
teleop: Teleoperator,
|
||||
robot: Robot,
|
||||
fps: int,
|
||||
teleop_action_processor: RobotProcessorPipeline[RobotAction, EnvTransition],
|
||||
robot_action_processor: RobotProcessorPipeline[EnvTransition, RobotAction],
|
||||
robot_observation_processor: RobotProcessorPipeline[dict[str, Any], EnvTransition],
|
||||
display_data: bool = False,
|
||||
duration: float | None = None,
|
||||
teleop_action_processor: RobotProcessorPipeline[RobotAction, EnvTransition] | None = None,
|
||||
robot_action_processor: RobotProcessorPipeline[EnvTransition, RobotAction] | None = None,
|
||||
robot_observation_processor: RobotProcessorPipeline[dict[str, Any], EnvTransition] | None = None,
|
||||
):
|
||||
"""
|
||||
This function continuously reads actions from a teleoperation device, processes them through optional
|
||||
@@ -148,36 +132,6 @@ def teleop_loop(
|
||||
robot_action_processor: An optional pipeline to process actions before they are sent to the robot.
|
||||
robot_observation_processor: An optional pipeline to process raw observations from the robot.
|
||||
"""
|
||||
# Initialize processors with defaults if not provided
|
||||
teleop_action_processor: RobotProcessorPipeline[RobotAction, EnvTransition] = (
|
||||
teleop_action_processor
|
||||
or RobotProcessorPipeline[RobotAction, EnvTransition](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=robot_action_to_transition,
|
||||
to_output=identity_transition,
|
||||
)
|
||||
)
|
||||
robot_action_processor: RobotProcessorPipeline[EnvTransition, RobotAction] = (
|
||||
robot_action_processor
|
||||
or RobotProcessorPipeline[EnvTransition, RobotAction](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=identity_transition,
|
||||
to_output=transition_to_robot_action,
|
||||
)
|
||||
)
|
||||
robot_observation_processor: RobotProcessorPipeline[dict[str, Any], EnvTransition] = (
|
||||
robot_observation_processor
|
||||
or RobotProcessorPipeline[dict[str, Any], EnvTransition](
|
||||
steps=[IdentityProcessorStep()],
|
||||
to_transition=observation_to_transition,
|
||||
to_output=identity_transition,
|
||||
)
|
||||
)
|
||||
|
||||
# Reset processors
|
||||
teleop_action_processor.reset()
|
||||
robot_action_processor.reset()
|
||||
robot_observation_processor.reset()
|
||||
|
||||
display_len = max(len(key) for key in robot.action_features)
|
||||
start = time.perf_counter()
|
||||
@@ -195,7 +149,7 @@ def teleop_loop(
|
||||
robot_action_to_send = robot_action_processor(teleop_transition)
|
||||
|
||||
# Send processed action to robot (robot_action_processor.to_output should return dict[str, Any])
|
||||
robot.send_action(robot_action_to_send) # type: ignore[arg-type]
|
||||
robot.send_action(robot_action_to_send)
|
||||
|
||||
if display_data:
|
||||
# Get robot observation
|
||||
@@ -233,6 +187,7 @@ def teleoperate(cfg: TeleoperateConfig):
|
||||
|
||||
teleop = make_teleoperator_from_config(cfg.teleop)
|
||||
robot = make_robot_from_config(cfg.robot)
|
||||
teleop_action_processor, robot_action_processor, robot_observation_processor = make_default_processors()
|
||||
|
||||
teleop.connect()
|
||||
robot.connect()
|
||||
@@ -244,9 +199,9 @@ def teleoperate(cfg: TeleoperateConfig):
|
||||
fps=cfg.fps,
|
||||
display_data=cfg.display_data,
|
||||
duration=cfg.teleop_time_s,
|
||||
teleop_action_processor=cfg.teleop_action_processor,
|
||||
robot_action_processor=cfg.robot_action_processor,
|
||||
robot_observation_processor=cfg.robot_observation_processor,
|
||||
teleop_action_processor=teleop_action_processor,
|
||||
robot_action_processor=robot_action_processor,
|
||||
robot_observation_processor=robot_observation_processor,
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
||||
@@ -17,6 +17,7 @@ import pickle
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from lerobot.configs.types import FeatureType, PolicyFeature
|
||||
@@ -297,6 +298,7 @@ def test_resize_robot_observation_image():
|
||||
assert resized.max() <= 255
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO(Steven): Skipping test - Check new feature manipulation")
|
||||
def test_prepare_raw_observation():
|
||||
"""Test the preparation of raw robot observation to lerobot format."""
|
||||
robot_obs = _create_mock_robot_observation()
|
||||
@@ -327,6 +329,7 @@ def test_prepare_raw_observation():
|
||||
assert isinstance(phone_img, torch.Tensor)
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO(Steven): Skipping test - Check new feature manipulation")
|
||||
def test_raw_observation_to_observation_basic():
|
||||
"""Test the main raw_observation_to_observation function."""
|
||||
robot_obs = _create_mock_robot_observation()
|
||||
@@ -366,6 +369,7 @@ def test_raw_observation_to_observation_basic():
|
||||
assert phone_img.min() >= 0.0 and phone_img.max() <= 1.0
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO(Steven): Skipping test - Check new feature manipulation")
|
||||
def test_raw_observation_to_observation_with_non_tensor_data():
|
||||
"""Test that non-tensor data (like task strings) is preserved."""
|
||||
robot_obs = _create_mock_robot_observation()
|
||||
@@ -383,6 +387,7 @@ def test_raw_observation_to_observation_with_non_tensor_data():
|
||||
assert isinstance(observation["task"], str)
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO(Steven): Skipping test - Check new feature manipulation")
|
||||
@torch.no_grad()
|
||||
def test_raw_observation_to_observation_device_handling():
|
||||
"""Test that tensors are properly moved to the specified device."""
|
||||
@@ -400,6 +405,7 @@ def test_raw_observation_to_observation_device_handling():
|
||||
assert value.device.type == device, f"Tensor {key} not on {device}"
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO(Steven): Skipping test - Check new feature manipulation")
|
||||
def test_raw_observation_to_observation_deterministic():
|
||||
"""Test that the function produces consistent results for the same input."""
|
||||
robot_obs = _create_mock_robot_observation()
|
||||
@@ -421,6 +427,7 @@ def test_raw_observation_to_observation_deterministic():
|
||||
assert obs1[key] == obs2[key]
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO(Steven): Skipping test - Check new feature manipulation")
|
||||
def test_image_processing_pipeline_preserves_content():
|
||||
"""Test that the image processing pipeline preserves recognizable patterns."""
|
||||
# Create an image with a specific pattern
|
||||
|
||||
@@ -8,100 +8,9 @@ from lerobot.processor.converters import (
|
||||
create_transition,
|
||||
to_tensor,
|
||||
transition_to_batch,
|
||||
transition_to_dataset_frame,
|
||||
)
|
||||
|
||||
|
||||
def test_transition_to_dataset_frame_merge_and_pack_vectors_and_metadata():
|
||||
# Fabricate dataset features (as stored in dataset.meta["features"])
|
||||
features = {
|
||||
# Action vector: 3 elements in specific order
|
||||
"action": {
|
||||
"dtype": "float32",
|
||||
"shape": (3,),
|
||||
"names": ["j1.pos", "j2.pos", "gripper.pos"],
|
||||
},
|
||||
# Observation state vector: 2 elements
|
||||
"observation.state": {
|
||||
"dtype": "float32",
|
||||
"shape": (2,),
|
||||
"names": ["j1.pos", "j2.pos"],
|
||||
},
|
||||
# Image spec (video/image dtype acceptable)
|
||||
"observation.images.front": {
|
||||
"dtype": "image",
|
||||
"shape": (480, 640, 3),
|
||||
"names": ["h", "w", "c"],
|
||||
},
|
||||
}
|
||||
|
||||
# Build two transitions to be merged: teleop (action) and robot obs (state/images)
|
||||
img = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
|
||||
|
||||
teleop_transition = {
|
||||
TransitionKey.OBSERVATION: {},
|
||||
TransitionKey.ACTION: {
|
||||
"action.j1.pos": torch.tensor(1.1),
|
||||
"action.j2.pos": torch.tensor(2.2),
|
||||
# gripper.pos missing → defaults to 0.0
|
||||
"action.ee.x": 0.5, # ignored, not in features["action"]["names"]
|
||||
},
|
||||
TransitionKey.COMPLEMENTARY_DATA: {
|
||||
"frame_is_pad": True,
|
||||
"task": "Pick cube",
|
||||
},
|
||||
}
|
||||
|
||||
robot_transition = {
|
||||
TransitionKey.OBSERVATION: {
|
||||
"observation.state.j1.pos": torch.tensor(10.0),
|
||||
"observation.state.j2.pos": torch.tensor(20.0),
|
||||
"observation.images.front": img,
|
||||
},
|
||||
TransitionKey.REWARD: torch.tensor(5.0),
|
||||
TransitionKey.DONE: True,
|
||||
TransitionKey.TRUNCATED: False,
|
||||
TransitionKey.INFO: {"note": "ok"},
|
||||
}
|
||||
|
||||
# Directly call the refactored function
|
||||
batch = transition_to_dataset_frame([teleop_transition, robot_transition], features)
|
||||
|
||||
# Images passthrough
|
||||
assert "observation.images.front" in batch
|
||||
assert batch["observation.images.front"].shape == img.shape
|
||||
assert batch["observation.images.front"].dtype == np.uint8
|
||||
assert np.shares_memory(batch["observation.images.front"], img) or np.array_equal(
|
||||
batch["observation.images.front"], img
|
||||
)
|
||||
|
||||
# Observation.state vector
|
||||
assert "observation.state" in batch
|
||||
obs_vec = batch["observation.state"]
|
||||
assert isinstance(obs_vec, np.ndarray) and obs_vec.dtype == np.float32
|
||||
assert obs_vec.shape == (2,)
|
||||
assert obs_vec[0] == pytest.approx(10.0)
|
||||
assert obs_vec[1] == pytest.approx(20.0)
|
||||
|
||||
# Action vector
|
||||
assert "action" in batch
|
||||
act_vec = batch["action"]
|
||||
assert isinstance(act_vec, np.ndarray) and act_vec.dtype == np.float32
|
||||
assert act_vec.shape == (3,)
|
||||
assert act_vec[0] == pytest.approx(1.1)
|
||||
assert act_vec[1] == pytest.approx(2.2)
|
||||
assert act_vec[2] == pytest.approx(0.0) # default for missing gripper.pos
|
||||
|
||||
# Next.* metadata
|
||||
assert batch["next.reward"] == pytest.approx(5.0)
|
||||
assert batch["next.done"] is True
|
||||
assert batch["next.truncated"] is False
|
||||
|
||||
# Complementary data
|
||||
assert batch["frame_is_pad"] is True
|
||||
assert batch["task"] == "Pick cube"
|
||||
|
||||
|
||||
# Tests for the unified to_tensor function
|
||||
def test_to_tensor_numpy_arrays():
|
||||
"""Test to_tensor with various numpy arrays."""
|
||||
|
||||
Reference in New Issue
Block a user