mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-15 16:49:55 +00:00
8a3d64033f
* feat(rewards): add RewardModelConfig and PreTrainedRewardModel base classes * refactor(rewards): migrate Classifier from policies/sac/reward_model/ to rewards/classifier/ * refactor(rewards): migrate SARM from policies/sarm/ to rewards/sarm/ * refactor(rewards): add rewards/factory.py and remove reward model code from policies/factory.py * refactor(rewards): update imports and delete old reward model locations * test(rewards): add reward model tests and update existing test imports * fix(rewards): restore full Classifier and SARM implementations * test(rewards): restore missing CUDA and mixed precision classifier processor tests * refactor(lerobot_train.py): remove rabc specific configuration and replace it with a generic samplerweight class in lerobot_train * refactor(lerobot_train.py): add missing sampling weight script * linter + missing files * add testing for sampl weighter * revert some useless changes, improve typing * update docs * add automatic detection of the progress path * remove type exp * improve comment * fix: move rabc.py to rewards/sarm/ and update import paths * refactor(imports): update reward model imports to new module structure * refactor(imports): update reward model imports to reflect new module structure * refactor(imports): conditionally import pandas based on availability * feat(configs): add reward_model field to TrainPipelineConfig and Hub fields to RewardModelConfig * refactor(policies): remove reward model branches from policy factory and __init__ * refactor(rewards): expand __init__ facade and fix SARMConfig __post_init__ crash * feat(train): route reward model training through rewards/factory instead of policies/factory * refactor(train): streamline reward model training logic * fix(rewards): ensure FileNotFoundError is raised for missing config_file * refactor(train): update __get_path_fields__ to include reward_model for config loading * refactor(classifier): remove redundant input normalization in predict_reward method * 
fix(train): raise ValueError for non-trainable reward models in train function * refactor(pretrained_rm): add model card template * refactor(tests): reward models * refactor(sarm): update reset method and remove unused action prediction methods * refactor(wandb): differentiate tags for reward model and policy training in cfg_to_group function * fix(train): raise ValueError for PEFT usage in reward model training * refactor(rewards): enhance RewardModelConfig with device handling and delta indices properties --------- Co-authored-by: Michel Aractingi <michel.aractingi@huggingface.co>
133 lines
5.3 KiB
Python
133 lines
5.3 KiB
Python
#!/usr/bin/env python
|
|
|
|
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import logging
|
|
from pprint import pformat
|
|
|
|
import torch
|
|
|
|
from lerobot.configs import PreTrainedConfig
|
|
from lerobot.configs.rewards import RewardModelConfig
|
|
from lerobot.configs.train import TrainPipelineConfig
|
|
from lerobot.transforms import ImageTransforms
|
|
from lerobot.utils.constants import ACTION, IMAGENET_STATS, OBS_PREFIX, REWARD
|
|
|
|
from .dataset_metadata import LeRobotDatasetMetadata
|
|
from .lerobot_dataset import LeRobotDataset
|
|
from .multi_dataset import MultiLeRobotDataset
|
|
from .streaming_dataset import StreamingLeRobotDataset
|
|
|
|
|
|
def resolve_delta_timestamps(
    cfg: PreTrainedConfig | RewardModelConfig, ds_meta: LeRobotDatasetMetadata
) -> dict[str, list] | None:
    """Build per-feature delta_timestamps from the config's delta-index properties.

    Each configured delta index (an offset in frames) is converted to seconds by
    dividing by the dataset's fps. Only features present in ``ds_meta.features``
    are considered.

    Args:
        cfg (PreTrainedConfig | RewardModelConfig): Config exposing the
            ``{observation,action,reward}_delta_indices`` properties (both
            ``PreTrainedConfig`` and concrete ``RewardModelConfig`` subclasses do).
        ds_meta (LeRobotDatasetMetadata): Provides the feature keys and the fps
            used to convert indices to timestamps.

    Returns:
        dict[str, list] | None: Mapping of feature key to timestamps in seconds, e.g.:
            {
                "observation.state": [-0.04, -0.02, 0],
                "action": [-0.02, 0, 0.02],
            }
        ``None`` when no delta indices are configured for any present feature.
    """
    fps = ds_meta.fps
    timestamps: dict[str, list] = {}

    for feature_key in ds_meta.features:
        if feature_key == REWARD and cfg.reward_delta_indices is not None:
            timestamps[feature_key] = [idx / fps for idx in cfg.reward_delta_indices]
        if feature_key == ACTION and cfg.action_delta_indices is not None:
            timestamps[feature_key] = [idx / fps for idx in cfg.action_delta_indices]
        if feature_key.startswith(OBS_PREFIX) and cfg.observation_delta_indices is not None:
            timestamps[feature_key] = [idx / fps for idx in cfg.observation_delta_indices]

    # An empty mapping means "no delta timestamps requested" — signal with None.
    return timestamps or None
|
|
|
|
|
|
def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDataset:
    """Handles the logic of setting up delta timestamps and image transforms before creating a dataset.

    Args:
        cfg (TrainPipelineConfig): A TrainPipelineConfig config which contains a DatasetConfig and a
            PreTrainedConfig (or RewardModelConfig, exposed through ``cfg.trainable_config``).

    Raises:
        NotImplementedError: When ``cfg.dataset.repo_id`` is not a single string, i.e. a
            MultiLeRobotDataset would be required, which is currently deactivated.

    Returns:
        LeRobotDataset | MultiLeRobotDataset
    """
    # Guard clause: multi-dataset training is not supported yet.
    # TODO(aliberts): add proper support for MultiLeRobotDataset (including delta_timestamps
    # resolution per sub-dataset) before re-enabling this branch.
    if not isinstance(cfg.dataset.repo_id, str):
        raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.")

    image_transforms = (
        ImageTransforms(cfg.dataset.image_transforms) if cfg.dataset.image_transforms.enable else None
    )

    # Metadata is loaded first so delta_timestamps can be resolved from the dataset's
    # features and fps before the (potentially heavy) dataset object is built.
    ds_meta = LeRobotDatasetMetadata(
        cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
    )
    delta_timestamps = resolve_delta_timestamps(cfg.trainable_config, ds_meta)

    if not cfg.dataset.streaming:
        dataset = LeRobotDataset(
            cfg.dataset.repo_id,
            root=cfg.dataset.root,
            episodes=cfg.dataset.episodes,
            delta_timestamps=delta_timestamps,
            image_transforms=image_transforms,
            revision=cfg.dataset.revision,
            video_backend=cfg.dataset.video_backend,
            return_uint8=True,
            tolerance_s=cfg.tolerance_s,
        )
    else:
        dataset = StreamingLeRobotDataset(
            cfg.dataset.repo_id,
            root=cfg.dataset.root,
            episodes=cfg.dataset.episodes,
            delta_timestamps=delta_timestamps,
            image_transforms=image_transforms,
            revision=cfg.dataset.revision,
            # Shard count is capped by the dataloader worker count so each worker
            # gets at least one shard to stream from.
            max_num_shards=cfg.num_workers,
            tolerance_s=cfg.tolerance_s,
            return_uint8=True,
        )

    # Optionally override per-camera normalization stats with the standard ImageNet
    # mean/std, which pretrained vision backbones typically expect.
    if cfg.dataset.use_imagenet_stats:
        for key in dataset.meta.camera_keys:
            for stats_type, stats in IMAGENET_STATS.items():
                dataset.meta.stats[key][stats_type] = torch.tensor(stats, dtype=torch.float32)

    return dataset