mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-25 21:50:03 +00:00
dc67b2ff3f
Before this change the PEFT-wrapped policy did not save the policy's config alongside the adapter config / weights which prevented us from changing the policy config. Now the policy config is saved both in full training and PEFT training. This change makes loading the PEFT policy adapter much easier as well.
166 lines
6.3 KiB
Python
166 lines
6.3 KiB
Python
#!/usr/bin/env python
|
|
|
|
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from termcolor import colored
|
|
from torch.optim import Optimizer
|
|
from torch.optim.lr_scheduler import LRScheduler
|
|
|
|
from lerobot.common.constants import (
|
|
CHECKPOINTS_DIR,
|
|
LAST_CHECKPOINT_LINK,
|
|
PRETRAINED_MODEL_DIR,
|
|
TRAINING_STATE_DIR,
|
|
TRAINING_STEP,
|
|
)
|
|
from lerobot.common.datasets.utils import load_json, write_json
|
|
from lerobot.common.optim.optimizers import load_optimizer_state, save_optimizer_state
|
|
from lerobot.common.optim.schedulers import load_scheduler_state, save_scheduler_state
|
|
from lerobot.common.policies.pretrained import PreTrainedPolicy
|
|
from lerobot.common.utils.random_utils import load_rng_state, save_rng_state
|
|
from lerobot.configs.train import TrainPipelineConfig
|
|
|
|
|
|
def log_output_dir(out_dir):
|
|
logging.info(colored("Output dir:", "yellow", attrs=["bold"]) + f" {out_dir}")
|
|
|
|
|
|
def get_step_identifier(step: int, total_steps: int) -> str:
|
|
num_digits = max(6, len(str(total_steps)))
|
|
return f"{step:0{num_digits}d}"
|
|
|
|
|
|
def get_step_checkpoint_dir(output_dir: Path, total_steps: int, step: int) -> Path:
|
|
"""Returns the checkpoint sub-directory corresponding to the step number."""
|
|
step_identifier = get_step_identifier(step, total_steps)
|
|
return output_dir / CHECKPOINTS_DIR / step_identifier
|
|
|
|
|
|
def save_training_step(step: int, save_dir: Path) -> None:
|
|
write_json({"step": step}, save_dir / TRAINING_STEP)
|
|
|
|
|
|
def load_training_step(save_dir: Path) -> int:
|
|
training_step = load_json(save_dir / TRAINING_STEP)
|
|
return training_step["step"]
|
|
|
|
|
|
def update_last_checkpoint(checkpoint_dir: Path) -> Path:
|
|
last_checkpoint_dir = checkpoint_dir.parent / LAST_CHECKPOINT_LINK
|
|
if last_checkpoint_dir.is_symlink():
|
|
last_checkpoint_dir.unlink()
|
|
relative_target = checkpoint_dir.relative_to(checkpoint_dir.parent)
|
|
last_checkpoint_dir.symlink_to(relative_target)
|
|
|
|
|
|
def save_checkpoint(
|
|
checkpoint_dir: Path,
|
|
step: int,
|
|
cfg: TrainPipelineConfig,
|
|
policy: PreTrainedPolicy,
|
|
optimizer: Optimizer,
|
|
scheduler: LRScheduler | None = None,
|
|
) -> None:
|
|
"""This function creates the following directory structure:
|
|
|
|
005000/ # training step at checkpoint
|
|
├── pretrained_model/
|
|
│ ├── config.json # policy config
|
|
│ ├── model.safetensors # policy weights
|
|
│ └── train_config.json # train config
|
|
└── training_state/
|
|
├── optimizer_param_groups.json # optimizer param groups
|
|
├── optimizer_state.safetensors # optimizer state
|
|
├── rng_state.safetensors # rng states
|
|
├── scheduler_state.json # scheduler state
|
|
└── training_step.json # training step
|
|
|
|
Args:
|
|
cfg (TrainPipelineConfig): The training config used for this run.
|
|
step (int): The training step at that checkpoint.
|
|
policy (PreTrainedPolicy): The policy to save.
|
|
optimizer (Optimizer | None, optional): The optimizer to save the state from. Defaults to None.
|
|
scheduler (LRScheduler | None, optional): The scheduler to save the state from. Defaults to None.
|
|
"""
|
|
pretrained_dir = checkpoint_dir / PRETRAINED_MODEL_DIR
|
|
policy.save_pretrained(pretrained_dir)
|
|
cfg.save_pretrained(pretrained_dir)
|
|
if cfg.use_peft:
|
|
# When using PEFT, policy.save_pretrained will only write the adapter weights + config, not the
|
|
# policy config which we need for loading the model. In this case we'll write it ourselves.
|
|
policy.config.save_pretrained(pretrained_dir)
|
|
save_training_state(checkpoint_dir, step, optimizer, scheduler)
|
|
|
|
|
|
def save_training_state(
|
|
checkpoint_dir: Path,
|
|
train_step: int,
|
|
optimizer: Optimizer | None = None,
|
|
scheduler: LRScheduler | None = None,
|
|
) -> None:
|
|
"""
|
|
Saves the training step, optimizer state, scheduler state, and rng state.
|
|
|
|
Args:
|
|
save_dir (Path): The directory to save artifacts to.
|
|
train_step (int): Current training step.
|
|
optimizer (Optimizer | None, optional): The optimizer from which to save the state_dict.
|
|
Defaults to None.
|
|
scheduler (LRScheduler | None, optional): The scheduler from which to save the state_dict.
|
|
Defaults to None.
|
|
"""
|
|
save_dir = checkpoint_dir / TRAINING_STATE_DIR
|
|
save_dir.mkdir(parents=True, exist_ok=True)
|
|
save_training_step(train_step, save_dir)
|
|
save_rng_state(save_dir)
|
|
if optimizer is not None:
|
|
save_optimizer_state(optimizer, save_dir)
|
|
if scheduler is not None:
|
|
save_scheduler_state(scheduler, save_dir)
|
|
|
|
|
|
def load_training_state(
|
|
checkpoint_dir: Path, optimizer: Optimizer, scheduler: LRScheduler | None
|
|
) -> tuple[int, Optimizer, LRScheduler | None]:
|
|
"""
|
|
Loads the training step, optimizer state, scheduler state, and rng state.
|
|
This is used to resume a training run.
|
|
|
|
Args:
|
|
checkpoint_dir (Path): The checkpoint directory. Should contain a 'training_state' dir.
|
|
optimizer (Optimizer): The optimizer to load the state_dict to.
|
|
scheduler (LRScheduler | None): The scheduler to load the state_dict to (can be None).
|
|
|
|
Raises:
|
|
NotADirectoryError: If 'checkpoint_dir' doesn't contain a 'training_state' dir
|
|
|
|
Returns:
|
|
tuple[int, Optimizer, LRScheduler | None]: training step, optimizer and scheduler with their
|
|
state_dict loaded.
|
|
"""
|
|
training_state_dir = checkpoint_dir / TRAINING_STATE_DIR
|
|
if not training_state_dir.is_dir():
|
|
raise NotADirectoryError(training_state_dir)
|
|
|
|
load_rng_state(training_state_dir)
|
|
step = load_training_step(training_state_dir)
|
|
optimizer = load_optimizer_state(optimizer, training_state_dir)
|
|
if scheduler is not None:
|
|
scheduler = load_scheduler_state(scheduler, training_state_dir)
|
|
|
|
return step, optimizer, scheduler
|