mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-17 08:17:02 +00:00
refactor(training): rename eval_freq to env_eval_freq
- Rename eval_freq to env_eval_freq to distinguish sim environment evaluation from offline loss evaluation.
This commit is contained in:
@@ -167,9 +167,9 @@ jobs:
|
||||
|
||||
# ── LIBERO TRAIN+EVAL SMOKE ──────────────────────────────────────────────
|
||||
# Train SmolVLA for 1 step (batch_size=1, dataset episode 0 only) then
|
||||
# immediately runs eval inside the training loop (eval_freq=1, 1 episode).
|
||||
# immediately runs eval inside the training loop (env_eval_freq=1, 1 episode).
|
||||
# Tests the full train→eval-within-training pipeline end-to-end.
|
||||
- name: Run Libero train+eval smoke (1 step, eval_freq=1)
|
||||
- name: Run Libero train+eval smoke (1 step, env_eval_freq=1)
|
||||
if: env.HF_USER_TOKEN != ''
|
||||
run: |
|
||||
docker run --name libero-train-smoke --gpus all \
|
||||
@@ -196,7 +196,7 @@ jobs:
|
||||
--output_dir=/tmp/train-smoke \
|
||||
--steps=1 \
|
||||
--batch_size=1 \
|
||||
--eval_freq=1 \
|
||||
--env_eval_freq=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.use_async_envs=false \
|
||||
|
||||
@@ -58,7 +58,7 @@ test-act-ete-train:
|
||||
--dataset.episodes="[0]" \
|
||||
--batch_size=2 \
|
||||
--steps=4 \
|
||||
--eval_freq=2 \
|
||||
--env_eval_freq=2 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1 \
|
||||
--save_freq=2 \
|
||||
@@ -96,7 +96,7 @@ test-diffusion-ete-train:
|
||||
--dataset.episodes="[0]" \
|
||||
--batch_size=2 \
|
||||
--steps=2 \
|
||||
--eval_freq=2 \
|
||||
--env_eval_freq=2 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1 \
|
||||
--save_checkpoint=true \
|
||||
@@ -126,7 +126,7 @@ test-tdmpc-ete-train:
|
||||
--dataset.episodes="[0]" \
|
||||
--batch_size=2 \
|
||||
--steps=2 \
|
||||
--eval_freq=2 \
|
||||
--env_eval_freq=2 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1 \
|
||||
--save_checkpoint=true \
|
||||
@@ -161,7 +161,7 @@ test-smolvla-ete-train:
|
||||
--dataset.episodes="[0]" \
|
||||
--batch_size=2 \
|
||||
--steps=4 \
|
||||
--eval_freq=2 \
|
||||
--env_eval_freq=2 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval.batch_size=1 \
|
||||
--save_freq=2 \
|
||||
|
||||
@@ -719,7 +719,7 @@ Example configuration for training the [reward classifier](https://huggingface.c
|
||||
"num_workers": 4,
|
||||
"steps": 5000,
|
||||
"log_freq": 10,
|
||||
"eval_freq": 1000,
|
||||
"env_eval_freq": 1000,
|
||||
"save_freq": 1000,
|
||||
"save_checkpoint": true,
|
||||
"seed": 2,
|
||||
|
||||
@@ -143,7 +143,7 @@ lerobot-train \
|
||||
--batch_size=4 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval_freq=1000
|
||||
--env_eval_freq=1000
|
||||
```
|
||||
|
||||
## Reproducing published results
|
||||
|
||||
@@ -173,7 +173,7 @@ lerobot-train \
|
||||
--batch_size=4 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval_freq=1000
|
||||
--env_eval_freq=1000
|
||||
```
|
||||
|
||||
## Relationship to LIBERO
|
||||
|
||||
@@ -120,11 +120,11 @@ lerobot-train \
|
||||
--batch_size=4 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--eval_freq=1000
|
||||
--env_eval_freq=1000
|
||||
```
|
||||
|
||||
## Practical tips
|
||||
|
||||
- Use the one-hot task conditioning for multi-task training (MT10/MT50 conventions) so policies have explicit task context.
|
||||
- Inspect the dataset task descriptions and the `info["is_success"]` keys when writing post-processing or logging so your success metrics line up with the benchmark.
|
||||
- Adjust `batch_size`, `steps`, and `eval_freq` to match your compute budget.
|
||||
- Adjust `batch_size`, `steps`, and `env_eval_freq` to match your compute budget.
|
||||
|
||||
@@ -103,7 +103,7 @@ accelerate launch \
|
||||
--batch_size=32 \
|
||||
--num_workers=4 \
|
||||
--log_freq=20 \
|
||||
--eval_freq=-1 \
|
||||
--env_eval_freq=-1 \
|
||||
--save_checkpoint=true \
|
||||
--save_freq=2000
|
||||
```
|
||||
@@ -142,7 +142,7 @@ accelerate launch \
|
||||
--batch_size=32 \
|
||||
--num_workers=4 \
|
||||
--log_freq=20 \
|
||||
--eval_freq=-1 \
|
||||
--env_eval_freq=-1 \
|
||||
--save_checkpoint=true \
|
||||
--save_freq=2000
|
||||
```
|
||||
|
||||
@@ -314,7 +314,7 @@ lerobot-train \
|
||||
--steps=30000 \
|
||||
--save_freq=1000 \
|
||||
--log_freq=100 \
|
||||
--eval_freq=1000 \
|
||||
--env_eval_freq=1000 \
|
||||
--policy.type=multi_task_dit \
|
||||
--policy.device=cuda \
|
||||
--policy.horizon=32 \
|
||||
|
||||
@@ -166,7 +166,7 @@ lerobot-train \
|
||||
--output_dir=./outputs/smolvla_robocasa_CloseFridge \
|
||||
--steps=100000 \
|
||||
--batch_size=4 \
|
||||
--eval_freq=5000 \
|
||||
--env_eval_freq=5000 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=5 \
|
||||
--save_freq=10000
|
||||
|
||||
@@ -165,7 +165,7 @@ lerobot-train \
|
||||
--output_dir=./outputs/smolvla_vlabench_primitive \
|
||||
--steps=100000 \
|
||||
--batch_size=4 \
|
||||
--eval_freq=5000 \
|
||||
--env_eval_freq=5000 \
|
||||
--eval.batch_size=1 \
|
||||
--eval.n_episodes=1 \
|
||||
--save_freq=10000
|
||||
|
||||
@@ -100,7 +100,8 @@ class TrainPipelineConfig(HubMixin):
|
||||
prefetch_factor: int = 4
|
||||
persistent_workers: bool = True
|
||||
steps: int = 100_000
|
||||
eval_freq: int = 20_000
|
||||
# Run policy in the simulation environment every N steps to measure reward/success (0 = disabled).
|
||||
env_eval_freq: int = 20_000
|
||||
log_freq: int = 200
|
||||
tolerance_s: float = 1e-4
|
||||
save_checkpoint: bool = True
|
||||
|
||||
@@ -256,7 +256,7 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
|
||||
# On real-world data, no need to create an environment as evaluations are done outside train.py,
|
||||
# using the eval.py instead, with gym_dora environment and dora-rs.
|
||||
eval_env = None
|
||||
if cfg.eval_freq > 0 and cfg.env is not None and is_main_process:
|
||||
if cfg.env_eval_freq > 0 and cfg.env is not None and is_main_process:
|
||||
logging.info("Creating env")
|
||||
eval_env = make_env(cfg.env, n_envs=cfg.eval.batch_size, use_async_envs=cfg.eval.use_async_envs)
|
||||
|
||||
@@ -534,7 +534,7 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
|
||||
train_tracker.step()
|
||||
is_log_step = cfg.log_freq > 0 and step % cfg.log_freq == 0
|
||||
is_saving_step = step % cfg.save_freq == 0 or step == cfg.steps
|
||||
is_eval_step = cfg.eval_freq > 0 and step % cfg.eval_freq == 0
|
||||
is_env_eval_step = cfg.env_eval_freq > 0 and step % cfg.env_eval_freq == 0
|
||||
|
||||
if is_log_step:
|
||||
# Collective reduce must run on every rank, before the main-process gate below.
|
||||
@@ -579,7 +579,7 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
|
||||
|
||||
accelerator.wait_for_everyone()
|
||||
|
||||
if cfg.env and is_eval_step:
|
||||
if cfg.env and is_env_eval_step:
|
||||
if is_main_process:
|
||||
step_id = get_step_identifier(step, cfg.steps)
|
||||
logging.info(f"Eval policy at step {step}")
|
||||
|
||||
@@ -134,7 +134,7 @@ class TestMultiGPUTraining:
|
||||
f"--output_dir={output_dir}",
|
||||
"--batch_size=4",
|
||||
"--steps=10",
|
||||
"--eval_freq=-1",
|
||||
"--env_eval_freq=-1",
|
||||
"--log_freq=5",
|
||||
"--save_freq=10",
|
||||
"--seed=42",
|
||||
@@ -177,7 +177,7 @@ class TestMultiGPUTraining:
|
||||
f"--output_dir={output_dir}",
|
||||
"--batch_size=4",
|
||||
"--steps=20",
|
||||
"--eval_freq=-1",
|
||||
"--env_eval_freq=-1",
|
||||
"--log_freq=5",
|
||||
"--save_freq=10",
|
||||
"--seed=42",
|
||||
|
||||
Reference in New Issue
Block a user