From d60a700d2b32590ed113d694fd87617e43506081 Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Sun, 5 Apr 2026 21:23:13 +0200 Subject: [PATCH] chore(policy): multi dit docs (#3285) * docs(policy): add libero results multi task dit + remove readme in src code * docs(policy): add hyperlink to doc file in src code * chore(style): pre-commit --- docs/source/multi_task_dit.mdx | 48 +++++++++++++++++++ src/lerobot/policies/multi_task_dit/README.md | 38 +-------------- 2 files changed, 49 insertions(+), 37 deletions(-) mode change 100644 => 120000 src/lerobot/policies/multi_task_dit/README.md diff --git a/docs/source/multi_task_dit.mdx b/docs/source/multi_task_dit.mdx index c3cced708..450d8a9f2 100644 --- a/docs/source/multi_task_dit.mdx +++ b/docs/source/multi_task_dit.mdx @@ -331,6 +331,54 @@ lerobot-train \ --wandb.project=multitask_dit ``` +## Libero Results + +``` +python -m lerobot.scripts.lerobot_train \ + --dataset.repo_id=HuggingFaceVLA/libero \ + --policy.type=multi_task_dit \ + --policy.push_to_hub=false \ + --output_dir="./outputs/multitask_dit_libero" \ + --job_name="multitask-dit-libero" \ + --wandb.enable=true \ + --wandb.project=multitask_dit_libero \ + --dataset.image_transforms.enable=true \ + --dataset.image_transforms.max_num_transforms=4 \ + --dataset.image_transforms.tfs='{"brightness":{"type":"ColorJitter","kwargs":{"brightness":[0.75,1.25]}},"contrast":{"type":"ColorJitter","kwargs":{"contrast":[0.6,1.4]}},"saturation":{"type":"ColorJitter","kwargs":{"saturation":[0.8,1.2]}},"hue":{"type":"ColorJitter","kwargs":{"hue":[-0.05,0.05]}},"sharpness":{"type":"SharpnessJitter","kwargs":{"sharpness":[0.6,1.4]}},"rotation":{"type":"RandomRotation","kwargs":{"degrees":[-5,5]}},"translation":{"type":"RandomAffine","kwargs":{"degrees":0,"translate":[0.1,0.1]}}}' \ + --dataset.video_backend=torchcodec \ + --policy.use_amp=true \ + --policy.horizon=48 \ + --policy.n_obs_steps=2 \ + --policy.use_rope=true \ + --policy.use_positional_encoding=false \ + --policy.hidden_dim=768 \ + --policy.num_layers=8 \ + --policy.num_heads=12 \ + --policy.dropout=0.1 \ + --policy.timestep_embed_dim=256 \ + --policy.objective=diffusion \ + --policy.optimizer_lr=3e-4 \ + --policy.optimizer_weight_decay=0 \ + --policy.scheduler_warmup_steps=0 \ + --policy.vision_encoder_name=openai/clip-vit-base-patch16 \ + --policy.image_resize_shape=[256,256] \ + --policy.image_crop_is_random=true \ + --policy.text_encoder_name=openai/clip-vit-base-patch16 \ + --policy.vision_encoder_lr_multiplier=0.1 \ + --policy.device=cuda \ + --num_workers=8 \ + --save_freq=4000 \ + --log_freq=100 \ + --steps=100000 \ + --batch_size=320 +``` + +Results: + +| LIBERO Spatial | LIBERO Object | LIBERO Goal | LIBERO 10 | Average | +| -------------- | ------------- | ----------- | --------- | ------- | +| 87.0 | 98.2 | 93.8 | 83.2 | 90.6 | + ## References For more details on the technical implementation and architecture, see: diff --git a/src/lerobot/policies/multi_task_dit/README.md b/src/lerobot/policies/multi_task_dit/README.md deleted file mode 100644 index f24fa927e..000000000 --- a/src/lerobot/policies/multi_task_dit/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# Multitask DiT Policy - -## Citation - -If you use this work, please cite the following works: - -```bibtex -@misc{jones2025multitaskditpolicy, - author = {Bryson Jones}, - title = {Dissecting and Open-Sourcing Multitask Diffusion Transformer Policy}, - year = {2025}, - url = {https://brysonkjones.substack.com/p/dissecting-and-open-sourcing-multitask-diffusion-transformer-policy}, - note = {Blog post} -} -``` - -```bibtex -@misc{trilbmteam2025carefulexaminationlargebehaviormodels, - author = {TRI LBM Team}, - title = {A Careful Examination of Large Behavior Models for Multitask Dexterous Manipulation}, - year = {2025}, - eprint = {arXiv:2507.05331}, - archivePrefix = {arXiv}, - primaryClass = {cs.RO}, - url = {https://arxiv.org/abs/2507.05331} -} -``` - -```bibtex -@misc{bostondynamics2025largebehaviormodelsatlas, - author = {Boston Dynamics and TRI Research Team}, - title = {Large Behavior Models and Atlas Find New Footing}, - year = {2025}, - url = {https://bostondynamics.com/blog/large-behavior-models-atlas-find-new-footing/}, - note = {Blog post} -} -``` diff --git a/src/lerobot/policies/multi_task_dit/README.md b/src/lerobot/policies/multi_task_dit/README.md new file mode 120000 index 000000000..0311278c9 --- /dev/null +++ b/src/lerobot/policies/multi_task_dit/README.md @@ -0,0 +1 @@ +../../../../docs/source/policy_multi_task_dit_README.md \ No newline at end of file