mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-23 12:40:08 +00:00
less video prefetch
This commit is contained in:
@@ -61,7 +61,7 @@ class RLearNConfig(PreTrainedConfig):
|
|||||||
use_tanh_head: bool = False # when True, bound outputs in [-1, 1]
|
use_tanh_head: bool = False # when True, bound outputs in [-1, 1]
|
||||||
|
|
||||||
# Training
|
# Training
|
||||||
learning_rate: float = 3e-5
|
learning_rate: float = 1e-4
|
||||||
weight_decay: float = 0.01
|
weight_decay: float = 0.01
|
||||||
|
|
||||||
# Performance optimizations
|
# Performance optimizations
|
||||||
|
|||||||
@@ -82,15 +82,17 @@ _ Open X-Embodiment (OXE)
|
|||||||
- benchmark siglip 2 vs this implementation forward pass, debug speed [x]
|
- benchmark siglip 2 vs this implementation forward pass, debug speed [x]
|
||||||
- use siglip 2 [x]
|
- use siglip 2 [x]
|
||||||
- Overfit on one episode []
|
- Overfit on one episode []
|
||||||
- Cleanup code? []
|
- Cleanup code? [] + enable language loss
|
||||||
- Convert python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 --repo-id=IPEC-COMMUNITY/bc_z_lerobot and train on 1 percent
|
- Convert python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 --repo-id=IPEC-COMMUNITY/bc_z_lerobot and train on 1 percent
|
||||||
- Then on 10 percent
|
- Then on 10 percent []
|
||||||
- Ablation dino v2 vs dino v3 base 86 M
|
- Ablation: 16 successive frames vs 16 frames sampled with stride 2 or 4 []
|
||||||
- Add more artificial text to dataset generated by vlm (google gemini) []
|
- Add more artificial text to dataset generated by vlm (google gemini) []
|
||||||
- See google gemini vlm caption [] https://gemini.google.com/app/7e332ffaf32580f2
|
- See google gemini vlm caption [] https://gemini.google.com/app/7e332ffaf32580f2
|
||||||
- Multiple captions per video, create a method to generate as much data as possible etc [] https://arxiv.org/abs/2508.13446, https://arxiv.org/pdf/2412.04453
|
- Multiple captions per video, create a method to generate as much data as possible etc [] https://arxiv.org/abs/2508.13446, https://arxiv.org/pdf/2412.04453
|
||||||
- How can we improve spatial aware learning? solve issue of Contrastive learning and position
|
|
||||||
- Extend evaluation []
|
|
||||||
- Add other datasets from OXE mentioned in rewind []
|
- Add other datasets from OXE mentioned in rewind []
|
||||||
|
- Extend evaluation []
|
||||||
- Ablation for size vision encoder, language encoder, temporal head []
|
- Ablation for size vision encoder, language encoder, temporal head []
|
||||||
- Add other datasets mentioned here []
|
- Add other datasets mentioned here []
|
||||||
|
- How can we improve spatial aware learning? solve issue of Contrastive learning and position []
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -258,7 +258,7 @@ def train(cfg: TrainPipelineConfig):
|
|||||||
pin_memory=device.type == "cuda",
|
pin_memory=device.type == "cuda",
|
||||||
drop_last=False,
|
drop_last=False,
|
||||||
persistent_workers=cfg.num_workers > 0, # Keep workers alive between epochs
|
persistent_workers=cfg.num_workers > 0, # Keep workers alive between epochs
|
||||||
prefetch_factor=12, # Maximum prefetch for video pipeline
|
prefetch_factor=6, # Maximum prefetch for video pipeline
|
||||||
timeout=30, # Prevent hanging on video decode errors
|
timeout=30, # Prevent hanging on video decode errors
|
||||||
)
|
)
|
||||||
dl_iter = cycle(dataloader)
|
dl_iter = cycle(dataloader)
|
||||||
|
|||||||
Reference in New Issue
Block a user