mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 00:59:46 +00:00
less video prefetch
This commit is contained in:
@@ -61,7 +61,7 @@ class RLearNConfig(PreTrainedConfig):
|
||||
use_tanh_head: bool = False # when True, bound outputs in [-1, 1]
|
||||
|
||||
# Training
|
||||
learning_rate: float = 3e-5
|
||||
learning_rate: float = 1e-4
|
||||
weight_decay: float = 0.01
|
||||
|
||||
# Performance optimizations
|
||||
|
||||
@@ -82,15 +82,17 @@ _ Open X-Embodiment (OXE)
|
||||
- benchmark siglip 2 vs this implementation forward pass, debug speed [x]
|
||||
- use siglip 2 [x]
|
||||
- Overfit on one episode []
|
||||
- Cleanup code? []
|
||||
- Cleanup code? [] + enable language loss
|
||||
- Convert python -m lerobot.datasets.v21.convert_dataset_v20_to_v21 --repo-id=IPEC-COMMUNITY/bc_z_lerobot and train on 1 percent
|
||||
- Then on 10 percent
|
||||
- Ablation dino v2 vs dino v3 base 86 M
|
||||
- Then on 10 percent []
|
||||
- Ablation 16 successive frame vs 16 frame samples with stride 2 or 4 []
|
||||
- Add more artificial text to dataset generated by vlm (google gemini) []
|
||||
- See google gemini vlm caption [] https://gemini.google.com/app/7e332ffaf32580f2
|
||||
- Multiple captions per video, create a method to generate as much data as possible etc [] https://arxiv.org/abs/2508.13446, https://arxiv.org/pdf/2412.04453
|
||||
- How can we improve spatial aware learning? solve issue of Contrastive learning and position
|
||||
- Extend evaluation []
|
||||
- Add other datasets from OXE mentioned in rewind []
|
||||
- Extend evaluation []
|
||||
- Ablation for size vision encoder, language encoder, temporal head []
|
||||
- Add other datasets mentioned here []
|
||||
- How can we improve spatial aware learning? solve issue of Contrastive learning and position []
|
||||
|
||||
|
||||
|
||||
@@ -258,7 +258,7 @@ def train(cfg: TrainPipelineConfig):
|
||||
pin_memory=device.type == "cuda",
|
||||
drop_last=False,
|
||||
persistent_workers=cfg.num_workers > 0, # Keep workers alive between epochs
|
||||
prefetch_factor=12, # Maximum prefetch for video pipeline
|
||||
prefetch_factor=6, # Maximum prefetch for video pipeline
|
||||
timeout=30, # Prevent hanging on video decode errors
|
||||
)
|
||||
dl_iter = cycle(dataloader)
|
||||
|
||||
Reference in New Issue
Block a user