diff --git a/examples/dataset/load_libero.py b/examples/dataset/load_libero.py
new file mode 100644
--- /dev/null
+++ b/examples/dataset/load_libero.py
@@ -0,0 +1,16 @@
+"""Minimal example: load the `lerobot/libero` dataset and print the keys of one batch."""
+
+import torch
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+dataset = LeRobotDataset(repo_id="lerobot/libero")
+
+dataloader = torch.utils.data.DataLoader(
+    dataset,
+    num_workers=0,
+    batch_size=4,
+    shuffle=True,
+)
+batch = next(iter(dataloader))
+print(batch.keys())
diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py
--- a/src/lerobot/datasets/lerobot_dataset.py
+++ b/src/lerobot/datasets/lerobot_dataset.py
@@ -162,7 +162,8 @@ class LeRobotDatasetMetadata:
         self.info = load_info(self.root)
         check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION)
         self.tasks = load_tasks(self.root)
-        self.tasks_high_level = load_tasks_high_level(self.root)
+        # NOTE(review): high-level task loading is disabled; confirm nothing still reads `tasks_high_level`.
+        # self.tasks_high_level = load_tasks_high_level(self.root)
         self.episodes = load_episodes(self.root)
         self.stats = load_stats(self.root)
 
diff --git a/src/lerobot/policies/pi05/train.sh b/src/lerobot/policies/pi05/train.sh
index ef448f328..004a040fd 100644
--- a/src/lerobot/policies/pi05/train.sh
+++ b/src/lerobot/policies/pi05/train.sh
@@ -2,7 +2,7 @@ export CUDA_LAUNCH_BLOCKING=1
 lerobot-train \
   --dataset.repo_id=local \
   --dataset.root=/fsx/jade_choghari/outputs/collect-data-pgen \
-  --output_dir=/fsx/jade_choghari/outputs/pi0_fast_fruit1 \
+  --output_dir=/fsx/jade_choghari/outputs/pi0_fast_fruit2 \
   --job_name=pi0_training \
   --policy.repo_id=jade_choghari/pi0-base1 \
   --policy.path=lerobot/pi05_base \
@@ -14,9 +14,10 @@ lerobot-train \
     "observation.images.left_wrist": "observation.images.left_wrist_0_rgb",
     "observation.images.right_wrist": "observation.images.right_wrist_0_rgb",
   }' \
-  --batch_size=4 \
+  --batch_size=16 \
   --policy.device=cuda \
-  --wandb.enable=true \
-  --wandb.disable_artifact=true \
-  --wandb.project=pi05hi-training \
+  --policy.fast_only=true \
+  # --wandb.enable=true \
+  # --wandb.disable_artifact=true \
+  # --wandb.project=pi05hi-training \
 # /fsx/jade_choghari/.cache/huggingface/lerobot/jadechoghari/collect-data
\ No newline at end of file
diff --git a/src/lerobot/policies/pi05/train_fast.sh b/src/lerobot/policies/pi05/train_fast.sh
index 3f6b6e778..00753ef11 100644
--- a/src/lerobot/policies/pi05/train_fast.sh
+++ b/src/lerobot/policies/pi05/train_fast.sh
@@ -1,9 +1,6 @@
 python src/lerobot/policies/pi05/train_fast_tokenizer.py \
-    --repo_id "local" \
-    --root "/fsx/jade_choghari/outputs/collect-data-pgen" \
-    --action_horizon 16 \
-    --encoded_dims "0:15" \
+    --repo_id "lerobot/libero" \
     --action_horizon 50 \
+    --encoded_dims "0:6" \
     --vocab_size 1024 \
-    --scale 10.0 \
     --output_dir "/fsx/jade_choghari/outputs/fast_tokenizer"
diff --git a/src/lerobot/policies/pi05/train_libero.sh b/src/lerobot/policies/pi05/train_libero.sh
new file mode 100644
index 000000000..f3874e958
--- /dev/null
+++ b/src/lerobot/policies/pi05/train_libero.sh
@@ -0,0 +1,19 @@
+export CUDA_LAUNCH_BLOCKING=1
+lerobot-train \
+  --dataset.repo_id=local \
+  --dataset.root=/fsx/jade_choghari/data/libero \
+  --output_dir=/fsx/jade_choghari/outputs/libero_training_fast_1 \
+  --job_name=libero_training_fast \
+  --policy.repo_id=jade_choghari/pi05-fast-libero \
+  --policy.path=/fsx/jade_choghari/models/libero-pi-fast \
+  --policy.dtype=bfloat16 \
+  --steps=200000 \
+  --save_freq=30000 \
+  --batch_size=16 \
+  --policy.device=cuda \
+  --policy.fast_only=true \
+  --policy.gradient_checkpointing=true \
+  # --wandb.enable=true \
+  # --wandb.disable_artifact=true \
+  # --wandb.project=pi05-libero-training \
+# /fsx/jade_choghari/.cache/huggingface/lerobot/jadechoghari/collect-data
\ No newline at end of file
diff --git a/src/lerobot/policies/pi05/train_multi.sh b/src/lerobot/policies/pi05/train_multi.sh
--- a/src/lerobot/policies/pi05/train_multi.sh
+++ b/src/lerobot/policies/pi05/train_multi.sh
@@ -1,23 +1,16 @@
-rm -rf /fsx/jade_choghari/outputs/pi0_multi_training
 accelerate launch --multi_gpu --num_processes=2 \
   $(which lerobot-train) \
-  --dataset.repo_id=local \
-  --dataset.root=/fsx/jade_choghari/outputs/collect-data-pgen \
-  --output_dir=/fsx/jade_choghari/outputs/pi0_multi_training \
-  --job_name=pi0_multi_training \
-  --policy.repo_id=jadechoghari/pi0-base1 \
-  --policy.path=lerobot/pi05_base \
+  --dataset.repo_id=lerobot/libero \
+  --output_dir=/fsx/jade_choghari/outputs/libero_training_fast \
+  --job_name=libero_training_fast \
+  --policy.repo_id=jade_choghari/pi05-fast-libero \
+  --policy.path=/fsx/jade_choghari/models/libero-pi-fast \
   --policy.dtype=bfloat16 \
-  --steps=50000 \
-  --save_freq=5000 \
-  --rename_map='{
-    "observation.images.base": "observation.images.base_0_rgb",
-    "observation.images.left_wrist": "observation.images.left_wrist_0_rgb",
-    "observation.images.right_wrist": "observation.images.right_wrist_0_rgb",
-  }' \
-  --policy.gradient_checkpointing=true \
-  --batch_size=1 \
-  --policy.device=cpu
-  # --wandb.enable=true \
-  # --wandb.disable_artifact=true \
-  # --wandb.project=pi05hi-training \
+  --steps=200000 \
+  --save_freq=30000 \
+  --batch_size=16 \
+  --policy.device=cuda \
+  --policy.fast_only=true \
+  --wandb.enable=true \
+  --wandb.disable_artifact=true \
+  --wandb.project=pi05-libero-training
diff --git a/src/lerobot/processor/tokenizer_processor.py b/src/lerobot/processor/tokenizer_processor.py
--- a/src/lerobot/processor/tokenizer_processor.py
+++ b/src/lerobot/processor/tokenizer_processor.py
@@ -453,10 +453,12 @@ class ActionTokenizerProcessorStep(ActionProcessorStep):
     tokenizer_name: str | None = None
     tokenizer: Any | None = None
     trust_remote_code: bool = True
-    max_action_tokens: int = 32
+    max_action_tokens: int = 256
     # Internal tokenizer instance (not part of the config)
     action_tokenizer: Any = field(default=None, init=False, repr=False)
+    _paligemma_tokenizer: Any = field(default=None, init=False, repr=False)
+    _fast_skip_tokens: int = field(default=128, init=False, repr=False)
 
     def __post_init__(self):
         """
         Initializes the action tokenizer after the dataclass is created.
@@ -488,6 +490,9 @@ class ActionTokenizerProcessorStep(ActionProcessorStep):
                 "Either 'tokenizer' or 'tokenizer_name' must be provided. "
                 "Pass a tokenizer object directly or a tokenizer name to auto-load."
             )
+
+        # The last `_fast_skip_tokens` ids of the PaliGemma vocab are special tokens; the id mapping skips them.
+        self._paligemma_tokenizer = AutoTokenizer.from_pretrained("google/paligemma-3b-pt-224", trust_remote_code=True, add_eos_token=True, add_bos_token=False)
 
     def __call__(self, transition: EnvTransition) -> EnvTransition:
         """
@@ -520,6 +525,12 @@ class ActionTokenizerProcessorStep(ActionProcessorStep):
         new_transition[TransitionKey.COMPLEMENTARY_DATA] = complementary_data
         return new_transition
 
+    def _act_tokens_to_paligemma_tokens(self, tokens: torch.Tensor) -> torch.Tensor:
+        """
+        Maps action token ids to PaliGemma vocabulary ids: `vocab_size - 1 - _fast_skip_tokens - token`.
+        """
+        return self._paligemma_tokenizer.vocab_size - 1 - self._fast_skip_tokens - tokens
+
     def _tokenize_action(self, action: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Tokenizes the action tensor and creates a mask.
@@ -568,8 +579,14 @@ class ActionTokenizerProcessorStep(ActionProcessorStep):
             if tokens.dim() > 1:
                 tokens = tokens.flatten()
 
+            tokens = torch.cat([self._act_tokens_to_paligemma_tokens(tokens), torch.tensor(self._paligemma_tokenizer.encode("|"), device=action.device)])
             # Truncate or pad to max_action_tokens
             if len(tokens) > self.max_action_tokens:
+                import logging
+                logging.warning(
+                    f"Token length ({len(tokens)}) exceeds max length ({self.max_action_tokens}), truncating. "
+                    "Consider increasing `max_action_tokens` if this happens frequently."
+                )
                 tokens = tokens[:self.max_action_tokens]
                 mask = torch.ones(self.max_action_tokens, dtype=torch.bool, device=action.device)
             else: