Append EOS token to tokenized sequences in TokenizerProcessorStep (working)

This commit is contained in:
Jade Choghari
2025-12-14 14:54:07 +00:00
parent 522396a15a
commit fddd044306
6 changed files with 107 additions and 12 deletions
+4 -1
View File
@@ -19,8 +19,10 @@ pre_processor, post_processor = make_pre_post_processors(
pretrained_path="/fsx/jade_choghari/outputs/pi0_training_new/checkpoints/last/pretrained_model", pretrained_path="/fsx/jade_choghari/outputs/pi0_training_new/checkpoints/last/pretrained_model",
) )
delta_timestamps = {'action': [0.0, 0.03333333333333333, 0.06666666666666667, 0.1, 0.13333333333333333, 0.16666666666666666, 0.2, 0.23333333333333334, 0.26666666666666666, 0.3, 0.3333333333333333, 0.36666666666666664, 0.4, 0.43333333333333335, 0.4666666666666667, 0.5, 0.5333333333333333, 0.5666666666666667, 0.6, 0.6333333333333333, 0.6666666666666666, 0.7, 0.7333333333333333, 0.7666666666666667, 0.8, 0.8333333333333334, 0.8666666666666667, 0.9, 0.9333333333333333, 0.9666666666666667, 1.0, 1.0333333333333334, 1.0666666666666667, 1.1, 1.1333333333333333, 1.1666666666666667, 1.2, 1.2333333333333334, 1.2666666666666666, 1.3, 1.3333333333333333, 1.3666666666666667, 1.4, 1.4333333333333333, 1.4666666666666666, 1.5, 1.5333333333333334, 1.5666666666666667, 1.6, 1.6333333333333333]}
dataset = LeRobotDataset(repo_id="local", root="/fsx/jade_choghari/outputs/pgen_annotations1", delta_timestamps=delta_timestamps)
dataset = LeRobotDataset(repo_id="local", root="/fsx/jade_choghari/outputs/pgen_annotations1")
# rename map --rename_map='{ # rename map --rename_map='{
# "observation.images.side": "observation.images.base_0_rgb", # "observation.images.side": "observation.images.base_0_rgb",
# "observation.images.up": "observation.images.left_wrist_0_rgb" # "observation.images.up": "observation.images.left_wrist_0_rgb"
@@ -45,6 +47,7 @@ dataloader = torch.utils.data.DataLoader(
batch = next(iter(dataloader)) batch = next(iter(dataloader))
batch = pre_processor(batch) batch = pre_processor(batch)
breakpoint()
policy.train() policy.train()
# run inference # run inference
# action = policy.select_action(batch) # action = policy.select_action(batch)
+1 -1
View File
@@ -7,4 +7,4 @@ python examples/dataset/annotate.py \
--repo-id lerobot/svla_so101_pickplace \ --repo-id lerobot/svla_so101_pickplace \
--video-key observation.images.side \ --video-key observation.images.side \
--model Qwen/Qwen3-VL-30B-A3B-Instruct \ --model Qwen/Qwen3-VL-30B-A3B-Instruct \
--episodes 3 5 7 44 --episodes 5
+64
View File
@@ -0,0 +1,64 @@
Fine-tuning output:
(Pdb) images[2].mean()
tensor(-1., device='cuda:0')
(Pdb) images[1].mean()
tensor(-0.5780, device='cuda:0')
(Pdb) images[0].mean()
tensor(-0.7716, device='cuda:0')
(Pdb) (Pdb) high_level_task[0]
tensor([ 2, 7978, 2403, 6911, 235292, 5651, 3124, 573, 18571,
7762, 6643, 573, 9010, 72993, 21810, 4894, 3040, 235292,
235248, 235274, 235274, 235274, 728, 235274, 235248, 235284, 235308,
235308, 235248, 235274, 235318, 235315, 235248, 235274, 235310, 235318,
235248, 235284, 235318, 235248, 235274, 235284, 235321, 235248, 235274,
235284, 235321, 235248, 235274, 235284, 235321, 235248, 235274, 235284,
235321, 235248, 235274, 235284, 235321, 235248, 235274, 235284, 235321,
235248, 235274, 235284, 235321, 235248, 235274, 235284, 235321, 235248,
235274, 235284, 235321, 235248, 235274, 235284, 235321, 235248, 235274,
235284, 235321, 235248, 235274, 235284, 235321, 235248, 235274, 235284,
235321, 235248, 235274, 235284, 235321, 235248, 235274, 235284, 235321,
235248, 235274, 235284, 235321, 235248, 235274, 235284, 235321, 235248,
235274, 235284, 235321, 235248, 235274, 235284, 235321, 235248, 235274,
235284, 235321, 235248, 235274, 235284, 235321, 235248, 235274, 235284,
235321, 235248, 235274, 235284, 235321, 235248, 235274, 235284, 235321,
235248, 235274, 235284, 235321, 235248, 235274, 235284, 235321, 235289,
4284, 8277, 235292, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], device='cuda:0')
(Pdb) subtask_tokens[0]
tensor([ 2, 28040, 7762, 14574, 6643, 9010, 37901, 21810, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
device='cuda:0')
(Pdb) actions.shape
torch.Size([4, 50, 32])
(Pdb) actions.mean()
tensor(0.0143, device='cuda:0')
(Pdb)
Inference:
+4 -7
View File
@@ -756,7 +756,7 @@ class PI05Pytorch(nn.Module): # see openpi `PI0Pytorch`
time_expanded = time[:, None, None] time_expanded = time[:, None, None]
x_t = time_expanded * noise + (1 - time_expanded) * actions x_t = time_expanded * noise + (1 - time_expanded) * actions
u_t = noise - actions u_t = noise - actions
# Embed prefix (images + high_level_task + subtask_tokens) # Embed prefix (images + high_level_task + subtask_tokens)
# Use high_level_task (prompt WITHOUT subtask) + subtask_tokens to predict # Use high_level_task (prompt WITHOUT subtask) + subtask_tokens to predict
prefix_embs, prefix_pad_masks, prefix_att_masks, total_T_images = self.embed_prefix( prefix_embs, prefix_pad_masks, prefix_att_masks, total_T_images = self.embed_prefix(
@@ -809,8 +809,7 @@ class PI05Pytorch(nn.Module): # see openpi `PI0Pytorch`
# Apply mask and compute mean loss over valid tokens # Apply mask and compute mean loss over valid tokens
masked_loss = loss_per_token * subtask_masks.float() masked_loss = loss_per_token * subtask_masks.float()
subtask_loss = masked_loss.sum() / subtask_masks.sum().clamp(min=1) subtask_loss = masked_loss.sum() / subtask_masks.sum().clamp(min=1)
breakpoint()
# Convert embeddings to bfloat16 if needed for the model # Convert embeddings to bfloat16 if needed for the model
if ( if (
self.paligemma_with_expert.paligemma.language_model.layers[0].self_attn.q_proj.weight.dtype self.paligemma_with_expert.paligemma.language_model.layers[0].self_attn.q_proj.weight.dtype
@@ -912,7 +911,6 @@ class PI05Pytorch(nn.Module): # see openpi `PI0Pytorch`
# Embed the generated token and append to prefix # Embed the generated token and append to prefix
next_token_unsqueezed = next_token.unsqueeze(1) # (B, 1) next_token_unsqueezed = next_token.unsqueeze(1) # (B, 1)
breakpoint()
def next_token_embed_func(next_token_unsqueezed): def next_token_embed_func(next_token_unsqueezed):
next_emb = self.paligemma_with_expert.embed_language_tokens(next_token_unsqueezed) next_emb = self.paligemma_with_expert.embed_language_tokens(next_token_unsqueezed)
@@ -1419,7 +1417,7 @@ class PI05Policy(PreTrainedPolicy):
# Use high_level_task tokens (WITHOUT subtask) for inference - we'll generate the subtask # Use high_level_task tokens (WITHOUT subtask) for inference - we'll generate the subtask
high_level_task = batch[f"{OBS_LANGUAGE_HIGH_LEVEL_TASK_TOKENS}"] high_level_task = batch[f"{OBS_LANGUAGE_HIGH_LEVEL_TASK_TOKENS}"]
high_level_task_masks = batch[f"{OBS_LANGUAGE_HIGH_LEVEL_TASK_ATTENTION_MASK}"] high_level_task_masks = batch[f"{OBS_LANGUAGE_HIGH_LEVEL_TASK_ATTENTION_MASK}"]
breakpoint()
# Sample actions using the model (pass through RTC kwargs, no separate state needed for PI05) # Sample actions using the model (pass through RTC kwargs, no separate state needed for PI05)
actions = self.model.sample_actions( actions = self.model.sample_actions(
images, img_masks, high_level_task, high_level_task_masks, images, img_masks, high_level_task, high_level_task_masks,
@@ -1451,7 +1449,7 @@ class PI05Policy(PreTrainedPolicy):
valid_tokens = subtask_tokens[i][subtask_masks[i].bool()] valid_tokens = subtask_tokens[i][subtask_masks[i].bool()]
if len(valid_tokens) > 0: if len(valid_tokens) > 0:
decoded_text = self.tokenizer.decode(valid_tokens, skip_special_tokens=True) decoded_text = self.tokenizer.decode(valid_tokens, skip_special_tokens=True)
print(f"[Training] Ground truth subtask {i}: {decoded_text}") # print(f"[Training] Ground truth subtask {i}: {decoded_text}")
# Compute loss (no separate state needed for PI05) # Compute loss (no separate state needed for PI05)
# high_level_task = instruction tokens WITHOUT subtask (e.g., "High level task: X; State: Y; Subtask:") # high_level_task = instruction tokens WITHOUT subtask (e.g., "High level task: X; State: Y; Subtask:")
@@ -1461,7 +1459,6 @@ class PI05Policy(PreTrainedPolicy):
# Extract the total loss # Extract the total loss
loss = loss_dict["loss"] loss = loss_dict["loss"]
breakpoint()
# Prepare detailed loss dictionary for logging # Prepare detailed loss dictionary for logging
detailed_loss_dict = { detailed_loss_dict = {
"loss": loss.item(), "loss": loss.item(),
+33 -3
View File
@@ -302,15 +302,17 @@ class TokenizerProcessorStep(ObservationProcessorStep):
def _tokenize_text(self, text: str | list[str]) -> dict[str, torch.Tensor]: def _tokenize_text(self, text: str | list[str]) -> dict[str, torch.Tensor]:
""" """
A wrapper around the tokenizer call. A wrapper around the tokenizer call that appends an EOS token to each sequence.
Args: Args:
text: A string or list of strings to tokenize. text: A string or list of strings to tokenize.
Returns: Returns:
A dictionary containing tokenized 'input_ids' and 'attention_mask' as PyTorch tensors. A dictionary containing tokenized 'input_ids' and 'attention_mask' as PyTorch tensors,
with EOS token appended at the end of each sequence.
""" """
return self.input_tokenizer( # Tokenize normally
tokenized = self.input_tokenizer(
text, text,
max_length=self.max_length, max_length=self.max_length,
truncation=self.truncation, truncation=self.truncation,
@@ -318,6 +320,34 @@ class TokenizerProcessorStep(ObservationProcessorStep):
padding_side=self.padding_side, padding_side=self.padding_side,
return_tensors="pt", return_tensors="pt",
) )
# Get EOS token ID
eos_token_id = self.input_tokenizer.eos_token_id
if eos_token_id is None:
# Some tokenizers don't have an EOS token, skip modification
return tokenized
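# Append EOS token to each sequence. The tokenizer call above has already
# padded the batch, so the EOS is written in place into the slot right after
# the last attended token (i.e. over the first padding position).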
input_ids = tokenized["input_ids"]
attention_mask = tokenized["attention_mask"]
for i in range(input_ids.shape[0]):
# Find the position of the last non-padding token
non_pad_positions = (attention_mask[i] == 1).nonzero(as_tuple=True)[0]
if len(non_pad_positions) > 0:
last_token_pos = non_pad_positions[-1].item()
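# Check if there's room to add EOS token
# NOTE(review): this compares against self.max_length, not the actual tensor
# width (input_ids.shape[1]). If the tokenizer output is narrower than
# max_length (e.g. padding to the longest sequence) or is padded on the left,
# last_token_pos + 1 can index past the end of the tensor -- confirm the
# padding mode and padding side used by callers.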
if last_token_pos + 1 < self.max_length:
# Insert EOS token after the last real token
input_ids[i, last_token_pos + 1] = eos_token_id
attention_mask[i, last_token_pos + 1] = 1
else:
# If at max length, replace the last token with EOS
input_ids[i, last_token_pos] = eos_token_id
return {"input_ids": input_ids, "attention_mask": attention_mask}
def get_config(self) -> dict[str, Any]: def get_config(self) -> dict[str, Any]:
""" """
+1
View File
@@ -90,6 +90,7 @@ def update_policy(
# Let accelerator handle mixed precision # Let accelerator handle mixed precision
with accelerator.autocast(): with accelerator.autocast():
loss, output_dict = policy.forward(batch) loss, output_dict = policy.forward(batch)
breakpoint()
# TODO(rcadene): policy.unnormalize_outputs(out_dict) # TODO(rcadene): policy.unnormalize_outputs(out_dict)
# Use accelerator's backward method # Use accelerator's backward method