mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-16 09:09:48 +00:00
update subtask annotate
This commit is contained in:
@@ -16,13 +16,14 @@ TEMPERATURE=0.9
|
||||
SAMPLE_INTERVAL=5.0 # generate dialogue every 5 seconds (all episodes processed)
|
||||
|
||||
# Run subtask annotation
|
||||
# python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/subtask_annotate.py \
|
||||
# --repo-id "$REPO_ID" \
|
||||
# --video-key observation.images.image \
|
||||
# --output-dir "$OUTPUT_DIR" \
|
||||
# --skip-existing \
|
||||
# --output-repo-id "jadechoghari/libero10-annotate" \
|
||||
# --batch-size "$BATCH_SIZE" \
|
||||
python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/subtask_annotate.py \
|
||||
--repo-id "$REPO_ID" \
|
||||
--video-key observation.images.image \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
--skip-existing \
|
||||
--output-repo-id "jadechoghari/libero10-annotate" \
|
||||
--batch-size "$BATCH_SIZE" \
|
||||
|
||||
# run synthetic data generation (all episodes processed)
|
||||
# python examples/dataset/annotate_pgen.py \
|
||||
# --repo-id "$REPO_ID" \
|
||||
@@ -41,10 +42,10 @@ SAMPLE_INTERVAL=5.0 # generate dialogue every 1 second (all episodes processed)
|
||||
# add --push-to-hub flag
|
||||
|
||||
# efficient batch processing: 4 episodes at once
|
||||
python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/high_level_annotate.py \
|
||||
--data-dir "/fsx/jade_choghari/outputs/libero-10-annotate" \
|
||||
--output-dir "$OUTPUT_DIR" \
|
||||
--video-mode \
|
||||
--video-key observation.images.image \
|
||||
--video-batch-size "$BATCH_SIZE" \
|
||||
--sample-interval 5.0
|
||||
# python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/high_level_annotate.py \
|
||||
# --data-dir "/fsx/jade_choghari/outputs/libero-10-annotate" \
|
||||
# --output-dir "$OUTPUT_DIR" \
|
||||
# --video-mode \
|
||||
# --video-key observation.images.image \
|
||||
# --video-batch-size "$BATCH_SIZE" \
|
||||
# --sample-interval 5.0
|
||||
|
||||
@@ -4,7 +4,7 @@ from huggingface_hub import HfApi
|
||||
import lerobot
|
||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||
|
||||
dataset = LeRobotDataset(repo_id="local", root="/fsx/jade_choghari/.cache/huggingface/lerobot/lerobot/libero_10/")
|
||||
dataset = LeRobotDataset(repo_id="local", root="/fsx/jade_choghari/outputs/libero-10-annotate")
|
||||
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
@@ -15,12 +15,18 @@ dataloader = torch.utils.data.DataLoader(
|
||||
|
||||
batch = next(iter(dataloader))
|
||||
print(batch.keys())
|
||||
print(batch['task_index_high_level'].shape)
|
||||
print(batch['task_index_high_level'])
|
||||
print(batch['user_prompt'][0])
|
||||
print(batch['robot_utterance'][0])
|
||||
print(batch['task'][0])
|
||||
# print(batch['task_index_high_level'].shape)
|
||||
# print(batch['task_index_high_level'])
|
||||
# print(batch['user_prompt'][0])
|
||||
# print(batch['robot_utterance'][0])
|
||||
# print(batch['task'][0])
|
||||
|
||||
valid_episode_list = []
|
||||
for episode_idx in range(len(dataset.meta.episodes)):
|
||||
subtask_index = dataset[episode_idx]["subtask_index"]
|
||||
valid_episode_list.append(episode_idx)
|
||||
|
||||
print(len(valid_episode_list))
|
||||
|
||||
# read this parquet: /fsx/jade_choghari/outputs/pgen_annotations1/meta/tasks.parquet
|
||||
# import pandas as pd
|
||||
|
||||
@@ -1012,8 +1012,9 @@ def create_subtask_index_array(
|
||||
console = Console()
|
||||
|
||||
# Array to store subtask index for each frame
|
||||
# Initialize with -1 to indicate unannotated frames
|
||||
full_dataset_length = len(dataset)
|
||||
subtask_indices = np.zeros(full_dataset_length, dtype=np.int64)
|
||||
subtask_indices = np.full(full_dataset_length, -1, dtype=np.int64)
|
||||
|
||||
console.print(f"[cyan]Creating subtask_index array for {full_dataset_length} frames...[/cyan]")
|
||||
|
||||
|
||||
@@ -54,8 +54,8 @@ class Pi05FullPrepareStateTokenizerProcessorStep(ProcessorStep):
|
||||
"""
|
||||
|
||||
max_state_dim: int = 32
|
||||
user_prompt_key: str = "user_prompt"
|
||||
command_key: str = "task"
|
||||
user_prompt_key: str = "task"
|
||||
command_key: str = "subtask"
|
||||
|
||||
def __call__(self, transition: EnvTransition) -> EnvTransition:
|
||||
transition = transition.copy()
|
||||
@@ -85,7 +85,7 @@ class Pi05FullPrepareStateTokenizerProcessorStep(ProcessorStep):
|
||||
for i, user_prompt in enumerate(user_prompts):
|
||||
cleaned_text = user_prompt.strip().replace("_", " ").replace("\n", " ")
|
||||
state_str = " ".join(map(str, discretized_states[i]))
|
||||
full_prompt = f"User prompt: {cleaned_text}, State: {state_str};\n"
|
||||
full_prompt = f"Task: {cleaned_text}, State: {state_str};\n"
|
||||
full_prompts.append(full_prompt)
|
||||
|
||||
transition[TransitionKey.COMPLEMENTARY_DATA][self.user_prompt_key] = full_prompts
|
||||
|
||||
@@ -340,11 +340,21 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
||||
|
||||
# create dataloader for offline training
|
||||
if hasattr(cfg.policy, "drop_n_last_frames"):
|
||||
# loop over the dataset's subtask annotations to find the episode indices whose subtask index is not -1 (i.e. annotated)
|
||||
# valid_episode_list is then passed to the sampler as episode_indices_to_use
|
||||
valid_episode_list = []
|
||||
for episode_idx in range(len(dataset.meta.episodes)):
|
||||
subtask_index = dataset[episode_idx]["subtask_index"]
|
||||
if subtask_index != -1:
|
||||
valid_episode_list.append(episode_idx)
|
||||
|
||||
episode_indices_to_use = valid_episode_list
|
||||
|
||||
shuffle = False
|
||||
sampler = EpisodeAwareSampler(
|
||||
dataset.meta.episodes["dataset_from_index"],
|
||||
dataset.meta.episodes["dataset_to_index"],
|
||||
episode_indices_to_use=dataset.episodes,
|
||||
episode_indices_to_use=episode_indices_to_use,
|
||||
drop_n_last_frames=cfg.policy.drop_n_last_frames,
|
||||
shuffle=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user