mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 12:09:42 +00:00
update subtask annotate
This commit is contained in:
@@ -16,13 +16,14 @@ TEMPERATURE=0.9
|
|||||||
SAMPLE_INTERVAL=5.0 # generate dialogue every 5 seconds (all episodes processed)
|
SAMPLE_INTERVAL=5.0 # generate dialogue every 5 seconds (all episodes processed)
|
||||||
|
|
||||||
# Run subtask annotation
|
# Run subtask annotation
|
||||||
# python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/subtask_annotate.py \
|
python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/subtask_annotate.py \
|
||||||
# --repo-id "$REPO_ID" \
|
--repo-id "$REPO_ID" \
|
||||||
# --video-key observation.images.image \
|
--video-key observation.images.image \
|
||||||
# --output-dir "$OUTPUT_DIR" \
|
--output-dir "$OUTPUT_DIR" \
|
||||||
# --skip-existing \
|
--skip-existing \
|
||||||
# --output-repo-id "jadechoghari/libero10-annotate" \
|
--output-repo-id "jadechoghari/libero10-annotate" \
|
||||||
# --batch-size "$BATCH_SIZE" \
|
--batch-size "$BATCH_SIZE" \
|
||||||
|
|
||||||
# run synthetic data generation (all episodes processed)
|
# run synthetic data generation (all episodes processed)
|
||||||
# python examples/dataset/annotate_pgen.py \
|
# python examples/dataset/annotate_pgen.py \
|
||||||
# --repo-id "$REPO_ID" \
|
# --repo-id "$REPO_ID" \
|
||||||
@@ -41,10 +42,10 @@ SAMPLE_INTERVAL=5.0 # generate dialogue every 1 second (all episodes processed)
|
|||||||
# add --push-to-hub flag
|
# add --push-to-hub flag
|
||||||
|
|
||||||
# efficient batch processing: 4 episodes at once
|
# efficient batch processing: 4 episodes at once
|
||||||
python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/high_level_annotate.py \
|
# python /admin/home/jade_choghari/lerobot/src/lerobot/policies/pi05_full/annotate/high_level_annotate.py \
|
||||||
--data-dir "/fsx/jade_choghari/outputs/libero-10-annotate" \
|
# --data-dir "/fsx/jade_choghari/outputs/libero-10-annotate" \
|
||||||
--output-dir "$OUTPUT_DIR" \
|
# --output-dir "$OUTPUT_DIR" \
|
||||||
--video-mode \
|
# --video-mode \
|
||||||
--video-key observation.images.image \
|
# --video-key observation.images.image \
|
||||||
--video-batch-size "$BATCH_SIZE" \
|
# --video-batch-size "$BATCH_SIZE" \
|
||||||
--sample-interval 5.0
|
# --sample-interval 5.0
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from huggingface_hub import HfApi
|
|||||||
import lerobot
|
import lerobot
|
||||||
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
from lerobot.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
|
||||||
|
|
||||||
dataset = LeRobotDataset(repo_id="local", root="/fsx/jade_choghari/.cache/huggingface/lerobot/lerobot/libero_10/")
|
dataset = LeRobotDataset(repo_id="local", root="/fsx/jade_choghari/outputs/libero-10-annotate")
|
||||||
|
|
||||||
dataloader = torch.utils.data.DataLoader(
|
dataloader = torch.utils.data.DataLoader(
|
||||||
dataset,
|
dataset,
|
||||||
@@ -15,12 +15,18 @@ dataloader = torch.utils.data.DataLoader(
|
|||||||
|
|
||||||
batch = next(iter(dataloader))
|
batch = next(iter(dataloader))
|
||||||
print(batch.keys())
|
print(batch.keys())
|
||||||
print(batch['task_index_high_level'].shape)
|
# print(batch['task_index_high_level'].shape)
|
||||||
print(batch['task_index_high_level'])
|
# print(batch['task_index_high_level'])
|
||||||
print(batch['user_prompt'][0])
|
# print(batch['user_prompt'][0])
|
||||||
print(batch['robot_utterance'][0])
|
# print(batch['robot_utterance'][0])
|
||||||
print(batch['task'][0])
|
# print(batch['task'][0])
|
||||||
|
|
||||||
|
valid_episode_list = []
|
||||||
|
for episode_idx in range(len(dataset.meta.episodes)):
|
||||||
|
subtask_index = dataset[episode_idx]["subtask_index"]
|
||||||
|
valid_episode_list.append(episode_idx)
|
||||||
|
|
||||||
|
print(len(valid_episode_list))
|
||||||
|
|
||||||
# read this parquet /fsx/jade_choghari/outputs/pgen_annotations1/meta/tasks.parquet
|
# read this parquet /fsx/jade_choghari/outputs/pgen_annotations1/meta/tasks.parquet
|
||||||
# import pandas as pd
|
# import pandas as pd
|
||||||
|
|||||||
@@ -1012,8 +1012,9 @@ def create_subtask_index_array(
|
|||||||
console = Console()
|
console = Console()
|
||||||
|
|
||||||
# Array to store subtask index for each frame
|
# Array to store subtask index for each frame
|
||||||
|
# Initialize with -1 to indicate unannotated frames
|
||||||
full_dataset_length = len(dataset)
|
full_dataset_length = len(dataset)
|
||||||
subtask_indices = np.zeros(full_dataset_length, dtype=np.int64)
|
subtask_indices = np.full(full_dataset_length, -1, dtype=np.int64)
|
||||||
|
|
||||||
console.print(f"[cyan]Creating subtask_index array for {full_dataset_length} frames...[/cyan]")
|
console.print(f"[cyan]Creating subtask_index array for {full_dataset_length} frames...[/cyan]")
|
||||||
|
|
||||||
|
|||||||
@@ -54,8 +54,8 @@ class Pi05FullPrepareStateTokenizerProcessorStep(ProcessorStep):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
max_state_dim: int = 32
|
max_state_dim: int = 32
|
||||||
user_prompt_key: str = "user_prompt"
|
user_prompt_key: str = "task"
|
||||||
command_key: str = "task"
|
command_key: str = "subtask"
|
||||||
|
|
||||||
def __call__(self, transition: EnvTransition) -> EnvTransition:
|
def __call__(self, transition: EnvTransition) -> EnvTransition:
|
||||||
transition = transition.copy()
|
transition = transition.copy()
|
||||||
@@ -85,7 +85,7 @@ class Pi05FullPrepareStateTokenizerProcessorStep(ProcessorStep):
|
|||||||
for i, user_prompt in enumerate(user_prompts):
|
for i, user_prompt in enumerate(user_prompts):
|
||||||
cleaned_text = user_prompt.strip().replace("_", " ").replace("\n", " ")
|
cleaned_text = user_prompt.strip().replace("_", " ").replace("\n", " ")
|
||||||
state_str = " ".join(map(str, discretized_states[i]))
|
state_str = " ".join(map(str, discretized_states[i]))
|
||||||
full_prompt = f"User prompt: {cleaned_text}, State: {state_str};\n"
|
full_prompt = f"Task: {cleaned_text}, State: {state_str};\n"
|
||||||
full_prompts.append(full_prompt)
|
full_prompts.append(full_prompt)
|
||||||
|
|
||||||
transition[TransitionKey.COMPLEMENTARY_DATA][self.user_prompt_key] = full_prompts
|
transition[TransitionKey.COMPLEMENTARY_DATA][self.user_prompt_key] = full_prompts
|
||||||
|
|||||||
@@ -340,11 +340,21 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
|
|||||||
|
|
||||||
# create dataloader for offline training
|
# create dataloader for offline training
|
||||||
if hasattr(cfg.policy, "drop_n_last_frames"):
|
if hasattr(cfg.policy, "drop_n_last_frames"):
|
||||||
|
# loop over the dataset to find the episode indices whose subtask index is not -1 (i.e. annotated episodes)
|
||||||
|
# valid_episode_list is passed to episode_indices_to_use
|
||||||
|
valid_episode_list = []
|
||||||
|
for episode_idx in range(len(dataset.meta.episodes)):
|
||||||
|
subtask_index = dataset[episode_idx]["subtask_index"]
|
||||||
|
if subtask_index != -1:
|
||||||
|
valid_episode_list.append(episode_idx)
|
||||||
|
|
||||||
|
episode_indices_to_use = valid_episode_list
|
||||||
|
|
||||||
shuffle = False
|
shuffle = False
|
||||||
sampler = EpisodeAwareSampler(
|
sampler = EpisodeAwareSampler(
|
||||||
dataset.meta.episodes["dataset_from_index"],
|
dataset.meta.episodes["dataset_from_index"],
|
||||||
dataset.meta.episodes["dataset_to_index"],
|
dataset.meta.episodes["dataset_to_index"],
|
||||||
episode_indices_to_use=dataset.episodes,
|
episode_indices_to_use=episode_indices_to_use,
|
||||||
drop_n_last_frames=cfg.policy.drop_n_last_frames,
|
drop_n_last_frames=cfg.policy.drop_n_last_frames,
|
||||||
shuffle=True,
|
shuffle=True,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user