From 589788e760b6941b134baaba33e2613e169ee6bb Mon Sep 17 00:00:00 2001 From: Jade Choghari Date: Sun, 16 Nov 2025 11:22:05 +0100 Subject: [PATCH] more eval fixes --- src/lerobot/scripts/lerobot_eval.py | 88 ++++++++++++++++------------- test_3.py | 3 +- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/src/lerobot/scripts/lerobot_eval.py b/src/lerobot/scripts/lerobot_eval.py index 3ef5ffad0..aff733c34 100644 --- a/src/lerobot/scripts/lerobot_eval.py +++ b/src/lerobot/scripts/lerobot_eval.py @@ -45,7 +45,17 @@ Note that in both examples, the repo/folder should contain at least `config.json You can learn about the CLI options for this script in the `EvalPipelineConfig` in lerobot/configs/eval.py """ +import sys +import os +# ABSOLUTE PATH TO YOUR PROJECT ROOT +PROJECT_ROOT = "/home/jade_choghari/robot/lerobot" + +# Add root to sys.path BEFORE any imports +if PROJECT_ROOT not in sys.path: + sys.path.insert(0, PROJECT_ROOT) +from xvla.models.modeling_xvla import XVLA +from xvla.models.processing_xvla import XVLAProcessor import concurrent.futures as cf import json import logging @@ -155,14 +165,11 @@ def rollout( disable=inside_slurm(), # we dont want progress bar when we use slurm, since it clutters the logs leave=False, ) - from transformers import AutoProcessor, AutoModel - model = AutoModel.from_pretrained( - "2toINF/X-VLA-WidowX", - trust_remote_code=True, - device="cuda" - ) + + model = XVLA.from_pretrained("/raid/jade/models/xvla-libero") + model.eval() model.to("cuda") - processor = AutoProcessor.from_pretrained("2toINF/X-VLA-WidowX", num_views=2, trust_remote_code=True) + processor = XVLAProcessor.from_pretrained("/raid/jade/models/xvla-libero", num_views=2) from collections import deque action_queue = deque(maxlen=30) @@ -174,40 +181,43 @@ def rollout( all_observations.append(deepcopy(observation)) # Infer "task" from attributes of environments. 
+ observation[f"observation.images.image"] = observation[f"observation.images.image"] * 255 + observation[f"observation.images.image2"] = observation[f"observation.images.image2"] * 255 # TODO: works with SyncVectorEnv but not AsyncVectorEnv observation = add_envs_task(env, observation) - inputs = processor([observation[f"observation.images.image"], observation[f"observation.images.image2"]], observation["task"], do_rescale=False) + inputs = processor([observation[f"observation.images.image"], observation[f"observation.images.image2"]], observation["task"]) observation = preprocessor(observation) - observation["observation.images.image"] = inputs["image_input"][:, 0, ...].to("cuda") - observation["observation.images.image2"] = inputs["image_input"][:, 1, ...].to("cuda") - observation["observation.language.tokens"] = inputs["input_ids"].to("cuda") + inputs_1 = policy._build_model_inputs(observation) + for k in inputs.keys() & inputs_1.keys(): # intersection of keys + a = inputs[k].to("cuda") + b = inputs_1[k].to("cuda") - # (Pdb) inputs.keys() - # dict_keys(['input_ids', 'image_input', 'image_mask', 'proprio', 'domain_id']) - # image_input should be torch.Size([1, 2, 3, 224, 224]) - img0 = observation["observation.images.image"] # [1, 3, 224, 224] - img1 = observation["observation.images.image2"] # [1, 3, 224, 224] - img0 = img0.unsqueeze(1) # [1, 1, 3, 224, 224] - img1 = img1.unsqueeze(1) # [1, 1, 3, 224, 224] - obs = {} - obs['input_ids'] = observation["observation.language.tokens"].to("cuda") - obs['image_input'] = torch.cat([img0, img1], dim=1).to("cuda") - obs['domain_id'] = torch.tensor([int(3)], dtype=torch.long).to("cuda") - obs['proprio'] = observation["observation.state"].to("cuda") - obs['image_mask'] = inputs["image_mask"].to("cuda") + print(f"\n🔎 Key: {k}") - with torch.inference_mode(): - action_1 = policy.select_action(observation).to("cpu").numpy() - if len(action_queue) == 0: - action = model.generate_actions(**obs, steps=10) # shape (1, 30, 20) 
- actions_np = action.detach().cpu().numpy() - # add each timestep as (1, 20) - for t in range(actions_np.shape[1]): - act_t = actions_np[:, t, :] - action_queue.append(act_t) - action = action_queue.popleft() + # Check shape + print(" shape:", a.shape, b.shape) + + # Check if close + if torch.allclose(a, b, atol=1e-5, rtol=1e-5): + print(" ✔️ tensors are equal (allclose)") else: - action = action_queue.popleft() + diff = torch.abs(a - b) + print(" ❌ tensors differ") + print(" max diff:", diff.max().item()) + print(" mean diff:", diff.mean().item()) + breakpoint() + with torch.inference_mode(): + action = policy.select_action(observation).to("cpu").numpy() + # if len(action_queue) == 0: + # action = model.generate_actions(**inputs_1, steps=10) # shape (1, 30, 20) + # actions_np = action.detach().cpu().numpy() + # # add each timestep as (1, 20) + # for t in range(actions_np.shape[1]): + # act_t = actions_np[:, t, :] + # action_queue.append(act_t) + # action = action_queue.popleft() + # else: + # action = action_queue.popleft() # action = postprocessor(action) # breakpoint() # .to("cpu").numpy() @@ -216,10 +226,10 @@ def rollout( target_act = action[:, 9:10] action_numpy = np.concatenate([target_eef, target_axis, target_act], axis=-1) - target_eef_1 = action_1[:, :3] - target_axis_1 = Rotate6D_to_AxisAngle(action_1[:, 3:9]) - target_act_1 = action_1[:, 9:10] - action_numpy_1 = np.concatenate([target_eef_1, target_axis_1, target_act_1], axis=-1) + # target_eef_1 = action_1[:, :3] + # target_axis_1 = Rotate6D_to_AxisAngle(action_1[:, 3:9]) + # target_act_1 = action_1[:, 9:10] + # action_numpy_1 = np.concatenate([target_eef_1, target_axis_1, target_act_1], axis=-1) breakpoint() # Convert to CPU / numpy. 
diff --git a/test_3.py b/test_3.py index 90514fb94..26a2f6c5d 100644 --- a/test_3.py +++ b/test_3.py @@ -68,6 +68,7 @@ inputs.update({ "domain_id": domain_id.to("cuda"), }) +breakpoint() for k in inputs.keys() & inputs_1.keys(): # intersection of keys a = inputs[k] b = inputs_1[k].to("cuda") @@ -100,7 +101,7 @@ action = model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().num action_1 = policy.model.generate_actions(**inputs, steps=10).squeeze(0).float().cpu().numpy() #np all close -print(np.allclose(action, action_1, atol=1e-4, rtol=1e-4)) +print(np.allclose(action, action_1, atol=1e-2, rtol=1e-2)) print("max diff:", np.max(np.abs(action - action_1))) print("mean diff:", np.mean(np.abs(action - action_1))) breakpoint() \ No newline at end of file