diff --git a/examples/unitree_g1/motion_loader.py b/examples/unitree_g1/motion_loader.py new file mode 100644 index 000000000..c4c7e6267 --- /dev/null +++ b/examples/unitree_g1/motion_loader.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Load an SMPL motion clip and expose it in SONIC's encoder format. + +SONIC's whole-body tracking mode (``encode_mode == 2``) consumes a flat +720-vector ``smpl_joints_10frame_step1`` = 10 consecutive frames x 24 SMPL +joints x 3 (xyz) at 50 Hz. + +IMPORTANT - frame convention: the encoder expects each frame's joints with the +body's *root orientation removed* (per-frame canonical), exactly like the live +deploy stream's ``smpl_joints_local`` (see ``process_smpl_joints`` in the GEAR +PICO teleop and ``smpl_joints_multi_future_local`` in training). The reference +``smpl_filtered`` clips instead store **world-frame** joints (heading retained), +so feeding them raw makes the robot move but mis-track / never face-forward. +This loader therefore canonicalizes on load using the clip's per-frame root +orientation (``pose_aa[:, :3]``): + + A = Rx(+90deg) * rotvec(pose_aa[:, :3]) # y-up -> z-up root quat + local = base120 * A^-1 * joints # remove root orient + +with ``base120 = quat(0.5,0.5,0.5,0.5)`` (SMPL base rotation). This reproduces +the deployed transform (verified: per-frame hip-heading std -> 0). + +Clip is read from a numpy ``.npz``. Expected keys: + smpl_joints : (T, 24, 3) float32 -- world-frame joint positions, 50 fps + pose_aa : (T, 72) float32 -- SMPL axis-angle (root = [:, :3]) + transl : (T, 3) float32 -- global root translation (optional) + fps : scalar + +Example: + python examples/unitree_g1/motion_loader.py \ + --motion examples/unitree_g1/motions/walk_forward.npz +""" + +import argparse + +import numpy as np + +WINDOW = 10 # frames per encoder window (smpl_joints_10frame_step1) +N_JOINTS = 24 +JOINT_DIM = 3 +SMPL_OBS_DIM = WINDOW * N_JOINTS * JOINT_DIM # 720 + + +def canonicalize_smpl_joints(smpl_joints: np.ndarray, root_aa: np.ndarray) -> np.ndarray: + """Remove per-frame root orientation -> SONIC ``smpl_joints_local`` format. + + Args: + smpl_joints: (T, 24, 3) world-frame (z-up) SMPL joint positions. + root_aa: (T, 3) SMPL global-orient axis-angle (y-up convention). + + Returns: + (T, 24, 3) per-frame root-orientation-removed joints. + """ + from scipy.spatial.transform import Rotation as R + + rx90 = R.from_euler("x", 90, degrees=True) # smpl_root_ytoz_up + base120 = R.from_quat([0.5, 0.5, 0.5, 0.5]) # remove_smpl_base_rot + a = rx90 * R.from_rotvec(root_aa) # z-up root quat (left-mult) + b_inv = base120 * a.inv() # inv(remove_smpl_base_rot(a)) + return np.einsum("tij,tkj->tki", b_inv.as_matrix(), smpl_joints).astype(np.float32) + + +class SmplMotion: + """A single SMPL clip with SONIC-format windowing.""" + + def __init__(self, path: str, loop: bool = True, canonicalize: bool = True): + data = np.load(path) + smpl_joints = data["smpl_joints"].astype(np.float32) # (T, 24, 3) + self.pose_aa = data["pose_aa"].astype(np.float32) if "pose_aa" in data.files else None + self.transl = data["transl"].astype(np.float32) if "transl" in data.files else None + self.fps = float(data["fps"]) if "fps" in data.files else 50.0 + self.loop = loop + + if smpl_joints.ndim != 3 or smpl_joints.shape[1:] != (N_JOINTS, JOINT_DIM): + raise ValueError( + f"Expected smpl_joints (T, {N_JOINTS}, {JOINT_DIM}), got {smpl_joints.shape}" + ) + + # Reference clips store world-frame joints; the encoder wants per-frame + # root-orientation-removed joints. Canonicalize when we have the root pose. + self.canonicalized = False + if canonicalize and self.pose_aa is not None: + smpl_joints = canonicalize_smpl_joints(smpl_joints, self.pose_aa[:, :3]) + self.canonicalized = True + self.smpl_joints = smpl_joints + + self.num_frames = self.smpl_joints.shape[0] + self._cursor = 0 + + def window(self, start: int) -> np.ndarray: + """Return the 720-vector for the 10-frame window beginning at ``start``. + + Frames are laid out oldest->newest, joint-major within a frame: + [f0_j0_xyz, f0_j1_xyz, ..., f9_j23_xyz]. + """ + idx = np.arange(start, start + WINDOW) + if self.loop: + idx = np.mod(idx, self.num_frames) + else: + idx = np.clip(idx, 0, self.num_frames - 1) + return self.smpl_joints[idx].reshape(-1).astype(np.float32) + + def reset(self): + self._cursor = 0 + + def step(self) -> np.ndarray: + """Advance one frame and return the current 720-vector window.""" + w = self.window(self._cursor) + self._cursor += 1 + if self.loop: + self._cursor %= self.num_frames + return w + + @property + def done(self) -> bool: + return (not self.loop) and (self._cursor + WINDOW >= self.num_frames) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--motion", required=True, help="Path to motion .npz") + parser.add_argument("--no-loop", action="store_true") + parser.add_argument("--no-canon", action="store_true", + help="Skip canonicalization (feed raw stored joints)") + args = parser.parse_args() + + m = SmplMotion(args.motion, loop=not args.no_loop, canonicalize=not args.no_canon) + duration = m.num_frames / m.fps + print(f"Loaded '{args.motion}'") + print(f" frames={m.num_frames} fps={m.fps:.1f} duration={duration:.1f}s") + print(f" smpl_joints={m.smpl_joints.shape} canonicalized={m.canonicalized} " + f"pose_aa={None if m.pose_aa is None else m.pose_aa.shape} " + f"transl={None if m.transl is None else m.transl.shape}") + + # Sanity: after canonicalization the per-frame body heading should be fixed. + j = m.smpl_joints + v = (j[:, 2, :2] - j[:, 1, :2]) # R_hip - L_hip, horizontal + a = np.arctan2(v[:, 1], v[:, 0]) + rlen = np.clip(np.hypot(np.cos(a).mean(), np.sin(a).mean()), 1e-9, 1.0) + circ_std = np.degrees(np.sqrt(-2 * np.log(rlen))) + print(f" hip-heading circ-std={circ_std:.1f} deg " + f"(~0 => orientation removed; large => world-frame)") + + w0 = m.window(0) + print(f" window(0): shape={w0.shape} (expected {SMPL_OBS_DIM}) " + f"min={w0.min():.3f} max={w0.max():.3f}") + assert w0.shape == (SMPL_OBS_DIM,), "window must be 720-dim for obs[922:1642]" + + # Simulate a few control ticks. + print(" stepping 5 ticks:") + for t in range(5): + w = m.step() + print(f" t={t} cursor={m._cursor} window_norm={np.linalg.norm(w):.2f}") + + print("OK: motion loads and yields SONIC-format 720-vec windows.") + + +if __name__ == "__main__": + main() diff --git a/examples/unitree_g1/motions/dance_hiphop.npz b/examples/unitree_g1/motions/dance_hiphop.npz new file mode 100644 index 000000000..7fa5e7bea Binary files /dev/null and b/examples/unitree_g1/motions/dance_hiphop.npz differ diff --git a/examples/unitree_g1/motions/dance_jackson.npz b/examples/unitree_g1/motions/dance_jackson.npz new file mode 100644 index 000000000..cad8c9d2f Binary files /dev/null and b/examples/unitree_g1/motions/dance_jackson.npz differ diff --git a/examples/unitree_g1/motions/eating_apple.npz b/examples/unitree_g1/motions/eating_apple.npz new file mode 100644 index 000000000..fc47e1e10 Binary files /dev/null and b/examples/unitree_g1/motions/eating_apple.npz differ diff --git a/examples/unitree_g1/motions/high_jump.npz b/examples/unitree_g1/motions/high_jump.npz new file mode 100644 index 000000000..84a86a14f Binary files /dev/null and b/examples/unitree_g1/motions/high_jump.npz differ diff --git a/examples/unitree_g1/motions/hiphop_ii.npz b/examples/unitree_g1/motions/hiphop_ii.npz new file mode 100644 index 000000000..40efdecf7 Binary files /dev/null and b/examples/unitree_g1/motions/hiphop_ii.npz differ diff --git a/examples/unitree_g1/motions/horse_riding.npz b/examples/unitree_g1/motions/horse_riding.npz new file mode 100644 index 000000000..d8fe0c63b Binary files /dev/null and b/examples/unitree_g1/motions/horse_riding.npz differ diff --git a/examples/unitree_g1/motions/jump_360.npz b/examples/unitree_g1/motions/jump_360.npz new file mode 100644 index 000000000..caf9af433 Binary files /dev/null and b/examples/unitree_g1/motions/jump_360.npz differ diff --git a/examples/unitree_g1/motions/mambo_kicks.npz b/examples/unitree_g1/motions/mambo_kicks.npz new file mode 100644 index 000000000..c2c23dbd8 Binary files /dev/null and b/examples/unitree_g1/motions/mambo_kicks.npz differ diff --git a/examples/unitree_g1/motions/reach_jump.npz b/examples/unitree_g1/motions/reach_jump.npz new file mode 100644 index 000000000..984bb53e1 Binary files /dev/null and b/examples/unitree_g1/motions/reach_jump.npz differ diff --git a/examples/unitree_g1/motions/turn_jump_270.npz b/examples/unitree_g1/motions/turn_jump_270.npz new file mode 100644 index 000000000..881fcb70e Binary files /dev/null and b/examples/unitree_g1/motions/turn_jump_270.npz differ diff --git a/examples/unitree_g1/motions/turn_jump_360.npz b/examples/unitree_g1/motions/turn_jump_360.npz new file mode 100644 index 000000000..e9a4ee21d Binary files /dev/null and b/examples/unitree_g1/motions/turn_jump_360.npz differ diff --git a/examples/unitree_g1/motions/victory_dance.npz b/examples/unitree_g1/motions/victory_dance.npz new file mode 100644 index 000000000..89a8aab10 Binary files /dev/null and b/examples/unitree_g1/motions/victory_dance.npz differ diff --git a/examples/unitree_g1/motions/walk_forward.npz b/examples/unitree_g1/motions/walk_forward.npz new file mode 100644 index 000000000..c64d72dcd Binary files /dev/null and b/examples/unitree_g1/motions/walk_forward.npz differ diff --git a/examples/unitree_g1/pkl_to_npz.py b/examples/unitree_g1/pkl_to_npz.py new file mode 100644 index 000000000..b7de3a69a --- /dev/null +++ b/examples/unitree_g1/pkl_to_npz.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Convert a GEAR-SONIC / BONES-SEED ``smpl_filtered`` clip (.pkl) to .npz. + +The reference clips are zlib-compressed joblib pickles holding a dict with +``pose_aa`` (T, 72), ``transl`` (T, 3), ``smpl_joints`` (T, 24, 3), ``fps``. +``motion_loader.SmplMotion`` consumes the .npz form so the runtime needs no +joblib dependency. Canonicalization (root-orientation removal) happens at load +time in ``motion_loader``, so this converter just repackages the raw arrays. + +Run this in an environment that has ``joblib`` (e.g. the sonic teleop venv): + + python examples/unitree_g1/pkl_to_npz.py \ + --pkl sample_data/smpl_filtered/walk_forward_amateur_001__A001.pkl \ + --out examples/unitree_g1/motions/walk_forward.npz +""" + +import argparse +from pathlib import Path + +import numpy as np + + +def load_pkl(path: str) -> dict: + try: + import joblib + + return joblib.load(path) + except Exception: + # joblib clips are zlib-compressed pickles; fall back to manual inflate. + import pickle + import zlib + + with open(path, "rb") as f: + raw = f.read() + try: + raw = zlib.decompress(raw) + except zlib.error: + pass + return pickle.loads(raw) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--pkl", required=True, help="Input smpl_filtered .pkl") + parser.add_argument("--out", required=True, help="Output .npz path") + args = parser.parse_args() + + d = load_pkl(args.pkl) + if not isinstance(d, dict) or "smpl_joints" not in d: + raise ValueError(f"Unexpected pkl structure; keys={list(d) if isinstance(d, dict) else type(d)}") + + smpl_joints = np.asarray(d["smpl_joints"], np.float32) + if smpl_joints.ndim != 3 or smpl_joints.shape[1:] != (24, 3): + raise ValueError(f"smpl_joints must be (T,24,3), got {smpl_joints.shape}") + + out = {"smpl_joints": smpl_joints, "fps": np.float32(d.get("fps", 50.0))} + if "pose_aa" in d: + out["pose_aa"] = np.asarray(d["pose_aa"], np.float32) + else: + print("[warn] no pose_aa -> loader cannot canonicalize (will feed raw)") + if "transl" in d: + out["transl"] = np.asarray(d["transl"], np.float32) + + Path(args.out).parent.mkdir(parents=True, exist_ok=True) + np.savez_compressed(args.out, **out) + dur = smpl_joints.shape[0] / float(out["fps"]) + print(f"Wrote {args.out}") + print(f" frames={smpl_joints.shape[0]} fps={float(out['fps']):.1f} duration={dur:.1f}s " + f"keys={sorted(out)}") + + +if __name__ == "__main__": + main()