mirror of
https://github.com/huggingface/lerobot.git
synced 2026-06-17 16:27:04 +00:00
add motion loader
This commit is contained in:
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Load an SMPL motion clip and expose it in SONIC's encoder format.
|
||||
|
||||
SONIC's whole-body tracking mode (``encode_mode == 2``) consumes a flat
|
||||
720-vector ``smpl_joints_10frame_step1`` = 10 consecutive frames x 24 SMPL
|
||||
joints x 3 (xyz) at 50 Hz.
|
||||
|
||||
IMPORTANT - frame convention: the encoder expects each frame's joints with the
|
||||
body's *root orientation removed* (per-frame canonical), exactly like the live
|
||||
deploy stream's ``smpl_joints_local`` (see ``process_smpl_joints`` in the GEAR
|
||||
PICO teleop and ``smpl_joints_multi_future_local`` in training). The reference
|
||||
``smpl_filtered`` clips instead store **world-frame** joints (heading retained),
|
||||
so feeding them raw makes the robot move but mis-track / never face-forward.
|
||||
This loader therefore canonicalizes on load using the clip's per-frame root
|
||||
orientation (``pose_aa[:, :3]``):
|
||||
|
||||
A = Rx(+90deg) * rotvec(pose_aa[:, :3]) # y-up -> z-up root quat
|
||||
local = base120 * A^-1 * joints # remove root orient
|
||||
|
||||
with ``base120 = quat(0.5,0.5,0.5,0.5)`` (SMPL base rotation). This reproduces
|
||||
the deployed transform (verified: per-frame hip-heading std -> 0).
|
||||
|
||||
Clip is read from a numpy ``.npz``. Expected keys:
|
||||
smpl_joints : (T, 24, 3) float32 -- world-frame joint positions, 50 fps
|
||||
pose_aa : (T, 72) float32 -- SMPL axis-angle (root = [:, :3])
|
||||
transl : (T, 3) float32 -- global root translation (optional)
|
||||
fps : scalar
|
||||
|
||||
Example:
|
||||
python examples/unitree_g1/motion_loader.py \
|
||||
--motion examples/unitree_g1/motions/walk_forward.npz
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
|
||||
WINDOW = 10 # frames per encoder window (smpl_joints_10frame_step1)
|
||||
N_JOINTS = 24
|
||||
JOINT_DIM = 3
|
||||
SMPL_OBS_DIM = WINDOW * N_JOINTS * JOINT_DIM # 720
|
||||
|
||||
|
||||
def canonicalize_smpl_joints(smpl_joints: np.ndarray, root_aa: np.ndarray) -> np.ndarray:
|
||||
"""Remove per-frame root orientation -> SONIC ``smpl_joints_local`` format.
|
||||
|
||||
Args:
|
||||
smpl_joints: (T, 24, 3) world-frame (z-up) SMPL joint positions.
|
||||
root_aa: (T, 3) SMPL global-orient axis-angle (y-up convention).
|
||||
|
||||
Returns:
|
||||
(T, 24, 3) per-frame root-orientation-removed joints.
|
||||
"""
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
|
||||
rx90 = R.from_euler("x", 90, degrees=True) # smpl_root_ytoz_up
|
||||
base120 = R.from_quat([0.5, 0.5, 0.5, 0.5]) # remove_smpl_base_rot
|
||||
a = rx90 * R.from_rotvec(root_aa) # z-up root quat (left-mult)
|
||||
b_inv = base120 * a.inv() # inv(remove_smpl_base_rot(a))
|
||||
return np.einsum("tij,tkj->tki", b_inv.as_matrix(), smpl_joints).astype(np.float32)
|
||||
|
||||
|
||||
class SmplMotion:
|
||||
"""A single SMPL clip with SONIC-format windowing."""
|
||||
|
||||
def __init__(self, path: str, loop: bool = True, canonicalize: bool = True):
|
||||
data = np.load(path)
|
||||
smpl_joints = data["smpl_joints"].astype(np.float32) # (T, 24, 3)
|
||||
self.pose_aa = data["pose_aa"].astype(np.float32) if "pose_aa" in data.files else None
|
||||
self.transl = data["transl"].astype(np.float32) if "transl" in data.files else None
|
||||
self.fps = float(data["fps"]) if "fps" in data.files else 50.0
|
||||
self.loop = loop
|
||||
|
||||
if smpl_joints.ndim != 3 or smpl_joints.shape[1:] != (N_JOINTS, JOINT_DIM):
|
||||
raise ValueError(
|
||||
f"Expected smpl_joints (T, {N_JOINTS}, {JOINT_DIM}), got {smpl_joints.shape}"
|
||||
)
|
||||
|
||||
# Reference clips store world-frame joints; the encoder wants per-frame
|
||||
# root-orientation-removed joints. Canonicalize when we have the root pose.
|
||||
self.canonicalized = False
|
||||
if canonicalize and self.pose_aa is not None:
|
||||
smpl_joints = canonicalize_smpl_joints(smpl_joints, self.pose_aa[:, :3])
|
||||
self.canonicalized = True
|
||||
self.smpl_joints = smpl_joints
|
||||
|
||||
self.num_frames = self.smpl_joints.shape[0]
|
||||
self._cursor = 0
|
||||
|
||||
def window(self, start: int) -> np.ndarray:
|
||||
"""Return the 720-vector for the 10-frame window beginning at ``start``.
|
||||
|
||||
Frames are laid out oldest->newest, joint-major within a frame:
|
||||
[f0_j0_xyz, f0_j1_xyz, ..., f9_j23_xyz].
|
||||
"""
|
||||
idx = np.arange(start, start + WINDOW)
|
||||
if self.loop:
|
||||
idx = np.mod(idx, self.num_frames)
|
||||
else:
|
||||
idx = np.clip(idx, 0, self.num_frames - 1)
|
||||
return self.smpl_joints[idx].reshape(-1).astype(np.float32)
|
||||
|
||||
def reset(self):
|
||||
self._cursor = 0
|
||||
|
||||
def step(self) -> np.ndarray:
|
||||
"""Advance one frame and return the current 720-vector window."""
|
||||
w = self.window(self._cursor)
|
||||
self._cursor += 1
|
||||
if self.loop:
|
||||
self._cursor %= self.num_frames
|
||||
return w
|
||||
|
||||
@property
|
||||
def done(self) -> bool:
|
||||
return (not self.loop) and (self._cursor + WINDOW >= self.num_frames)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--motion", required=True, help="Path to motion .npz")
|
||||
parser.add_argument("--no-loop", action="store_true")
|
||||
parser.add_argument("--no-canon", action="store_true",
|
||||
help="Skip canonicalization (feed raw stored joints)")
|
||||
args = parser.parse_args()
|
||||
|
||||
m = SmplMotion(args.motion, loop=not args.no_loop, canonicalize=not args.no_canon)
|
||||
duration = m.num_frames / m.fps
|
||||
print(f"Loaded '{args.motion}'")
|
||||
print(f" frames={m.num_frames} fps={m.fps:.1f} duration={duration:.1f}s")
|
||||
print(f" smpl_joints={m.smpl_joints.shape} canonicalized={m.canonicalized} "
|
||||
f"pose_aa={None if m.pose_aa is None else m.pose_aa.shape} "
|
||||
f"transl={None if m.transl is None else m.transl.shape}")
|
||||
|
||||
# Sanity: after canonicalization the per-frame body heading should be fixed.
|
||||
j = m.smpl_joints
|
||||
v = (j[:, 2, :2] - j[:, 1, :2]) # R_hip - L_hip, horizontal
|
||||
a = np.arctan2(v[:, 1], v[:, 0])
|
||||
rlen = np.clip(np.hypot(np.cos(a).mean(), np.sin(a).mean()), 1e-9, 1.0)
|
||||
circ_std = np.degrees(np.sqrt(-2 * np.log(rlen)))
|
||||
print(f" hip-heading circ-std={circ_std:.1f} deg "
|
||||
f"(~0 => orientation removed; large => world-frame)")
|
||||
|
||||
w0 = m.window(0)
|
||||
print(f" window(0): shape={w0.shape} (expected {SMPL_OBS_DIM}) "
|
||||
f"min={w0.min():.3f} max={w0.max():.3f}")
|
||||
assert w0.shape == (SMPL_OBS_DIM,), "window must be 720-dim for obs[922:1642]"
|
||||
|
||||
# Simulate a few control ticks.
|
||||
print(" stepping 5 ticks:")
|
||||
for t in range(5):
|
||||
w = m.step()
|
||||
print(f" t={t} cursor={m._cursor} window_norm={np.linalg.norm(w):.2f}")
|
||||
|
||||
print("OK: motion loads and yields SONIC-format 720-vec windows.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Convert a GEAR-SONIC / BONES-SEED ``smpl_filtered`` clip (.pkl) to .npz.
|
||||
|
||||
The reference clips are zlib-compressed joblib pickles holding a dict with
|
||||
``pose_aa`` (T, 72), ``transl`` (T, 3), ``smpl_joints`` (T, 24, 3), ``fps``.
|
||||
``motion_loader.SmplMotion`` consumes the .npz form so the runtime needs no
|
||||
joblib dependency. Canonicalization (root-orientation removal) happens at load
|
||||
time in ``motion_loader``, so this converter just repackages the raw arrays.
|
||||
|
||||
Run this in an environment that has ``joblib`` (e.g. the sonic teleop venv):
|
||||
|
||||
python examples/unitree_g1/pkl_to_npz.py \
|
||||
--pkl sample_data/smpl_filtered/walk_forward_amateur_001__A001.pkl \
|
||||
--out examples/unitree_g1/motions/walk_forward.npz
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def load_pkl(path: str) -> dict:
|
||||
try:
|
||||
import joblib
|
||||
|
||||
return joblib.load(path)
|
||||
except Exception:
|
||||
# joblib clips are zlib-compressed pickles; fall back to manual inflate.
|
||||
import pickle
|
||||
import zlib
|
||||
|
||||
with open(path, "rb") as f:
|
||||
raw = f.read()
|
||||
try:
|
||||
raw = zlib.decompress(raw)
|
||||
except zlib.error:
|
||||
pass
|
||||
return pickle.loads(raw)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--pkl", required=True, help="Input smpl_filtered .pkl")
|
||||
parser.add_argument("--out", required=True, help="Output .npz path")
|
||||
args = parser.parse_args()
|
||||
|
||||
d = load_pkl(args.pkl)
|
||||
if not isinstance(d, dict) or "smpl_joints" not in d:
|
||||
raise ValueError(f"Unexpected pkl structure; keys={list(d) if isinstance(d, dict) else type(d)}")
|
||||
|
||||
smpl_joints = np.asarray(d["smpl_joints"], np.float32)
|
||||
if smpl_joints.ndim != 3 or smpl_joints.shape[1:] != (24, 3):
|
||||
raise ValueError(f"smpl_joints must be (T,24,3), got {smpl_joints.shape}")
|
||||
|
||||
out = {"smpl_joints": smpl_joints, "fps": np.float32(d.get("fps", 50.0))}
|
||||
if "pose_aa" in d:
|
||||
out["pose_aa"] = np.asarray(d["pose_aa"], np.float32)
|
||||
else:
|
||||
print("[warn] no pose_aa -> loader cannot canonicalize (will feed raw)")
|
||||
if "transl" in d:
|
||||
out["transl"] = np.asarray(d["transl"], np.float32)
|
||||
|
||||
Path(args.out).parent.mkdir(parents=True, exist_ok=True)
|
||||
np.savez_compressed(args.out, **out)
|
||||
dur = smpl_joints.shape[0] / float(out["fps"])
|
||||
print(f"Wrote {args.out}")
|
||||
print(f" frames={smpl_joints.shape[0]} fps={float(out['fps']):.1f} duration={dur:.1f}s "
|
||||
f"keys={sorted(out)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user