push to specific repo

add push to hub
add aggregate
2026-05-12 07:09:43 +00:00 · 2025-12-02 18:35:16 +01:00 · 2025-12-02 18:30:11 +01:00 · 2025-12-02 18:27:50 +01:00 · 2025-12-02 18:11:20 +01:00 · 2025-12-02 17:26:44 +01:00
8 changed files with 837 additions and 947 deletions
@@ -0,0 +1,245 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Aggregate EgoDex shards into a single dataset.
+
+After distributed processing creates multiple shards, this script combines
+them into a single unified dataset.
+
+Reference: https://arxiv.org/abs/2505.11709, https://github.com/apple/ml-egodex
+"""
+
+import argparse
+import logging
+from pathlib import Path
+
+from datatrove.executor import LocalPipelineExecutor
+from datatrove.executor.slurm import SlurmPipelineExecutor
+from datatrove.pipeline.base import PipelineStep
+
+
+class AggregateEgoDexDatasets(PipelineStep):
+    """Datatrove pipeline step that merges EgoDex shard datasets into one dataset.
+
+    Args:
+        repo_ids: Repo IDs of the shard datasets to merge.
+        aggregated_repo_id: Repo ID given to the merged dataset.
+        local_dir: Directory expected to contain one sub-directory per repo ID; the
+            merged dataset is written next to them. When unset, no explicit roots
+            are passed on to ``aggregate_datasets``.
+        push_to_hub: Whether to upload the merged dataset once aggregation is done.
+        hf_repo_id: Repo ID used for the upload. Falls back to ``aggregated_repo_id``.
+    """
+
+    def __init__(
+        self,
+        repo_ids: list[str],
+        aggregated_repo_id: str,
+        local_dir: Path | str | None = None,
+        push_to_hub: bool = False,
+        hf_repo_id: str | None = None,
+    ):
+        super().__init__()
+        self.repo_ids = repo_ids
+        self.aggr_repo_id = aggregated_repo_id
+        self.local_dir = Path(local_dir) if local_dir else None
+        self.push_to_hub = push_to_hub
+        self.hf_repo_id = hf_repo_id if hf_repo_id else aggregated_repo_id
+
+    def run(self, data=None, rank: int = 0, world_size: int = 1):
+        """Aggregate the shards on rank 0; every other rank only logs and returns.
+
+        When ``local_dir`` is set, shards whose directory does not exist are skipped
+        with a warning. Nothing is aggregated (and nothing is pushed) when no shard
+        remains.
+        """
+        # Imports stay local to the method, as in the original step.
+        import logging
+
+        from lerobot.datasets.aggregate import aggregate_datasets
+        from lerobot.datasets.lerobot_dataset import LeRobotDataset
+        from lerobot.utils.utils import init_logging
+
+        init_logging()
+
+        if rank != 0:
+            logging.info(f"Worker {rank} skipping - only worker 0 performs aggregation")
+            return
+
+        # Work on a copy so that filtering missing shards never rewrites the step's
+        # own configuration (the step would otherwise change on every run).
+        repo_ids = list(self.repo_ids)
+        logging.info(f"Starting aggregation of {len(repo_ids)} shards into {self.aggr_repo_id}")
+
+        roots = None
+        if self.local_dir:
+            # Check each shard directory exactly once, keeping repo IDs and roots paired.
+            present, missing = [], []
+            for repo_id in repo_ids:
+                shard_root = self.local_dir / repo_id
+                if shard_root.exists():
+                    present.append((repo_id, shard_root))
+                else:
+                    missing.append(repo_id)
+
+            if missing:
+                logging.warning(
+                    f"Only {len(present)} of {len(repo_ids)} shard directories found. "
+                    f"Missing shards will be skipped: {', '.join(missing)}"
+                )
+
+            repo_ids = [repo_id for repo_id, _ in present]
+            roots = [shard_root for _, shard_root in present]
+
+        if not repo_ids:
+            logging.error("No shard directories found. Nothing to aggregate.")
+            return
+
+        aggr_root = self.local_dir / self.aggr_repo_id if self.local_dir else None
+
+        aggregate_datasets(
+            repo_ids=repo_ids,
+            aggr_repo_id=self.aggr_repo_id,
+            roots=roots,
+            aggr_root=aggr_root,
+        )
+        logging.info("Aggregation complete!")
+
+        if not self.push_to_hub:
+            return
+
+        logging.info(f"Pushing to Hugging Face Hub as {self.hf_repo_id}...")
+        dataset = LeRobotDataset(
+            repo_id=self.aggr_repo_id,
+            root=aggr_root,
+        )
+        # The dataset is loaded under its local repo ID, then retargeted so the
+        # upload can go to a different Hub repo when one was requested.
+        dataset.repo_id = self.hf_repo_id
+        dataset.push_to_hub(
+            tags=["egodex", "hand", "dexterous", "lerobot"],
+            license="cc-by-nc-nd-4.0",
+        )
+        logging.info("Push to hub complete!")
+
+
+def make_aggregate_executor(
+    repo_id,
+    num_shards,
+    job_name,
+    logs_dir,
+    partition,
+    cpus_per_task,
+    mem_per_cpu,
+    local_dir,
+    push_to_hub,
+    hf_repo_id,
+    slurm=True,
+):
+    """Build the datatrove executor that runs the single aggregation task.
+
+    Shard repo IDs are derived as ``{repo_id}_world_{num_shards}_rank_{rank}``.
+    Returns a ``SlurmPipelineExecutor`` when ``slurm`` is true, otherwise a
+    ``LocalPipelineExecutor``; both run exactly one task with one worker.
+    """
+    shard_repo_ids = [f"{repo_id}_world_{num_shards}_rank_{shard_idx}" for shard_idx in range(num_shards)]
+    aggregation_step = AggregateEgoDexDatasets(shard_repo_ids, repo_id, local_dir, push_to_hub, hf_repo_id)
+
+    # Settings shared by both executors: aggregation needs a single task and worker.
+    common = {
+        "pipeline": [aggregation_step],
+        "logging_dir": str(logs_dir / job_name),
+        "tasks": 1,
+        "workers": 1,
+    }
+
+    if not slurm:
+        return LocalPipelineExecutor(**common)
+
+    return SlurmPipelineExecutor(
+        **common,
+        job_name=job_name,
+        time="24:00:00",  # 24 hours for aggregation
+        partition=partition,
+        cpus_per_task=cpus_per_task,
+        sbatch_args={"mem-per-cpu": mem_per_cpu},
+    )
+
+
+def main():
+    """Parse the command line and run the shard-aggregation executor."""
+    cli = argparse.ArgumentParser(
+        description="Aggregate EgoDex dataset shards into a single unified dataset."
+    )
+
+    # (flag, add_argument keyword arguments), registered in this order.
+    option_specs = [
+        (
+            "--repo-id",
+            {
+                "type": str,
+                "required": True,
+                "help": "Repository identifier (base name without shard suffix, e.g., pepijn/egodex-test)",
+            },
+        ),
+        (
+            "--num-shards",
+            {
+                "type": int,
+                "required": True,
+                "help": "Number of shards to aggregate (must match --workers from slurm_port_egodex.py)",
+            },
+        ),
+        (
+            "--logs-dir",
+            {"type": Path, "default": Path("logs"), "help": "Path to logs directory for datatrove"},
+        ),
+        (
+            "--job-name",
+            {"type": str, "default": "aggr_egodex", "help": "Job name used in SLURM"},
+        ),
+        (
+            "--slurm",
+            {
+                "type": int,
+                "default": 1,
+                "help": "Launch over SLURM. Use --slurm 0 to launch locally (for debugging)",
+            },
+        ),
+        (
+            "--partition",
+            {"type": str, "help": "SLURM partition (ideally CPU partition)"},
+        ),
+        (
+            "--cpus-per-task",
+            {"type": int, "default": 16, "help": "Number of CPUs for aggregation task"},
+        ),
+        (
+            "--mem-per-cpu",
+            {"type": str, "default": "8G", "help": "Memory per CPU for aggregation"},
+        ),
+        (
+            "--local-dir",
+            {
+                "type": Path,
+                "default": None,
+                "help": "Local directory where shards are stored. If not specified, uses default HF cache.",
+            },
+        ),
+        (
+            "--push-to-hub",
+            {
+                "action": "store_true",
+                "help": "Push aggregated dataset to Hugging Face Hub after aggregation.",
+            },
+        ),
+        (
+            "--hf-repo-id",
+            {
+                "type": str,
+                "default": None,
+                "help": "Hugging Face repo ID for upload (e.g., username/dataset-name). Defaults to --repo-id.",
+            },
+        ),
+    ]
+    for flag, spec in option_specs:
+        cli.add_argument(flag, **spec)
+
+    options = vars(cli.parse_args())
+    # --slurm is an integer switch on the command line; only the value 1 selects SLURM.
+    options["slurm"] = options.pop("slurm") == 1
+
+    make_aggregate_executor(**options).run()
+
+
+if __name__ == "__main__":
+    main()
+
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+# Download EgoDex dataset
+# Reference: https://arxiv.org/abs/2505.11709, https://github.com/apple/ml-egodex
+#
+# Usage: ./download_egodex.sh <output_dir> <parts...>
+#
+# Examples:
+#   ./download_egodex.sh ./data test           # Download test set only (16 GB)
+#   ./download_egodex.sh ./data part1 part2    # Download training parts 1 and 2
+#   ./download_egodex.sh ./data all            # Download everything (~1.7 TB)
+#
+# Available parts:
+#   test   - Test set (16 GB)
+#   part1  - Training set part 1 (300 GB)
+#   part2  - Training set part 2 (300 GB)
+#   part3  - Training set part 3 (300 GB)
+#   part4  - Training set part 4 (300 GB)
+#   part5  - Training set part 5 (300 GB)
+#   extra  - Additional data (200 GB)
+#   all    - Download all parts (~1.7 TB total)
+
+set -e
+
+BASE_URL="https://ml-site.cdn-apple.com/datasets/egodex"
+
+# Every downloadable part, in the order used when "all" is requested.
+ALL_PARTS=("test" "part1" "part2" "part3" "part4" "part5" "extra")
+
+# Map part names to filenames: each part is served as "<part>.zip".
+declare -A PART_FILES=()
+for part_name in "${ALL_PARTS[@]}"; do
+    PART_FILES["${part_name}"]="${part_name}.zip"
+done
+unset part_name
+
+# Print the command-line help and exit with status 1.
+usage() {
+    cat <<EOF
+Usage: $0 <output_dir> <parts...>
+
+Examples:
+  $0 ./data test           # Download test set only (16 GB)
+  $0 ./data part1 part2    # Download training parts 1 and 2
+  $0 ./data all            # Download everything (~1.7 TB)
+
+Available parts: test, part1, part2, part3, part4, part5, extra, all
+EOF
+    exit 1
+}
+
+# Download one part's zip into the output directory and extract it there.
+#   $1 - output directory (must already exist)
+#   $2 - part name, a key of PART_FILES
+# Returns non-zero when the download fails; the zip is kept after extraction.
+download_part() {
+    local output_dir="$1"
+    local part="$2"
+    local filename="${PART_FILES[$part]}"
+    local url="${BASE_URL}/${filename}"
+    local output_file="${output_dir}/${filename}"
+
+    echo "----------------------------------------"
+    echo "Downloading: ${part} (${filename})"
+    echo "URL: ${url}"
+    echo "Output: ${output_file}"
+    echo "----------------------------------------"
+
+    # Download with curl, showing progress. --fail makes curl exit non-zero on an
+    # HTTP error instead of saving the server's error page as the zip file.
+    if ! curl -L --fail --progress-bar "${url}" -o "${output_file}"; then
+        echo "Error: download failed for ${url}" >&2
+        # Do not leave a truncated archive behind for a later unzip to trip over.
+        rm -f "${output_file}"
+        return 1
+    fi
+
+    # Unzip
+    echo "Extracting ${filename}..."
+    unzip -q "${output_file}" -d "${output_dir}"
+
+    # Optionally remove zip file to save space
+    # Uncomment the next line if you want to delete zips after extraction
+    # rm "${output_file}"
+
+    echo "Done: ${part}"
+    echo ""
+}
+
+# Check arguments: an output directory and at least one part are required
+if [ $# -lt 2 ]; then
+    usage
+fi
+
+OUTPUT_DIR="$1"
+shift
+
+# Create output directory
+mkdir -p "${OUTPUT_DIR}"
+
+# Determine which parts to download
+PARTS_TO_DOWNLOAD=()
+
+for arg in "$@"; do
+    if [ "$arg" == "all" ]; then
+        # "all" overrides anything listed before it
+        PARTS_TO_DOWNLOAD=("${ALL_PARTS[@]}")
+        break
+    elif [ -n "$arg" ] && [ -n "${PART_FILES[$arg]}" ]; then
+        # Skip parts that were already requested so nothing is downloaded twice
+        if [[ " ${PARTS_TO_DOWNLOAD[*]} " != *" ${arg} "* ]]; then
+            PARTS_TO_DOWNLOAD+=("$arg")
+        fi
+    else
+        echo "Error: Unknown part '${arg}'" >&2
+        echo "Available parts: test, part1, part2, part3, part4, part5, extra, all" >&2
+        exit 1
+    fi
+done
+
+if [ ${#PARTS_TO_DOWNLOAD[@]} -eq 0 ]; then
+    echo "Error: No valid parts specified" >&2
+    usage
+fi
+
+echo "========================================"
+echo "EgoDex Dataset Download"
+echo "========================================"
+echo "Output directory: ${OUTPUT_DIR}"
+echo "Parts to download: ${PARTS_TO_DOWNLOAD[*]}"
+echo "========================================"
+echo ""
+
+# Download each part
+for part in "${PARTS_TO_DOWNLOAD[@]}"; do
+    download_part "${OUTPUT_DIR}" "${part}"
+done
+
+echo "========================================"
+echo "Download complete!"
+echo "Data saved to: ${OUTPUT_DIR}"
+echo "========================================"
+
@@ -0,0 +1,443 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Distributed EgoDex dataset porting using SLURM and datatrove.
+
+EgoDex is a large-scale dataset for egocentric dexterous manipulation collected
+with ARKit on Apple Vision Pro. This script converts EgoDex data to LeRobot format.
+
+Reference: https://arxiv.org/abs/2505.11709, https://github.com/apple/ml-egodex
+"""
+
+import argparse
+from pathlib import Path
+
+import cv2
+import h5py
+import mediapy as mpy
+import numpy as np
+from datatrove.executor import LocalPipelineExecutor
+from datatrove.executor.slurm import SlurmPipelineExecutor
+from datatrove.pipeline.base import PipelineStep
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+# Image dimensions, in pixels: every video frame is resized to this resolution
+# before being stored, and it is the declared shape of "observation.image".
+DEFAULT_IMAGE_HEIGHT = 1080
+DEFAULT_IMAGE_WIDTH = 1920
+
+class PortEgoDexShards(PipelineStep):
+    """Datatrove pipeline step that converts one rank's share of EgoDex demos to LeRobot format.
+
+    Args:
+        raw_dir: Directory searched recursively for ``*.hdf5`` demos. The video of a
+            demo is expected next to it, with the same name and a ``.mp4`` suffix.
+        repo_id: Base repo ID. The shard written by a rank is named
+            ``{repo_id}_world_{world_size}_rank_{rank}``.
+        local_dir: Directory under which the shard dataset is written. Defaults to
+            ``data/local_datasets``.
+        percentage: Share of the sorted demo list to process. Values below 100 keep
+            only the first files of the list (at least one).
+    """
+
+    def __init__(
+        self,
+        raw_dir: Path | str,
+        repo_id: str,
+        local_dir: Path | str | None = None,
+        percentage: float = 100.0,
+    ):
+        super().__init__()
+        self.raw_dir = Path(raw_dir)
+        self.repo_id = repo_id
+        self.local_dir = Path(local_dir) if local_dir else Path("data/local_datasets")
+        self.percentage = percentage
+
+    def run(self, data=None, rank: int = 0, world_size: int = 1):
+        """Convert the demos assigned to ``rank`` into one LeRobot shard dataset.
+
+        The sorted demo list is split round-robin (``files[rank::world_size]``). A
+        rank that receives no file returns without creating a dataset. A demo that
+        fails to convert is logged and skipped; the remaining demos are still
+        processed.
+        """
+        # Imports stay local to the method, as in the original step.
+        import logging
+
+        import cv2
+        import h5py
+        import mediapy as mpy
+        import numpy as np
+
+        from lerobot.datasets.lerobot_dataset import LeRobotDataset
+        from lerobot.utils.utils import init_logging
+
+        hand_sides = ("left", "right")
+        # (EgoDex joint suffix, short feature name) of the five fingertips, in state order.
+        fingertips = (
+            ("ThumbTip", "thumb"),
+            ("IndexFingerTip", "index"),
+            ("MiddleFingerTip", "middle"),
+            ("RingFingerTip", "ring"),
+            ("LittleFingerTip", "little"),
+        )
+
+        def _hand_state(transforms_group, hand_side):
+            """Build the per-frame 24D state of one hand.
+
+            Layout per frame: wrist 3D position (3), wrist orientation as the first
+            two columns of its rotation matrix (6), five fingertip 3D positions (15).
+            Each transform dataset is read from the file once and indexed as
+            (frame, row, column).
+            """
+            wrist = np.asarray(transforms_group[f"{hand_side}Hand"][()], dtype=np.float32)
+            columns = [
+                wrist[:, :3, 3],  # wrist position
+                wrist[:, :3, 0],  # first column of the rotation matrix
+                wrist[:, :3, 1],  # second column of the rotation matrix
+            ]
+            for joint_suffix, _ in fingertips:
+                tip = np.asarray(transforms_group[f"{hand_side}{joint_suffix}"][()], dtype=np.float32)
+                columns.append(tip[:, :3, 3])
+            return np.concatenate(columns, axis=1)
+
+        def get_state_and_action_from_egodex_annotations(demo):
+            """Convert EgoDex demo annotations into states, actions and camera extrinsics.
+
+            The state is the 48D concatenation of the left and right hand states. The
+            action is the state at time t+1 (next-pose prediction); the last frame has
+            no successor, so its action repeats the last state.
+            """
+            transforms_group = demo["transforms"]
+
+            state = np.concatenate([_hand_state(transforms_group, side) for side in hand_sides], axis=1)
+
+            # Camera extrinsics are kept for optional coordinate frame transformations;
+            # each per-frame matrix is flattened into one row.
+            camera = np.asarray(transforms_group["camera"][()], dtype=np.float32)
+            extrinsics = camera.reshape(camera.shape[0], int(np.prod(camera.shape[1:])))
+
+            # Shift by 1 timestep to convert state to action. A plain np.roll would wrap
+            # the first state around into the last action, so the last state is repeated.
+            action = np.concatenate([state[1:], state[-1:]], axis=0)
+
+            return state, action, extrinsics
+
+        def process_demo(hdf5_file_path, video_path):
+            """Yield the LeRobot frames of a single EgoDex demo, one at a time."""
+            with h5py.File(hdf5_file_path, "r") as demo:
+                state, action, extrinsics = get_state_and_action_from_egodex_annotations(demo)
+
+                # Get natural language task description
+                if demo.attrs.get("llm_type") == "reversible":
+                    direction = demo.attrs.get("which_llm_description", "1")
+                    lang_instruction = demo.attrs.get(
+                        "llm_description" if direction == "1" else "llm_description2",
+                        "manipulation task",
+                    )
+                else:
+                    lang_instruction = demo.attrs.get("llm_description", "manipulation task")
+
+            video = np.asarray(mpy.read_video(str(video_path)))
+
+            # Video and annotations can disagree on the frame count; keep the common prefix
+            # instead of failing on an out-of-range index.
+            num_frames = min(video.shape[0], state.shape[0])
+            if video.shape[0] != state.shape[0]:
+                logging.warning(
+                    f"Frame count mismatch for {hdf5_file_path}: {video.shape[0]} video frames vs "
+                    f"{state.shape[0]} annotated frames. Using the first {num_frames}."
+                )
+
+            target_size = (DEFAULT_IMAGE_HEIGHT, DEFAULT_IMAGE_WIDTH)
+            for step_idx in range(num_frames):
+                image = video[step_idx]
+                # Resize image to default dimensions (skipped when it already matches)
+                if tuple(image.shape[:2]) != target_size:
+                    image = cv2.resize(
+                        image,
+                        (DEFAULT_IMAGE_WIDTH, DEFAULT_IMAGE_HEIGHT),
+                        interpolation=cv2.INTER_AREA,
+                    )
+                yield {
+                    "task": lang_instruction,
+                    "observation.image": image,
+                    "observation.state": state[step_idx],
+                    "observation.extrinsics": extrinsics[step_idx],
+                    "action": action[step_idx],
+                }
+
+        def _hand_feature_names(hand_side):
+            """Return the 24 state feature names of one hand, in state order."""
+            names = [f"{hand_side}_wrist_{axis}" for axis in "xyz"]
+            names += [f"{hand_side}_rot_{i}" for i in range(6)]
+            names += [f"{hand_side}_{finger}_{axis}" for _, finger in fingertips for axis in "xyz"]
+            return names
+
+        init_logging()
+
+        # Define EgoDex features
+        egodex_features = {
+            "observation.image": {
+                "dtype": "video",
+                "shape": (DEFAULT_IMAGE_HEIGHT, DEFAULT_IMAGE_WIDTH, 3),
+                "names": ["height", "width", "rgb"],
+            },
+            "observation.state": {
+                "dtype": "float32",
+                "shape": (48,),
+                "names": [name for side in hand_sides for name in _hand_feature_names(side)],
+            },
+            "observation.extrinsics": {
+                "dtype": "float32",
+                "shape": (16,),
+                "names": [f"extrinsic_{i}" for i in range(16)],
+            },
+            "action": {
+                "dtype": "float32",
+                "shape": (48,),
+                "names": [f"action_{i}" for i in range(48)],
+            },
+        }
+
+        # 1. Discover all HDF5 files (sorted so that every rank sees the same order)
+        files = sorted(self.raw_dir.rglob("*.hdf5"))
+        if not files:
+            logging.warning(f"No HDF5 files found in {self.raw_dir}")
+            return
+
+        # 2. Apply percentage filter
+        if self.percentage < 100:
+            num_files = max(1, int(len(files) * self.percentage / 100))
+            files = files[:num_files]
+            logging.info(f"Processing {self.percentage}% of dataset: {num_files} files")
+
+        # 3. Assign files to this worker
+        my_files = files[rank::world_size]
+        if not my_files:
+            logging.info(f"Rank {rank} has no files to process.")
+            return
+
+        logging.info(f"Rank {rank} processing {len(my_files)} files out of {len(files)} total.")
+
+        # 4. Create a LeRobot dataset for this shard
+        shard_repo_id = f"{self.repo_id}_world_{world_size}_rank_{rank}"
+        shard_root = self.local_dir / shard_repo_id
+
+        dataset = LeRobotDataset.create(
+            repo_id=shard_repo_id,
+            fps=30,
+            robot_type="hand",
+            features=egodex_features,
+            root=shard_root,
+        )
+
+        # 5. Process each file
+        for input_h5 in my_files:
+            # Derive corresponding video path
+            video_path = input_h5.with_suffix(".mp4")
+            if not video_path.exists():
+                logging.warning(f"Video file not found for {input_h5}, skipping.")
+                continue
+
+            try:
+                # Frames are streamed into the dataset to keep peak memory down
+                for frame in process_demo(input_h5, video_path):
+                    dataset.add_frame(frame)
+                dataset.save_episode()
+            except Exception:
+                # A single broken demo must not abort the whole shard.
+                logging.exception(f"Error processing {input_h5}")
+                # Discard frames already buffered for the failed demo so they do not
+                # end up in the next episode.
+                # NOTE(review): relies on LeRobotDataset.clear_episode_buffer - confirm
+                # against the installed lerobot version.
+                dataset.clear_episode_buffer()
+
+        # 6. Finalize the dataset
+        dataset.finalize()
+
+
+def make_port_executor(
+    raw_dir,
+    repo_id,
+    job_name,
+    logs_dir,
+    workers,
+    partition,
+    cpus_per_task,
+    mem_per_cpu,
+    local_dir,
+    percentage,
+    slurm=True,
+):
+    """Build the datatrove executor that runs the EgoDex porting step.
+
+    One task is created per requested worker. Returns a ``SlurmPipelineExecutor``
+    running all tasks concurrently when ``slurm`` is true, otherwise a
+    ``LocalPipelineExecutor`` that runs them with a single worker.
+    """
+    porting_step = PortEgoDexShards(raw_dir, repo_id, local_dir, percentage)
+
+    # Settings shared by both executors.
+    common = {
+        "pipeline": [porting_step],
+        "logging_dir": str(logs_dir / job_name),
+        "tasks": workers,
+    }
+
+    if not slurm:
+        # Run locally sequentially for debugging
+        return LocalPipelineExecutor(**common, workers=1)
+
+    return SlurmPipelineExecutor(
+        **common,
+        job_name=job_name,
+        workers=workers,
+        time="10:00:00",  # EgoDex is large, allow more time
+        partition=partition,
+        cpus_per_task=cpus_per_task,
+        sbatch_args={"mem-per-cpu": mem_per_cpu},
+    )
+
+
+def main():
+    """Parse the command line and run the EgoDex porting executor."""
+    cli = argparse.ArgumentParser(
+        description="Convert EgoDex dataset to LeRobot format using SLURM."
+    )
+
+    # (flag, add_argument keyword arguments), registered in this order.
+    option_specs = [
+        (
+            "--raw-dir",
+            {
+                "type": Path,
+                "required": True,
+                "help": "Directory containing input EgoDex data (HDF5 + MP4 files).",
+            },
+        ),
+        (
+            "--repo-id",
+            {
+                "type": str,
+                "required": True,
+                "help": "Repository identifier (e.g., user/egodex-lerobot).",
+            },
+        ),
+        (
+            "--logs-dir",
+            {"type": Path, "default": Path("logs"), "help": "Path to logs directory."},
+        ),
+        (
+            "--job-name",
+            {"type": str, "default": "port_egodex", "help": "Job name used in SLURM."},
+        ),
+        (
+            "--slurm",
+            {
+                "type": int,
+                "default": 1,
+                "help": "Launch over SLURM. Use --slurm 0 to launch sequentially (useful for debugging).",
+            },
+        ),
+        (
+            "--workers",
+            {"type": int, "default": 50, "help": "Number of SLURM workers."},
+        ),
+        (
+            "--partition",
+            {"type": str, "help": "SLURM partition."},
+        ),
+        (
+            "--cpus-per-task",
+            {"type": int, "default": 4, "help": "Number of CPUs per worker."},
+        ),
+        (
+            "--mem-per-cpu",
+            {"type": str, "default": "4G", "help": "Memory per CPU."},
+        ),
+        (
+            "--percentage",
+            {
+                "type": float,
+                "default": 100.0,
+                "help": "Percentage of dataset to process (e.g., 1.0 for 1%%). Useful for testing.",
+            },
+        ),
+        (
+            "--local-dir",
+            {
+                "type": Path,
+                "default": None,
+                "help": "Local directory to save the LeRobot dataset. Defaults to data/local_datasets.",
+            },
+        ),
+    ]
+    for flag, spec in option_specs:
+        cli.add_argument(flag, **spec)
+
+    options = vars(cli.parse_args())
+    # --slurm is an integer switch on the command line; only the value 1 selects SLURM.
+    options["slurm"] = options.pop("slurm") == 1
+
+    make_port_executor(**options).run()
+
+
+if __name__ == "__main__":
+    main()
+
@@ -1,479 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example: Holosoma Whole-Body Locomotion (23-DOF and 29-DOF)
-
-This example demonstrates loading Holosoma whole-body locomotion policies
-and running them on the Unitree G1 robot.
-
-Supports both:
- 23-DOF native policies (82D observations, 23D actions)
- 29-DOF policies (100D observations, 29D actions)
-"""
-
-import argparse
-import logging
-import threading
-import time
-
-import numpy as np
-import onnxruntime as ort
-from huggingface_hub import hf_hub_download
-
-from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
-from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# =============================================================================
-# 29-DOF Configuration
-# =============================================================================
-# fmt: off
-HOLOSOMA_29DOF_DEFAULT_ANGLES = np.array([
-    -0.312, 0.0, 0.0, 0.669, -0.363, 0.0,  # left leg
-    -0.312, 0.0, 0.0, 0.669, -0.363, 0.0,  # right leg
-    0.0, 0.0, 0.0,                          # waist (yaw, roll, pitch)
-    0.2, 0.2, 0.0, 0.6, 0.0, 0.0, 0.0,     # left arm
-    0.2, -0.2, 0.0, 0.6, 0.0, 0.0, 0.0,    # right arm
-], dtype=np.float32)
-
-HOLOSOMA_29DOF_KP = np.array([
-    40.179238471, 99.098427777, 40.179238471, 99.098427777, 28.501246196, 28.501246196,  # left leg
-    40.179238471, 99.098427777, 40.179238471, 99.098427777, 28.501246196, 28.501246196,  # right leg
-    40.179238471, 28.501246196, 28.501246196,  # waist
-    14.250623098, 14.250623098, 14.250623098, 14.250623098, 14.250623098, 16.778327481, 16.778327481,  # left arm
-    14.250623098, 14.250623098, 14.250623098, 14.250623098, 14.250623098, 16.778327481, 16.778327481,  # right arm
-], dtype=np.float32)
-
-HOLOSOMA_29DOF_KD = np.array([
-    2.557889765, 6.308801854, 2.557889765, 6.308801854, 1.814445687, 1.814445687,  # left leg
-    2.557889765, 6.308801854, 2.557889765, 6.308801854, 1.814445687, 1.814445687,  # right leg
-    2.557889765, 1.814445687, 1.814445687,  # waist
-    0.907222843, 0.907222843, 0.907222843, 0.907222843, 0.907222843, 1.068141502, 1.068141502,  # left arm
-    0.907222843, 0.907222843, 0.907222843, 0.907222843, 0.907222843, 1.068141502, 1.068141502,  # right arm
-], dtype=np.float32)
-
-# =============================================================================
-# 23-DOF Configuration (native G1-23: no waist_roll/pitch, no wrist_pitch/yaw)
-# Derived from 29-DOF Holosoma values
-# =============================================================================
-# Joint order: 6 left leg, 6 right leg, 1 waist_yaw, 5 left arm, 5 right arm
-HOLOSOMA_23DOF_DEFAULT_ANGLES = np.array([
-    -0.312, 0.0, 0.0, 0.669, -0.363, 0.0,  # left leg (from 29-DOF)
-    -0.312, 0.0, 0.0, 0.669, -0.363, 0.0,  # right leg (from 29-DOF)
-    0.0,                                    # waist_yaw only (from 29-DOF)
-    0.2, 0.2, 0.0, 0.6, 0.0,               # left arm first 5 joints (from 29-DOF)
-    0.2, -0.2, 0.0, 0.6, 0.0,              # right arm first 5 joints (from 29-DOF)
-], dtype=np.float32)
-
-HOLOSOMA_23DOF_KP = np.array([
-    40.179238471, 99.098427777, 40.179238471, 99.098427777, 28.501246196, 28.501246196,  # left leg
-    40.179238471, 99.098427777, 40.179238471, 99.098427777, 28.501246196, 28.501246196,  # right leg
-    40.179238471,                                                                         # waist_yaw
-    14.250623098, 14.250623098, 14.250623098, 14.250623098, 14.250623098,                 # left arm
-    14.250623098, 14.250623098, 14.250623098, 14.250623098, 14.250623098,                 # right arm
-], dtype=np.float32)
-
-HOLOSOMA_23DOF_KD = np.array([
-    2.557889765, 6.308801854, 2.557889765, 6.308801854, 1.814445687, 1.814445687,  # left leg
-    2.557889765, 6.308801854, 2.557889765, 6.308801854, 1.814445687, 1.814445687,  # right leg
-    2.557889765,                                                                    # waist_yaw
-    0.907222843, 0.907222843, 0.907222843, 0.907222843, 0.907222843,               # left arm
-    0.907222843, 0.907222843, 0.907222843, 0.907222843, 0.907222843,               # right arm
-], dtype=np.float32)
-
-# Maps 23-DOF policy index → 29-DOF motor index
-# 23-DOF: legs(0-11), waist_yaw(12), L_arm(13-17), R_arm(18-22)
-# 29-DOF: legs(0-11), waist(12-14), L_arm(15-21), R_arm(22-28)
-DOF_23_TO_MOTOR_MAP = [
-    0, 1, 2, 3, 4, 5,       # left leg → motor 0-5
-    6, 7, 8, 9, 10, 11,     # right leg → motor 6-11
-    12,                      # waist_yaw → motor 12
-    15, 16, 17, 18, 19,     # left arm (skip wrist_pitch/yaw) → motor 15-19
-    22, 23, 24, 25, 26,     # right arm (skip wrist_pitch/yaw) → motor 22-26
-]
-# fmt: on
-
-# Control parameters
-# Target period of one locomotion-loop iteration, in seconds: the thread loop
-# sleeps for whatever is left of this period after each policy step.
-LOCOMOTION_CONTROL_DT = 0.02  # 50Hz
-# Multiplies the clipped policy output before it is added to the default joint angles.
-LOCOMOTION_ACTION_SCALE = 0.25
-# Multiplies the IMU gyroscope reading that goes into the observation.
-ANG_VEL_SCALE = 0.25
-# Multiplies joint positions (taken relative to the default angles) in the observation.
-DOF_POS_SCALE = 1.0
-# Multiplies joint velocities in the observation.
-DOF_VEL_SCALE = 0.05
-# Divides the per-step gait phase increment (presumably a period in seconds -- TODO confirm).
-GAIT_PERIOD = 1.0
-
-# Hugging Face Hub repository the policy is downloaded from when none is given.
-DEFAULT_HOLOSOMA_REPO_ID = "nepyope/holosoma_locomotion"
-
-
-def load_holosoma_policy(
-    repo_id: str = DEFAULT_HOLOSOMA_REPO_ID,
-    policy_name: str = "fastsac",
-    local_path: str | None = None,
-) -> tuple[ort.InferenceSession, int]:
-    """Load Holosoma policy and detect observation dimension.
-
-    Returns:
-        (policy, obs_dim) tuple where obs_dim is 82 (23-DOF) or 100 (29-DOF)
-    """
-    if local_path is not None:
-        logger.info(f"Loading policy from local path: {local_path}")
-        policy_path = local_path
-    else:
-        logger.info(f"Loading policy from Hugging Face Hub: {repo_id}")
-        policy_path = hf_hub_download(repo_id=repo_id, filename=f"{policy_name}_g1_29dof.onnx")
-
-    policy = ort.InferenceSession(policy_path)
-
-    # Detect observation dimension from model input shape
-    input_shape = policy.get_inputs()[0].shape
-    obs_dim = input_shape[1] if len(input_shape) > 1 else input_shape[0]
-
-    logger.info(f"Policy loaded successfully")
-    logger.info(f"  Input: {policy.get_inputs()[0].name}, shape: {input_shape} → obs_dim={obs_dim}")
-    logger.info(f"  Output: {policy.get_outputs()[0].name}, shape: {policy.get_outputs()[0].shape}")
-
-    return policy, obs_dim
-
-
-class HolosomaLocomotionController:
-    """
-    Handles Holosoma whole-body locomotion for Unitree G1.
-    Supports both 23-DOF (82D obs) and 29-DOF (100D obs) policies.
-    """
-
-    def __init__(self, policy, robot, config, obs_dim: int = 100):
-        self.policy = policy
-        self.robot = robot
-        self.config = config
-        self.obs_dim = obs_dim
-
-        # Detect policy type from observation dimension
-        self.is_23dof = (obs_dim == 82)
-        self.num_dof = 23 if self.is_23dof else 29
-
-        # Velocity commands
-        self.locomotion_cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32)
-
-        # State variables sized for policy type
-        self.qj = np.zeros(self.num_dof, dtype=np.float32)
-        self.dqj = np.zeros(self.num_dof, dtype=np.float32)
-        self.locomotion_action = np.zeros(self.num_dof, dtype=np.float32)
-        self.locomotion_obs = np.zeros(obs_dim, dtype=np.float32)
-        self.last_unscaled_action = np.zeros(self.num_dof, dtype=np.float32)
-
-        # Select config based on DOF
-        if self.is_23dof:
-            self.default_angles = HOLOSOMA_23DOF_DEFAULT_ANGLES
-            self.kp = HOLOSOMA_23DOF_KP
-            self.kd = HOLOSOMA_23DOF_KD
-            self.motor_map = DOF_23_TO_MOTOR_MAP
-        else:
-            self.default_angles = HOLOSOMA_29DOF_DEFAULT_ANGLES
-            self.kp = HOLOSOMA_29DOF_KP
-            self.kd = HOLOSOMA_29DOF_KD
-            self.motor_map = list(range(29))  # Identity map for 29-DOF
-
-        # Phase state for gait
-        self.phase = np.zeros((1, 2), dtype=np.float32)
-        self.phase[0, 0] = 0.0
-        self.phase[0, 1] = np.pi
-        self.phase_dt = 2 * np.pi / (50.0 * GAIT_PERIOD)
-        self.is_standing = False
-
-        self.counter = 0
-        self.locomotion_running = False
-        self.locomotion_thread = None
-
-        logger.info(f"HolosomaLocomotionController initialized")
-        logger.info(f"  Mode: {'23-DOF (82D obs)' if self.is_23dof else '29-DOF (100D obs)'}")
-        logger.info(f"  Action dim: {self.num_dof}")
-
-    def holosoma_locomotion_run(self):
-        """Main locomotion loop - handles both 23-DOF and 29-DOF."""
-        self.counter += 1
-
-        if self.counter == 1:
-            print("\n" + "=" * 60)
-            print(f"🚀 RUNNING HOLOSOMA {self.num_dof}-DOF LOCOMOTION POLICY")
-            print(f"   {self.obs_dim}D observations → {self.num_dof}D actions")
-            print("=" * 60 + "\n")
-
-        robot_state = self.robot.get_observation()
-        if robot_state is None:
-            return
-
-        # Remote controller
-        if robot_state.wireless_remote is not None:
-            self.robot.remote_controller.set(robot_state.wireless_remote)
-        else:
-            self.robot.remote_controller.lx = 0.0
-            self.robot.remote_controller.ly = 0.0
-            self.robot.remote_controller.rx = 0.0
-            self.robot.remote_controller.ry = 0.0
-
-        # Deadzone
-        ly = self.robot.remote_controller.ly if abs(self.robot.remote_controller.ly) > 0.1 else 0.0
-        lx = self.robot.remote_controller.lx if abs(self.robot.remote_controller.lx) > 0.1 else 0.0
-        rx = self.robot.remote_controller.rx if abs(self.robot.remote_controller.rx) > 0.1 else 0.0
-
-        self.locomotion_cmd[0] = ly
-        self.locomotion_cmd[1] = -lx
-        self.locomotion_cmd[2] = -rx
-
-        # Read joint states using motor map
-        for i in range(self.num_dof):
-            motor_idx = self.motor_map[i]
-            self.qj[i] = robot_state.motor_state[motor_idx].q
-            self.dqj[i] = robot_state.motor_state[motor_idx].dq
-
-        # IMU
-        quat = robot_state.imu_state.quaternion
-        ang_vel = np.array(robot_state.imu_state.gyroscope, dtype=np.float32)
-        gravity_orientation = self.robot.get_gravity_orientation(quat)
-
-        # Scale observations
-        qj_obs = (self.qj - self.default_angles) * DOF_POS_SCALE
-        dqj_obs = self.dqj * DOF_VEL_SCALE
-        ang_vel_scaled = ang_vel * ANG_VEL_SCALE
-
-        # Phase update
-        cmd_norm = np.linalg.norm(self.locomotion_cmd[:2])
-        ang_cmd_norm = np.abs(self.locomotion_cmd[2])
-
-        if cmd_norm < 0.01 and ang_cmd_norm < 0.01:
-            self.phase[0, :] = np.pi * np.ones(2)
-            self.is_standing = True
-        elif self.is_standing:
-            self.phase = np.array([[0.0, np.pi]], dtype=np.float32)
-            self.is_standing = False
-        else:
-            phase_tp1 = self.phase + self.phase_dt
-            self.phase = np.fmod(phase_tp1 + np.pi, 2 * np.pi) - np.pi
-
-        sin_phase = np.sin(self.phase[0, :])
-        cos_phase = np.cos(self.phase[0, :])
-
-        # Build observation (format depends on DOF)
-        if self.is_23dof:
-            # 82D: [23 actions, 3 ang_vel, 1 cmd_yaw, 2 cmd_lin, 2 cos, 23 pos, 23 vel, 3 grav, 2 sin]
-            self.locomotion_obs[0:23] = self.last_unscaled_action
-            self.locomotion_obs[23:26] = ang_vel_scaled
-            self.locomotion_obs[26] = self.locomotion_cmd[2]
-            self.locomotion_obs[27:29] = self.locomotion_cmd[:2]
-            self.locomotion_obs[29:31] = cos_phase
-            self.locomotion_obs[31:54] = qj_obs
-            self.locomotion_obs[54:77] = dqj_obs
-            self.locomotion_obs[77:80] = gravity_orientation
-            self.locomotion_obs[80:82] = sin_phase
-        else:
-            # 100D: [29 actions, 3 ang_vel, 1 cmd_yaw, 2 cmd_lin, 2 cos, 29 pos, 29 vel, 3 grav, 2 sin]
-            self.locomotion_obs[0:29] = self.last_unscaled_action
-            self.locomotion_obs[29:32] = ang_vel_scaled
-            self.locomotion_obs[32] = self.locomotion_cmd[2]
-            self.locomotion_obs[33:35] = self.locomotion_cmd[:2]
-            self.locomotion_obs[35:37] = cos_phase
-            self.locomotion_obs[37:66] = qj_obs
-            self.locomotion_obs[66:95] = dqj_obs
-            self.locomotion_obs[95:98] = gravity_orientation
-            self.locomotion_obs[98:100] = sin_phase
-
-        # Policy inference
-        obs_input = self.locomotion_obs.reshape(1, -1).astype(np.float32)
-        ort_inputs = {self.policy.get_inputs()[0].name: obs_input}
-        ort_outs = self.policy.run(None, ort_inputs)
-
-        raw_action = ort_outs[0].squeeze()
-        clipped_action = np.clip(raw_action, -100.0, 100.0)
-
-        self.last_unscaled_action = clipped_action.copy()
-        self.locomotion_action = clipped_action * LOCOMOTION_ACTION_SCALE
-
-        # Debug
-        if self.counter <= 3:
-            print(f"\n[Holosoma Debug #{self.counter}]")
-            print(f"  Phase: ({self.phase[0, 0]:.3f}, {self.phase[0, 1]:.3f})")
-            print(f"  Cmd: ({self.locomotion_cmd[0]:.2f}, {self.locomotion_cmd[1]:.2f}, {self.locomotion_cmd[2]:.2f})")
-            print(f"  Action range: [{raw_action.min():.3f}, {raw_action.max():.3f}]")
-
-        # Compute target positions
-        target_dof_pos = self.default_angles + self.locomotion_action
-
-        # Send commands to motors via motor map
-        for i in range(self.num_dof):
-            motor_idx = self.motor_map[i]
-            self.robot.msg.motor_cmd[motor_idx].q = target_dof_pos[i]
-            self.robot.msg.motor_cmd[motor_idx].qd = 0
-            self.robot.msg.motor_cmd[motor_idx].kp = self.kp[i]
-            self.robot.msg.motor_cmd[motor_idx].kd = self.kd[i]
-            self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-        # For 23-DOF: zero out missing joints (waist_roll/pitch, wrist_pitch/yaw)
-        if self.is_23dof:
-            missing_motors = [13, 14, 20, 21, 27, 28]  # waist_roll, waist_pitch, wrist_pitch/yaw
-            for motor_idx in missing_motors:
-                self.robot.msg.motor_cmd[motor_idx].q = 0.0
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = 40.0
-                self.robot.msg.motor_cmd[motor_idx].kd = 2.0
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-        self.robot.send_action(self.robot.msg)
-
-    def _locomotion_thread_loop(self):
-        logger.info("Locomotion thread started")
-        while self.locomotion_running:
-            start_time = time.time()
-            try:
-                self.holosoma_locomotion_run()
-            except Exception as e:
-                logger.error(f"Error in locomotion loop: {e}")
-                import traceback
-                traceback.print_exc()
-
-            elapsed = time.time() - start_time
-            sleep_time = max(0, LOCOMOTION_CONTROL_DT - elapsed)
-            time.sleep(sleep_time)
-        logger.info("Locomotion thread stopped")
-
-    def start_locomotion_thread(self):
-        if self.locomotion_running:
-            logger.warning("Locomotion thread already running")
-            return
-        logger.info("Starting locomotion control thread...")
-        self.locomotion_running = True
-        self.locomotion_thread = threading.Thread(target=self._locomotion_thread_loop, daemon=True)
-        self.locomotion_thread.start()
-        logger.info("Locomotion control thread started!")
-
-    def stop_locomotion_thread(self):
-        if not self.locomotion_running:
-            return
-        logger.info("Stopping locomotion control thread...")
-        self.locomotion_running = False
-        if self.locomotion_thread:
-            self.locomotion_thread.join(timeout=2.0)
-        logger.info("Locomotion control thread stopped")
-
-    def reset_robot(self):
-        """Move joints to default position."""
-        logger.info(f"Moving {self.num_dof} joints to default position...")
-
-        total_time = 3.0
-        num_step = int(total_time / self.robot.control_dt)
-
-        robot_state = self.robot.get_observation()
-
-        # Record current positions
-        init_dof_pos = np.zeros(self.num_dof, dtype=np.float32)
-        for i in range(self.num_dof):
-            motor_idx = self.motor_map[i]
-            init_dof_pos[i] = robot_state.motor_state[motor_idx].q
-
-        # Interpolate to target
-        for step in range(num_step):
-            alpha = step / num_step
-            for i in range(self.num_dof):
-                motor_idx = self.motor_map[i]
-                target = self.default_angles[i]
-                self.robot.msg.motor_cmd[motor_idx].q = init_dof_pos[i] * (1 - alpha) + target * alpha
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = self.kp[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = self.kd[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            # Zero missing joints for 23-DOF
-            if self.is_23dof:
-                for motor_idx in [13, 14, 20, 21, 27, 28]:
-                    self.robot.msg.motor_cmd[motor_idx].q = 0.0
-                    self.robot.msg.motor_cmd[motor_idx].qd = 0
-                    self.robot.msg.motor_cmd[motor_idx].kp = 40.0
-                    self.robot.msg.motor_cmd[motor_idx].kd = 2.0
-                    self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            self.robot.msg.crc = self.robot.crc.Crc(self.robot.msg)
-            self.robot.lowcmd_publisher.Write(self.robot.msg)
-            time.sleep(self.robot.control_dt)
-
-        logger.info(f"Reached default position ({self.num_dof} joints)")
-
-        # Hold for 2 seconds
-        logger.info("Holding default position for 2 seconds...")
-        hold_steps = int(2.0 / self.robot.control_dt)
-        for _ in range(hold_steps):
-            for i in range(self.num_dof):
-                motor_idx = self.motor_map[i]
-                self.robot.msg.motor_cmd[motor_idx].q = self.default_angles[i]
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = self.kp[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = self.kd[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            if self.is_23dof:
-                for motor_idx in [13, 14, 20, 21, 27, 28]:
-                    self.robot.msg.motor_cmd[motor_idx].q = 0.0
-                    self.robot.msg.motor_cmd[motor_idx].qd = 0
-                    self.robot.msg.motor_cmd[motor_idx].kp = 40.0
-                    self.robot.msg.motor_cmd[motor_idx].kd = 2.0
-                    self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            self.robot.msg.crc = self.robot.crc.Crc(self.robot.msg)
-            self.robot.lowcmd_publisher.Write(self.robot.msg)
-            time.sleep(self.robot.control_dt)
-
-        logger.info("Ready to start locomotion!")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Holosoma Locomotion Controller for Unitree G1")
-    parser.add_argument("--repo-id", type=str, default=DEFAULT_HOLOSOMA_REPO_ID)
-    parser.add_argument("--policy", type=str, default="fastsac", choices=["fastsac", "ppo"])
-    parser.add_argument("--local-path", type=str, default=None, help="Path to local ONNX file")
-    args = parser.parse_args()
-
-    # Load policy and detect dimensions
-    policy, obs_dim = load_holosoma_policy(
-        repo_id=args.repo_id,
-        policy_name=args.policy,
-        local_path=args.local_path,
-    )
-
-    # Initialize robot
-    config = UnitreeG1Config()
-    robot = UnitreeG1(config)
-
-    # Initialize controller with detected obs_dim
-    controller = HolosomaLocomotionController(
-        policy=policy,
-        robot=robot,
-        config=config,
-        obs_dim=obs_dim,
-    )
-
-    try:
-        controller.reset_robot()
-        controller.start_locomotion_thread()
-
-        logger.info(f"Robot initialized with Holosoma {'23-DOF' if obs_dim == 82 else '29-DOF'} policy")
-        logger.info("Use remote controller: LY=fwd/back, LX=left/right, RX=rotate")
-        logger.info("Press Ctrl+C to stop")
-
-        while True:
-            time.sleep(1.0)
-    except KeyboardInterrupt:
-        print("\nStopping locomotion...")
-        controller.stop_locomotion_thread()
-        print("Done!")
@@ -1,447 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example: Unitree RL 12-DOF Legs-Only Locomotion (TorchScript)
-
-This example demonstrates loading a 12-DOF legs-only locomotion policy
-(TorchScript .pt format) and running it on the Unitree G1 robot.
-
-Key characteristics:
- Single TorchScript policy (.pt)
- 47D observations, 12D actions (legs only)
- Phase-based gait timing
- Arms and waist held at fixed positions
-"""
-
-import argparse
-import logging
-import threading
-import time
-
-import numpy as np
-import torch
-from huggingface_hub import hf_hub_download
-from scipy.spatial.transform import Rotation as R
-
-from lerobot.robots.unitree_g1.config_unitree_g1 import UnitreeG1Config
-from lerobot.robots.unitree_g1.unitree_g1 import UnitreeG1
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# 12-DOF leg joint configuration
-# Joint order: [L_hip_pitch, L_hip_roll, L_hip_yaw, L_knee, L_ankle_pitch, L_ankle_roll,
-#               R_hip_pitch, R_hip_roll, R_hip_yaw, R_knee, R_ankle_pitch, R_ankle_roll]
-LEG_JOINT_INDICES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
-
-# Default leg angles for standing
-DEFAULT_LEG_ANGLES = np.array([
-    -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,   # left leg
-    -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,   # right leg
-], dtype=np.float32)
-
-# KP/KD for leg joints
-LEG_KPS = np.array([150, 150, 150, 300, 40, 40, 150, 150, 150, 300, 40, 40], dtype=np.float32)
-LEG_KDS = np.array([6, 6, 6, 4, 2, 2, 6, 6, 6, 4, 2, 2], dtype=np.float32)
-
-# Waist configuration (held at zero)
-WAIST_JOINT_INDICES = [12, 13, 14]  # yaw, roll, pitch
-WAIST_KPS = np.array([250, 250, 250], dtype=np.float32)
-WAIST_KDS = np.array([5, 5, 5], dtype=np.float32)
-
-# Arm configuration (indices 15-28, held at initial position)
-ARM_JOINT_INDICES = list(range(15, 29))
-ARM_KPS = np.array([80, 80, 80, 80, 40, 40, 40,   # left arm (shoulder + wrist)
-                   80, 80, 80, 80, 40, 40, 40], dtype=np.float32)  # right arm
-ARM_KDS = np.array([3, 3, 3, 3, 1.5, 1.5, 1.5,
-                   3, 3, 3, 3, 1.5, 1.5, 1.5], dtype=np.float32)
-
-# Control parameters
-# Target period of one locomotion-loop iteration in seconds; also multiplied by the
-# step counter to obtain the elapsed time that drives the gait phase.
-LOCOMOTION_CONTROL_DT = 0.02  # 50Hz control rate
-# Multiplies the policy action before it is added to DEFAULT_LEG_ANGLES.
-LOCOMOTION_ACTION_SCALE = 0.25
-# Multiplies the IMU gyroscope reading that goes into the observation.
-ANG_VEL_SCALE = 0.25
-# Multiplies leg joint positions (relative to DEFAULT_LEG_ANGLES) in the observation.
-DOF_POS_SCALE = 1.0
-# Multiplies leg joint velocities in the observation.
-DOF_VEL_SCALE = 0.05
-# CMD_SCALE and MAX_CMD are both multiplied element-wise into the remote-controller
-# command to form the velocity-command part of the observation.
-CMD_SCALE = np.array([2.0, 2.0, 0.25], dtype=np.float32)
-MAX_CMD = np.array([0.8, 0.5, 1.57], dtype=np.float32)  # max vx, vy, yaw_rate
-
-# Gait parameters
-# Elapsed time is taken modulo this period and normalised to a phase in [0, 1).
-GAIT_PERIOD = 0.8  # seconds
-
-# Hugging Face Hub repository the policy is downloaded from by default.
-DEFAULT_REPO_ID = "nepyope/unitree_rl_locomotion"
-
-
-def load_torchscript_policy(
-    repo_id: str = DEFAULT_REPO_ID,
-    filename: str = "motion.pt",
-) -> torch.jit.ScriptModule:
-    """Load TorchScript locomotion policy from Hugging Face Hub.
-
-    Args:
-        repo_id: Hugging Face Hub repository ID containing the policy.
-        filename: Policy filename (default: motion.pt).
-    """
-    logger.info(f"Loading TorchScript policy from Hugging Face Hub ({repo_id}/{filename})...")
-
-    policy_path = hf_hub_download(
-        repo_id=repo_id,
-        filename=filename,
-    )
-
-    policy = torch.jit.load(policy_path)
-    policy.eval()
-
-    logger.info("TorchScript policy loaded successfully")
-
-    return policy
-
-
-class UnitreeRLLocomotionController:
-    """
-    Handles 12-DOF legs-only locomotion control for the Unitree G1 robot.
-
-    This controller manages:
-    - Single TorchScript policy
-    - 47D observations (single frame)
-    - 12D action output (legs only)
-    - Arms and waist held at fixed positions
-    - Phase-based gait timing
-    """
-
-    def __init__(self, policy, robot, config):
-        self.policy = policy
-        self.robot = robot
-        self.config = config
-
-        # Velocity commands (vx, vy, yaw_rate)
-        self.locomotion_cmd = np.array([0.0, 0.0, 0.0], dtype=np.float32)
-
-        # State variables (12 DOF legs)
-        self.qj = np.zeros(12, dtype=np.float32)
-        self.dqj = np.zeros(12, dtype=np.float32)
-        self.locomotion_action = np.zeros(12, dtype=np.float32)
-        self.locomotion_obs = np.zeros(47, dtype=np.float32)
-
-        # Initial arm positions (captured on reset)
-        self.initial_arm_positions = np.zeros(14, dtype=np.float32)
-
-        # Counter for phase calculation
-        self.counter = 0
-
-        # Thread management
-        self.locomotion_running = False
-        self.locomotion_thread = None
-
-        logger.info("UnitreeRLLocomotionController initialized")
-        logger.info("  Observation dim: 47, Action dim: 12 (legs only)")
-
-    def locomotion_run(self):
-        """12-DOF legs-only locomotion policy loop."""
-        self.counter += 1
-
-        if self.counter == 1:
-            print("\n" + "=" * 60)
-            print("🚀 RUNNING UNITREE RL 12-DOF LOCOMOTION POLICY")
-            print("   47D observations → 12D actions (legs only)")
-            print("   Arms and waist held at fixed positions")
-            print("=" * 60 + "\n")
-
-        # Get current observation
-        robot_state = self.robot.get_observation()
-        if robot_state is None:
-            return
-
-        # Get command from remote controller
-        if robot_state.wireless_remote is not None:
-            self.robot.remote_controller.set(robot_state.wireless_remote)
-        else:
-            self.robot.remote_controller.lx = 0.0
-            self.robot.remote_controller.ly = 0.0
-            self.robot.remote_controller.rx = 0.0
-            self.robot.remote_controller.ry = 0.0
-
-        self.locomotion_cmd[0] = self.robot.remote_controller.ly       # forward/backward
-        self.locomotion_cmd[1] = self.robot.remote_controller.lx * -1  # left/right (inverted)
-        self.locomotion_cmd[2] = self.robot.remote_controller.rx * -1  # yaw (inverted)
-
-        # Get leg joint positions and velocities (12 DOF)
-        for i, motor_idx in enumerate(LEG_JOINT_INDICES):
-            self.qj[i] = robot_state.motor_state[motor_idx].q
-            self.dqj[i] = robot_state.motor_state[motor_idx].dq
-
-        # Get IMU data
-        quat = robot_state.imu_state.quaternion
-        ang_vel = np.array(robot_state.imu_state.gyroscope, dtype=np.float32)
-
-        # Scale observations
-        gravity_orientation = self.robot.get_gravity_orientation(quat)
-        qj_obs = (self.qj - DEFAULT_LEG_ANGLES) * DOF_POS_SCALE
-        dqj_obs = self.dqj * DOF_VEL_SCALE
-        ang_vel_scaled = ang_vel * ANG_VEL_SCALE
-
-        # Calculate phase
-        count = self.counter * LOCOMOTION_CONTROL_DT
-        phase = (count % GAIT_PERIOD) / GAIT_PERIOD
-        sin_phase = np.sin(2 * np.pi * phase)
-        cos_phase = np.cos(2 * np.pi * phase)
-
-        # Build 47D observation vector
-        # [0:3]   - angular velocity (scaled)
-        # [3:6]   - gravity orientation
-        # [6:9]   - velocity command (scaled)
-        # [9:21]  - joint positions (12D, relative to default)
-        # [21:33] - joint velocities (12D, scaled)
-        # [33:45] - previous actions (12D)
-        # [45]    - sin_phase
-        # [46]    - cos_phase
-        self.locomotion_obs[0:3] = ang_vel_scaled
-        self.locomotion_obs[3:6] = gravity_orientation
-        self.locomotion_obs[6:9] = self.locomotion_cmd * CMD_SCALE * MAX_CMD
-        self.locomotion_obs[9:21] = qj_obs
-        self.locomotion_obs[21:33] = dqj_obs
-        self.locomotion_obs[33:45] = self.locomotion_action
-        self.locomotion_obs[45] = sin_phase
-        self.locomotion_obs[46] = cos_phase
-
-        # Run policy inference (TorchScript)
-        obs_tensor = torch.from_numpy(self.locomotion_obs).unsqueeze(0).float()
-        with torch.no_grad():
-            action_tensor = self.policy(obs_tensor)
-        self.locomotion_action = action_tensor.squeeze().numpy()
-
-        # Transform action to target joint positions
-        target_leg_pos = DEFAULT_LEG_ANGLES + self.locomotion_action * LOCOMOTION_ACTION_SCALE
-
-        # Debug logging (first 3 iterations)
-        if self.counter <= 3:
-            print(f"\n[Unitree RL Debug #{self.counter}]")
-            print(f"  Phase: {phase:.3f} (sin={sin_phase:.3f}, cos={cos_phase:.3f})")
-            print(f"  Cmd (vx, vy, yaw): ({self.locomotion_cmd[0]:.2f}, {self.locomotion_cmd[1]:.2f}, {self.locomotion_cmd[2]:.2f})")
-            print(f"  Action range: [{self.locomotion_action.min():.3f}, {self.locomotion_action.max():.3f}]")
-
-        # Send commands to LEG motors (0-11)
-        for i, motor_idx in enumerate(LEG_JOINT_INDICES):
-            self.robot.msg.motor_cmd[motor_idx].q = target_leg_pos[i]
-            self.robot.msg.motor_cmd[motor_idx].qd = 0
-            self.robot.msg.motor_cmd[motor_idx].kp = LEG_KPS[i]
-            self.robot.msg.motor_cmd[motor_idx].kd = LEG_KDS[i]
-            self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-        # Hold WAIST motors at zero (12, 13, 14)
-        for i, motor_idx in enumerate(WAIST_JOINT_INDICES):
-            self.robot.msg.motor_cmd[motor_idx].q = 0.0
-            self.robot.msg.motor_cmd[motor_idx].qd = 0
-            self.robot.msg.motor_cmd[motor_idx].kp = WAIST_KPS[i]
-            self.robot.msg.motor_cmd[motor_idx].kd = WAIST_KDS[i]
-            self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-        # Hold ARM motors at initial position (15-28)
-        for i, motor_idx in enumerate(ARM_JOINT_INDICES):
-            self.robot.msg.motor_cmd[motor_idx].q = self.initial_arm_positions[i]
-            self.robot.msg.motor_cmd[motor_idx].qd = 0
-            self.robot.msg.motor_cmd[motor_idx].kp = ARM_KPS[i]
-            self.robot.msg.motor_cmd[motor_idx].kd = ARM_KDS[i]
-            self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-        # Send command
-        self.robot.send_action(self.robot.msg)
-
-    def _locomotion_thread_loop(self):
-        """Background thread that runs the locomotion policy at specified rate."""
-        logger.info("Locomotion thread started")
-        while self.locomotion_running:
-            start_time = time.time()
-            try:
-                self.locomotion_run()
-            except Exception as e:
-                logger.error(f"Error in locomotion loop: {e}")
-                import traceback
-                traceback.print_exc()
-
-            # Sleep to maintain control rate
-            elapsed = time.time() - start_time
-            sleep_time = max(0, LOCOMOTION_CONTROL_DT - elapsed)
-            time.sleep(sleep_time)
-        logger.info("Locomotion thread stopped")
-
-    def start_locomotion_thread(self):
-        if self.locomotion_running:
-            logger.warning("Locomotion thread already running")
-            return
-
-        logger.info("Starting locomotion control thread...")
-        self.locomotion_running = True
-        self.locomotion_thread = threading.Thread(target=self._locomotion_thread_loop, daemon=True)
-        self.locomotion_thread.start()
-
-        logger.info("Locomotion control thread started!")
-
-    def stop_locomotion_thread(self):
-        if not self.locomotion_running:
-            return
-
-        logger.info("Stopping locomotion control thread...")
-        self.locomotion_running = False
-        if self.locomotion_thread:
-            self.locomotion_thread.join(timeout=2.0)
-        logger.info("Locomotion control thread stopped")
-
-    def reset_robot(self):
-        """Move legs to default standing position over 2 seconds (arms are captured and held)."""
-        logger.info("Moving legs to default position...")
-
-        total_time = 2.0
-        num_step = int(total_time / self.robot.control_dt)
-
-        # Get current state
-        robot_state = self.robot.get_observation()
-
-        # Capture initial arm positions (to hold during locomotion)
-        for i, motor_idx in enumerate(ARM_JOINT_INDICES):
-            self.initial_arm_positions[i] = robot_state.motor_state[motor_idx].q
-        logger.info(f"Captured initial arm positions: {self.initial_arm_positions[:4]}...")
-
-        # Record current leg positions
-        init_leg_pos = np.zeros(12, dtype=np.float32)
-        for i, motor_idx in enumerate(LEG_JOINT_INDICES):
-            init_leg_pos[i] = robot_state.motor_state[motor_idx].q
-
-        # Interpolate legs to default position
-        for step in range(num_step):
-            alpha = step / num_step
-
-            # Interpolate leg positions
-            for i, motor_idx in enumerate(LEG_JOINT_INDICES):
-                target_pos = DEFAULT_LEG_ANGLES[i]
-                self.robot.msg.motor_cmd[motor_idx].q = (
-                    init_leg_pos[i] * (1 - alpha) + target_pos * alpha
-                )
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = LEG_KPS[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = LEG_KDS[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            # Hold waist at zero
-            for i, motor_idx in enumerate(WAIST_JOINT_INDICES):
-                self.robot.msg.motor_cmd[motor_idx].q = 0.0
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = WAIST_KPS[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = WAIST_KDS[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            # Hold arms at initial position
-            for i, motor_idx in enumerate(ARM_JOINT_INDICES):
-                self.robot.msg.motor_cmd[motor_idx].q = self.initial_arm_positions[i]
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = ARM_KPS[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = ARM_KDS[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            self.robot.msg.crc = self.robot.crc.Crc(self.robot.msg)
-            self.robot.lowcmd_publisher.Write(self.robot.msg)
-            time.sleep(self.robot.control_dt)
-
-        logger.info("Reached default leg position")
-
-        # Hold position for 2 seconds
-        logger.info("Holding default position for 2 seconds...")
-        hold_time = 2.0
-        num_hold_steps = int(hold_time / self.robot.control_dt)
-
-        for _ in range(num_hold_steps):
-            # Hold legs at default
-            for i, motor_idx in enumerate(LEG_JOINT_INDICES):
-                self.robot.msg.motor_cmd[motor_idx].q = DEFAULT_LEG_ANGLES[i]
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = LEG_KPS[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = LEG_KDS[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            # Hold waist at zero
-            for i, motor_idx in enumerate(WAIST_JOINT_INDICES):
-                self.robot.msg.motor_cmd[motor_idx].q = 0.0
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = WAIST_KPS[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = WAIST_KDS[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            # Hold arms at initial position
-            for i, motor_idx in enumerate(ARM_JOINT_INDICES):
-                self.robot.msg.motor_cmd[motor_idx].q = self.initial_arm_positions[i]
-                self.robot.msg.motor_cmd[motor_idx].qd = 0
-                self.robot.msg.motor_cmd[motor_idx].kp = ARM_KPS[i]
-                self.robot.msg.motor_cmd[motor_idx].kd = ARM_KDS[i]
-                self.robot.msg.motor_cmd[motor_idx].tau = 0
-
-            self.robot.msg.crc = self.robot.crc.Crc(self.robot.msg)
-            self.robot.lowcmd_publisher.Write(self.robot.msg)
-            time.sleep(self.robot.control_dt)
-
-        logger.info("Ready to start locomotion!")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Unitree RL 12-DOF Locomotion Controller for Unitree G1")
-    parser.add_argument(
-        "--repo-id",
-        type=str,
-        default=DEFAULT_REPO_ID,
-        help=f"Hugging Face Hub repo ID for policy (default: {DEFAULT_REPO_ID})",
-    )
-    parser.add_argument(
-        "--filename",
-        type=str,
-        default="motion.pt",
-        help="Policy filename (default: motion.pt)",
-    )
-    args = parser.parse_args()
-
-    # Load policy
-    policy = load_torchscript_policy(repo_id=args.repo_id, filename=args.filename)
-
-    # Initialize robot
-    config = UnitreeG1Config()
-    robot = UnitreeG1(config)
-
-    # Initialize locomotion controller
-    locomotion_controller = UnitreeRLLocomotionController(
-        policy=policy,
-        robot=robot,
-        config=config,
-    )
-
-    # Reset robot and start locomotion thread
-    try:
-        locomotion_controller.reset_robot()
-        locomotion_controller.start_locomotion_thread()
-
-        # Log status
-        logger.info("Robot initialized with Unitree RL locomotion policy")
-        logger.info("Locomotion controller running in background thread")
-        logger.info("Use remote controller to command velocity:")
-        logger.info("  Left stick Y: forward/backward")
-        logger.info("  Left stick X: left/right")
-        logger.info("  Right stick X: rotate")
-        logger.info("Press Ctrl+C to stop")
-
-        # Keep robot alive
-        while True:
-            time.sleep(1.0)
-    except KeyboardInterrupt:
-        print("\nStopping locomotion...")
-        locomotion_controller.stop_locomotion_thread()
-        print("Done!")
-
@@ -52,4 +52,4 @@ class UnitreeG1Config(RobotConfig):
    control_dt: float = 1.0 / 250.0  # 250Hz

    # socket config for ZMQ bridge
-    robot_ip: str = "172.18.129.215"
+    robot_ip: str = "192.168.123.164"
@@ -99,12 +99,11 @@ def state_forward_loop(
    lowstate_sub: ChannelSubscriber,
    lowstate_sock: zmq.Socket,
    state_period: float,
-    shutdown_event: threading.Event,
 ) -> None:
    """Read observation from DDS and forward to ZMQ clients."""
    last_state_time = 0.0

-    while not shutdown_event.is_set():
+    while True:
        # read from DDS
        msg = lowstate_sub.Read()
        if msg is None:
@@ -129,10 +128,7 @@ def cmd_forward_loop(
 ) -> None:
    """Receive commands from ZMQ and forward to DDS."""
    while True:
-        try:
-            payload = lowcmd_sock.recv()
-        except zmq.ContextTerminated:
-            break
+        payload = lowcmd_sock.recv()
        msg_dict = json.loads(payload.decode("utf-8"))

        topic = msg_dict.get("topic", "")
@@ -186,26 +182,30 @@ def main() -> None:
    lowstate_sock.bind(f"tcp://0.0.0.0:{LOWSTATE_PORT}")

    state_period = 0.002  # ~500 hz
-    shutdown_event = threading.Event()

-    # start observation forwarding in background thread
+    # start observation forwarding thread
    t_state = threading.Thread(
        target=state_forward_loop,
-        args=(lowstate_sub, lowstate_sock, state_period, shutdown_event),
+        args=(lowstate_sub, lowstate_sock, state_period),
+        daemon=True,
    )
    t_state.start()

-    print("bridge running (lowstate -> zmq, lowcmd -> dds)")
+    # start action forwarding thread
+    t_cmd = threading.Thread(
+        target=cmd_forward_loop,
+        args=(lowcmd_sock, lowcmd_pub_debug, crc),
+        daemon=True,
+    )
+    t_cmd.start()

-    # run command forwarding in main thread
+    print("bridge running (lowstate -> zmq, lowcmd -> dds)")
+    # keep main thread alive so daemon threads don't exit
    try:
-        cmd_forward_loop(lowcmd_sock, lowcmd_pub_debug, crc)
+        while True:
+            time.sleep(1.0)
    except KeyboardInterrupt:
        print("shutting down bridge...")
-    finally:
-        shutdown_event.set()
-        ctx.term()  # terminates blocking zmq.recv() calls
-        t_state.join(timeout=2.0)


 if __name__ == "__main__":
@@ -138,8 +138,8 @@ class UnitreeG1(Robot):
        self.lowstate_buffer = DataBuffer()

        # initialize subscribe thread to read robot state
-        self._shutdown_event = threading.Event()
        self.subscribe_thread = threading.Thread(target=self._subscribe_motor_state)
+        self.subscribe_thread.daemon = True
        self.subscribe_thread.start()

        while not self.is_connected:
@@ -174,7 +174,7 @@ class UnitreeG1(Robot):
        self.remote_controller = self.RemoteController()

    def _subscribe_motor_state(self):  # polls robot state @ 250Hz
-        while not self._shutdown_event.is_set():
+        while True:
            start_time = time.time()
            msg = self.lowstate_subscriber.Read()
            if msg is not None:
@@ -221,8 +221,7 @@ class UnitreeG1(Robot):
        ChannelFactoryInitialize(0)

    def disconnect(self):
-        self._shutdown_event.set()
-        self.subscribe_thread.join(timeout=2.0)
+        pass

    def get_observation(self) -> dict[str, Any]:
        return self.lowstate_buffer.get_data()
Author	SHA1	Message	Date
Pepijn	9c74cbe599	push to specific repo	2025-12-02 18:35:16 +01:00
Pepijn	fa3919a0ff	add push to hub	2025-12-02 18:30:11 +01:00
Pepijn	e38346316b	add aggregate	2025-12-02 18:27:50 +01:00
Pepijn	2a2b648891	fix use local dir	2025-12-02 18:11:20 +01:00
Pepijn	cf36f4b873	add localdir	2025-12-02 17:26:44 +01:00
Pepijn	e1ae51b02a	Add conversion script	2025-12-02 16:51:36 +01:00