From f51272362ccfb8a3203a1545722ba4b0b9f0f421 Mon Sep 17 00:00:00 2001 From: AdilZouitine Date: Thu, 11 Sep 2025 21:05:20 +0200 Subject: [PATCH] refactor(processor): update migration script for policy normalization and hub integration - Modified the migration script to include a branch argument for pushing to the hub, enhancing flexibility in version control. - Improved error handling by ensuring the policy type is extracted from the configuration, promoting robustness. - Streamlined the process of saving and pushing model components to the hub, allowing for a single commit with optional PR creation. - Updated the commit message and description for better clarity on the migration changes and benefits, ensuring users are informed of the new architecture and usage. --- .../processor/migrate_policy_normalization.py | 145 ++++++++++++++---- 1 file changed, 114 insertions(+), 31 deletions(-) diff --git a/src/lerobot/processor/migrate_policy_normalization.py b/src/lerobot/processor/migrate_policy_normalization.py index e10c88bf4..c4e25a515 100644 --- a/src/lerobot/processor/migrate_policy_normalization.py +++ b/src/lerobot/processor/migrate_policy_normalization.py @@ -35,8 +35,8 @@ This script performs the following steps: Usage: python src/lerobot/processor/migrate_policy_normalization.py \ --pretrained-path lerobot/act_aloha_sim_transfer_cube_human \ - --policy-type act \ - --push-to-hub + --push-to-hub \ + --branch main Note: This script now uses the modern `make_pre_post_processors` and `make_policy_config` factory functions from `lerobot.policies.factory` to create processors and configurations, @@ -54,7 +54,7 @@ from pathlib import Path from typing import Any import torch -from huggingface_hub import hf_hub_download +from huggingface_hub import HfApi, hf_hub_download from safetensors.torch import load_file as load_safetensors from lerobot.configs.types import FeatureType, NormalizationMode, PolicyFeature @@ -368,10 +368,10 @@ def main(): parser.add_argument("--revision", type=str, default=None, help="Revision of the model to load") parser.add_argument("--private", action="store_true", help="Make the hub repository private") parser.add_argument( - "--policy-type", + "--branch", type=str, - required=True, - help="Policy type (act, diffusion, pi0, pi0fast, smolvla, tdmpc, vqbet, sac, classifier)", + default=None, + help="Git branch to use when pushing to hub. If specified, a PR will be created automatically (default: push directly to main)", ) args = parser.parse_args() @@ -421,6 +421,13 @@ def main(): output_dir.mkdir(parents=True, exist_ok=True) + # Extract policy type from config + if "type" not in config: + raise ValueError("Policy type not found in config.json. The config must contain a 'type' field.") + + policy_type = config["type"] + print(f"Detected policy type: {policy_type}") + # Clean up config - remove fields that shouldn't be passed to config constructor cleaned_config = dict(config) @@ -431,9 +438,6 @@ def main(): print(f"Removing '{field}' field from config") del cleaned_config[field] - # Use the policy type from command line argument - policy_type = args.policy_type - # Convert input_features and output_features to PolicyFeature objects if they exist if "input_features" in cleaned_config: cleaned_config["input_features"] = convert_features_to_policy_features( @@ -476,23 +480,15 @@ def main(): else: raise ValueError("--hub-repo-id must be specified when pushing local model to hub") - # Save preprocessor and postprocessor to root directory + # Save all components to local directory first print(f"Saving preprocessor to {output_dir}...") preprocessor.save_pretrained(output_dir) - if args.push_to_hub and hub_repo_id: - preprocessor.push_to_hub(repo_id=hub_repo_id, private=args.private) print(f"Saving postprocessor to {output_dir}...") postprocessor.save_pretrained(output_dir) - if args.push_to_hub and hub_repo_id: - postprocessor.push_to_hub(repo_id=hub_repo_id, private=args.private) - # Save model using the policy's save_pretrained method print(f"Saving model to {output_dir}...") - if args.push_to_hub and hub_repo_id: - policy.save_pretrained(output_dir, push_to_hub=True, repo_id=hub_repo_id, private=args.private) - else: - policy.save_pretrained(output_dir) + policy.save_pretrained(output_dir) # Generate and save model card print("Generating model card...") @@ -512,24 +508,111 @@ def main(): # Save model card locally card.save(str(output_dir / "README.md")) print(f"Model card saved to {output_dir / 'README.md'}") - # Push model card to hub if requested + # Push all files to hub in a single operation if requested if args.push_to_hub and hub_repo_id: - from huggingface_hub import HfApi - api = HfApi() - api.upload_file( - path_or_fileobj=str(output_dir / "README.md"), - path_in_repo="README.md", - repo_id=hub_repo_id, - repo_type="model", - commit_message="Add model card for migrated model", - ) - print("Model card pushed to hub") + + # Determine if we should create a PR (automatically if branch is specified) + create_pr = args.branch is not None + target_location = f"branch '{args.branch}'" if args.branch else "main branch" + + print(f"Pushing all migrated files to {hub_repo_id} on {target_location}...") + + # Upload all files in a single commit with automatic PR creation if branch specified + commit_message = "Migrate policy to PolicyProcessorPipeline system" + commit_description = None + + if create_pr: + # Separate commit description for PR body + commit_description = """🤖 **Automated Policy Migration to PolicyProcessorPipeline** + +This PR migrates your model to the new LeRobot policy format using the modern PolicyProcessorPipeline architecture. + +## What Changed + +### ✨ **New Architecture - PolicyProcessorPipeline** +Your model now uses external PolicyProcessorPipeline components for data processing instead of built-in normalization layers. This provides: +- **Modularity**: Separate preprocessing and postprocessing pipelines +- **Flexibility**: Easy to swap, configure, and debug processing steps +- **Compatibility**: Works with the latest LeRobot ecosystem + +### 🔧 **Normalization Extraction** +We've extracted normalization statistics from your model's state_dict and removed the built-in normalization layers: +- **Extracted patterns**: `normalize_inputs.*`, `unnormalize_outputs.*`, `normalize.*`, `unnormalize.*`, `input_normalizer.*`, `output_normalizer.*` +- **Statistics preserved**: Mean, std, min, max values for all features +- **Clean model**: State dict now contains only core model weights + +### 📦 **Files Added** +- **preprocessor_config.json**: Configuration for input preprocessing pipeline +- **postprocessor_config.json**: Configuration for output postprocessing pipeline +- **model.safetensors**: Clean model weights without normalization layers +- **config.json**: Updated model configuration +- **train_config.json**: Training configuration +- **README.md**: Updated model card with migration information + +### 🚀 **Benefits** +- **Backward Compatible**: Your model behavior remains identical +- **Future Ready**: Compatible with latest LeRobot features and updates +- **Debuggable**: Easy to inspect and modify processing steps +- **Portable**: Processors can be shared and reused across models + +### 💻 **Usage** +```python +# Load your migrated model +from lerobot.policies import get_policy_class +from lerobot.processor import PolicyProcessorPipeline + +# The preprocessor and postprocessor are now external +preprocessor = PolicyProcessorPipeline.from_pretrained("your-model-repo", config_filename="preprocessor_config.json") +postprocessor = PolicyProcessorPipeline.from_pretrained("your-model-repo", config_filename="postprocessor_config.json") +policy = get_policy_class("your-policy-type").from_pretrained("your-model-repo") + +# Process data through the pipeline +processed_batch = preprocessor(raw_batch) +action = policy(processed_batch) +final_action = postprocessor(action) +``` + +*Generated automatically by the LeRobot policy migration script*""" + + upload_kwargs = { + "repo_id": hub_repo_id, + "folder_path": output_dir, + "repo_type": "model", + "commit_message": commit_message, + "revision": args.branch, + "create_pr": create_pr, + "allow_patterns": ["*.json", "*.safetensors", "*.md"], + "ignore_patterns": ["*.tmp", "*.log"], + } + + # Add commit_description for PR body if creating PR + if create_pr and commit_description: + upload_kwargs["commit_description"] = commit_description + + api.upload_folder(**upload_kwargs) + + if create_pr: + print("All files pushed and pull request created successfully!") + else: + print("All files pushed to main branch successfully!") print("\nMigration complete!") print(f"Migrated model saved to: {output_dir}") if args.push_to_hub and hub_repo_id: - print(f"Successfully pushed to https://huggingface.co/{hub_repo_id}") + if args.branch: + print( + f"Successfully pushed all files to branch '{args.branch}' and created PR on https://huggingface.co/{hub_repo_id}" + ) + else: + print(f"Successfully pushed to https://huggingface.co/{hub_repo_id}") + if args.branch: + print(f"\nView the branch at: https://huggingface.co/{hub_repo_id}/tree/{args.branch}") + print( + f"View the PR at: https://huggingface.co/{hub_repo_id}/discussions (look for the most recent PR)" + ) + else: + print(f"\nView the changes at: https://huggingface.co/{hub_repo_id}") if __name__ == "__main__":