diff --git a/docs/source/act.mdx b/docs/source/act.mdx index 520396ac3..8e91edcf9 100644 --- a/docs/source/act.mdx +++ b/docs/source/act.mdx @@ -90,6 +90,6 @@ lerobot-record \ --dataset.single_task="Your task description" \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --policy.path=${HF_USER}/act_policy ``` diff --git a/docs/source/earthrover_mini_plus.mdx b/docs/source/earthrover_mini_plus.mdx index 802e49ae3..508c0e3a9 100644 --- a/docs/source/earthrover_mini_plus.mdx +++ b/docs/source/earthrover_mini_plus.mdx @@ -194,7 +194,7 @@ lerobot-record \ --dataset.single_task="Navigate around obstacles" \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --display_data=true ``` diff --git a/docs/source/groot.mdx b/docs/source/groot.mdx index 1f74ad73f..d69d10a57 100644 --- a/docs/source/groot.mdx +++ b/docs/source/groot.mdx @@ -123,7 +123,7 @@ lerobot-record \ --dataset.single_task="Grab and handover the red cube to the other arm" \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --policy.path=/groot-bimanual \ # your trained model --dataset.episode_time_s=30 \ --dataset.reset_time_s=10 diff --git a/docs/source/hope_jr.mdx b/docs/source/hope_jr.mdx index 6c01bc912..1f3b08fd7 100644 --- a/docs/source/hope_jr.mdx +++ b/docs/source/hope_jr.mdx @@ -232,7 +232,7 @@ lerobot-record \ --dataset.private=true \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --display_data=true ``` @@ -278,6 +278,6 @@ lerobot-record \ --dataset.num_episodes=10 \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # 
--dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --policy.path=outputs/train/hopejr_hand/checkpoints/last/pretrained_model ``` diff --git a/docs/source/il_robots.mdx b/docs/source/il_robots.mdx index 6bb367c36..07789225a 100644 --- a/docs/source/il_robots.mdx +++ b/docs/source/il_robots.mdx @@ -193,7 +193,7 @@ lerobot-record \ --dataset.num_episodes=5 \ --dataset.single_task="Grab the black cube" \ --dataset.streaming_encoding=true \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --dataset.encoder_threads=2 ``` diff --git a/docs/source/lerobot-dataset-v3.mdx b/docs/source/lerobot-dataset-v3.mdx index 6be937e5e..6f3e6d948 100644 --- a/docs/source/lerobot-dataset-v3.mdx +++ b/docs/source/lerobot-dataset-v3.mdx @@ -43,7 +43,7 @@ lerobot-record \ --dataset.num_episodes=5 \ --dataset.single_task="Grab the black cube" \ --dataset.streaming_encoding=true \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --dataset.encoder_threads=2 ``` diff --git a/docs/source/reachy2.mdx b/docs/source/reachy2.mdx index b70095960..4b08569db 100644 --- a/docs/source/reachy2.mdx +++ b/docs/source/reachy2.mdx @@ -161,7 +161,7 @@ lerobot-record \ --dataset.private=true \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --display_data=true ``` @@ -203,7 +203,7 @@ lerobot-record \ --dataset.private=true \ --dataset.streaming_encoding=true \ --dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ --display_data=true ``` diff --git a/docs/source/smolvla.mdx b/docs/source/smolvla.mdx index 8a24806b9..6c63c5d11 100644 --- a/docs/source/smolvla.mdx +++ b/docs/source/smolvla.mdx @@ -108,7 +108,7 @@ lerobot-record \ --dataset.num_episodes=10 \ --dataset.streaming_encoding=true \ 
--dataset.encoder_threads=2 \ - # --dataset.camera_encoder_config.vcodec=auto \ + # --dataset.camera_encoder.vcodec=auto \ # <- Teleop optional if you want to teleoperate in between episodes \ # --teleop.type=so100_leader \ # --teleop.port=/dev/ttyACM0 \ diff --git a/docs/source/streaming_video_encoding.mdx b/docs/source/streaming_video_encoding.mdx index 4b65382f9..36c599bad 100644 --- a/docs/source/streaming_video_encoding.mdx +++ b/docs/source/streaming_video_encoding.mdx @@ -17,7 +17,7 @@ This makes `save_episode()` near-instant (the video is already encoded by the ti | Parameter | CLI Flag | Type | Default | Description | | ----------------------- | ---------------------------------------- | ------------- | ------------- | ----------------------------------------------------------------- | | `streaming_encoding` | `--dataset.streaming_encoding` | `bool` | `True` | Enable real-time encoding during capture | -| `vcodec` | `--dataset.camera_encoder_config.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder | +| `vcodec` | `--dataset.camera_encoder.vcodec` | `str` | `"libsvtav1"` | Video codec. `"auto"` detects best HW encoder | | `encoder_threads` | `--dataset.encoder_threads` | `int \| None` | `None` (auto) | Threads per encoder instance. `None` will leave the vcoded decide | | `encoder_queue_maxsize` | `--dataset.encoder_queue_maxsize` | `int` | `30` | Max buffered frames per camera (~1s at 30fps). 
Consumes RAM | @@ -84,13 +84,13 @@ Use HW encoding when: | Encoder | Platform | Hardware | CLI Value | | ------------------- | ------------- | ------------------------------------------------------------------------------------------------ | ---------------------------------------------------------- | -| `h264_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.camera_encoder_config.vcodec=h264_videotoolbox` | -| `hevc_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.camera_encoder_config.vcodec=hevc_videotoolbox` | -| `h264_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.camera_encoder_config.vcodec=h264_nvenc` | -| `hevc_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.camera_encoder_config.vcodec=hevc_nvenc` | -| `h264_vaapi` | Linux | Intel/AMD GPU | `--dataset.camera_encoder_config.vcodec=h264_vaapi` | -| `h264_qsv` | Linux/Windows | Intel Quick Sync | `--dataset.camera_encoder_config.vcodec=h264_qsv` | -| `auto` | Any | Probes the system for available HW encoders. Falls back to `libsvtav1` if no HW encoder is found | `--dataset.camera_encoder_config.vcodec=auto` | +| `h264_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.camera_encoder.vcodec=h264_videotoolbox` | +| `hevc_videotoolbox` | macOS | Apple Silicon / Intel | `--dataset.camera_encoder.vcodec=hevc_videotoolbox` | +| `h264_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.camera_encoder.vcodec=h264_nvenc` | +| `hevc_nvenc` | Linux/Windows | NVIDIA GPU | `--dataset.camera_encoder.vcodec=hevc_nvenc` | +| `h264_vaapi` | Linux | Intel/AMD GPU | `--dataset.camera_encoder.vcodec=h264_vaapi` | +| `h264_qsv` | Linux/Windows | Intel Quick Sync | `--dataset.camera_encoder.vcodec=h264_qsv` | +| `auto` | Any | Probes the system for available HW encoders. Falls back to `libsvtav1` if no HW encoder is found | `--dataset.camera_encoder.vcodec=auto` | > [!NOTE] > In order to use the HW accelerated encoders you might need to upgrade your GPU drivers. 
@@ -102,12 +102,12 @@ Use HW encoding when: | Symptom | Likely Cause | Fix | | ------------------------------------------------------------------ | -------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| System freezes or choppy robot movement or Rerun visualization lag | CPU starved (100% load usage) | Close other apps, reduce encoding throughput, lower `encoder_threads`, use `h264`, use `display_data=False`. If the CPU continues to be at 100% then it might be insufficient for your setup, consider `--dataset.streaming_encoding=false` or HW encoding (`--dataset.camera_encoder_config.vcodec=auto`) | -| "Encoder queue full" warnings or dropped frames in dataset | Encoder can't keep up (Queue overflow) | If CPU is not at 100%: Increase `encoder_threads`, increase `encoder_queue_maxsize` or use HW encoding (`--dataset.camera_encoder_config.vcodec=auto`). | +| System freezes or choppy robot movement or Rerun visualization lag | CPU starved (100% load usage) | Close other apps, reduce encoding throughput, lower `encoder_threads`, use `h264`, use `display_data=False`. If the CPU continues to be at 100% then it might be insufficient for your setup, consider `--dataset.streaming_encoding=false` or HW encoding (`--dataset.camera_encoder.vcodec=auto`) | +| "Encoder queue full" warnings or dropped frames in dataset | Encoder can't keep up (Queue overflow) | If CPU is not at 100%: Increase `encoder_threads`, increase `encoder_queue_maxsize` or use HW encoding (`--dataset.camera_encoder.vcodec=auto`). | | High RAM usage | Queue filling faster than encoding | `encoder_threads` too low or CPU insufficient. 
Reduce `encoder_queue_maxsize` or use HW encoding | | Large video files | Using HW encoder or H.264 | Expected trade-off. Switch to `libsvtav1` if CPU allows | | `save_episode()` still slow | `streaming_encoding` is `False` | Set `--dataset.streaming_encoding=true` | -| Encoder thread crash | Codec not available or invalid settings | Check `vcodec` is installed, try `--dataset.camera_encoder_config.vcodec=auto` | +| Encoder thread crash | Codec not available or invalid settings | Check `vcodec` is installed, try `--dataset.camera_encoder.vcodec=auto` | | Recorded dataset is missing frames | CPU/GPU starvation or occasional load spikes | If ~5% of frames are missing, your system is likely overloaded — follow the recommendations above. If fewer frames are missing (~2%), they are probably due to occasional transient load spikes (often at startup) and can be considered expected. | ## 6. Recommended Configurations @@ -146,7 +146,7 @@ On very constrained systems, streaming encoding may compete too heavily with the # 2camsx 640x480x3 @30fps: Requires some tuning. # Use H.264, disable streaming, consider batching encoding -lerobot-record --dataset.camera_encoder_config.vcodec=h264 --dataset.streaming_encoding=false ... +lerobot-record --dataset.camera_encoder.vcodec=h264 --dataset.streaming_encoding=false ... ``` ## 7. 
Closing note diff --git a/docs/source/using_dataset_tools.mdx b/docs/source/using_dataset_tools.mdx index 11812f123..49247a6c1 100644 --- a/docs/source/using_dataset_tools.mdx +++ b/docs/source/using_dataset_tools.mdx @@ -117,10 +117,10 @@ lerobot-edit-dataset \ --repo_id lerobot/pusht_image \ --operation.type convert_image_to_video \ --operation.output_dir outputs/pusht_video \ - --operation.camera_encoder_config.vcodec libsvtav1 \ - --operation.camera_encoder_config.pix_fmt yuv420p \ - --operation.camera_encoder_config.g 2 \ - --operation.camera_encoder_config.crf 30 + --operation.camera_encoder.vcodec libsvtav1 \ + --operation.camera_encoder.pix_fmt yuv420p \ + --operation.camera_encoder.g 2 \ + --operation.camera_encoder.crf 30 # Convert only specific episodes lerobot-edit-dataset \ @@ -147,7 +147,7 @@ lerobot-edit-dataset \ **Parameters:** - `output_dir`: Custom output directory (optional - by default uses `new_repo_id` or `{repo_id}_video`) -- `camera_encoder_config`: Video encoder settings — all sub-fields accessible via `--operation.camera_encoder_config.. See [Video Encoding Parameters](./video_encoding_parameters) for more details. +- `camera_encoder`: Video encoder settings — all sub-fields accessible via `--operation.camera_encoder.<field>`. See [Video Encoding Parameters](./video_encoding_parameters) for more details. - `episode_indices`: List of specific episodes to convert (default: all episodes) - `num_workers`: Number of parallel workers for processing (default: 4) diff --git a/src/lerobot/configs/dataset.py b/src/lerobot/configs/dataset.py index 24154eecb..d5c6fa312 100644 --- a/src/lerobot/configs/dataset.py +++ b/src/lerobot/configs/dataset.py @@ -58,8 +58,8 @@ class DatasetRecordConfig: # Set to 1 for immediate encoding (default behavior), or higher for batched encoding video_encoding_batch_size: int = 1 # Video encoder settings for camera MP4s (codec, quality, GOP, etc.). Tuned via CLI nested keys, - # e.g. 
``--dataset.camera_encoder_config.vcodec=h264`` (see ``VideoEncoderConfig``). - camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) + # e.g. ``--dataset.camera_encoder.vcodec=h264`` (see ``VideoEncoderConfig``). + camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) # Enable streaming video encoding: encode frames in real-time during capture instead # of writing PNG images first. Makes save_episode() near-instant. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding streaming_encoding: bool = False diff --git a/src/lerobot/configs/eval.py b/src/lerobot/configs/eval.py index d1cebd27f..e570effc5 100644 --- a/src/lerobot/configs/eval.py +++ b/src/lerobot/configs/eval.py @@ -18,8 +18,8 @@ from logging import getLogger from pathlib import Path from lerobot import envs, policies # noqa: F401 -from lerobot.configs import parser +from . import parser from .default import EvalConfig from .policies import PreTrainedConfig diff --git a/src/lerobot/configs/rewards.py b/src/lerobot/configs/rewards.py index d495160bf..a53d5a417 100644 --- a/src/lerobot/configs/rewards.py +++ b/src/lerobot/configs/rewards.py @@ -27,12 +27,13 @@ from huggingface_hub import hf_hub_download from huggingface_hub.constants import CONFIG_NAME from huggingface_hub.errors import HfHubHTTPError -from lerobot.configs.types import PolicyFeature from lerobot.optim.optimizers import OptimizerConfig from lerobot.optim.schedulers import LRSchedulerConfig from lerobot.utils.device_utils import auto_select_torch_device, is_torch_device_available from lerobot.utils.hub import HubMixin +from .types import PolicyFeature + T = TypeVar("T", bound="RewardModelConfig") logger = logging.getLogger(__name__) diff --git a/src/lerobot/configs/train.py b/src/lerobot/configs/train.py index 388de9437..d7fc131d6 100644 --- a/src/lerobot/configs/train.py +++ b/src/lerobot/configs/train.py @@ -25,11 +25,11 @@ from 
huggingface_hub import hf_hub_download from huggingface_hub.errors import HfHubHTTPError from lerobot import envs -from lerobot.configs import parser from lerobot.optim import LRSchedulerConfig, OptimizerConfig from lerobot.utils.hub import HubMixin from lerobot.utils.sample_weighting import SampleWeightingConfig +from . import parser from .default import DatasetConfig, EvalConfig, PeftConfig, WandBConfig from .policies import PreTrainedConfig from .rewards import RewardModelConfig diff --git a/src/lerobot/datasets/dataset_metadata.py b/src/lerobot/datasets/dataset_metadata.py index bbc8f6f89..289160650 100644 --- a/src/lerobot/datasets/dataset_metadata.py +++ b/src/lerobot/datasets/dataset_metadata.py @@ -514,7 +514,7 @@ class LeRobotDatasetMetadata: def update_video_info( self, video_key: str | None = None, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, ) -> None: """Populate per-feature video info in ``info.json``. @@ -524,7 +524,7 @@ class LeRobotDatasetMetadata: Args: video_key: If provided, only update this video key. Otherwise update all video keys in the dataset. - camera_encoder_config: Encoder configuration used to produce the + camera_encoder: Encoder configuration used to produce the videos. When provided, its fields are recorded as ``video.`` entries alongside the stream-derived ``video.*`` entries (see :func:`get_video_info`). 
@@ -537,7 +537,7 @@ class LeRobotDatasetMetadata: if not self.features[key].get("info", None): video_path = self.root / self.video_path.format(video_key=key, chunk_index=0, file_index=0) self.info.features[key]["info"] = get_video_info( - video_path, camera_encoder_config=camera_encoder_config + video_path, camera_encoder=camera_encoder ) def update_chunk_settings( diff --git a/src/lerobot/datasets/dataset_tools.py b/src/lerobot/datasets/dataset_tools.py index 3cffb4783..ace9f7339 100644 --- a/src/lerobot/datasets/dataset_tools.py +++ b/src/lerobot/datasets/dataset_tools.py @@ -96,7 +96,7 @@ def delete_episodes( episode_indices: list[int], output_dir: str | Path | None = None, repo_id: str | None = None, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, ) -> LeRobotDataset: """Delete episodes from a LeRobotDataset and create a new dataset. @@ -105,7 +105,7 @@ def delete_episodes( episode_indices: List of episode indices to delete. output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. - camera_encoder_config: Video encoder settings used when re-encoding video segments + camera_encoder: Video encoder settings used when re-encoding video segments (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). 
""" if not episode_indices: @@ -139,7 +139,7 @@ def delete_episodes( video_metadata = None if dataset.meta.video_keys: - video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder_config) + video_metadata = _copy_and_reindex_videos(dataset, new_meta, episode_mapping, camera_encoder) data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping) @@ -161,7 +161,7 @@ def split_dataset( dataset: LeRobotDataset, splits: dict[str, float | list[int]], output_dir: str | Path | None = None, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, ) -> dict[str, LeRobotDataset]: """Split a LeRobotDataset into multiple smaller datasets. @@ -170,7 +170,7 @@ def split_dataset( splits: Either a dict mapping split names to episode indices, or a dict mapping split names to fractions (must sum to <= 1.0). output_dir: Root directory where the split datasets will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. - camera_encoder_config: Video encoder settings used when re-encoding video segments + camera_encoder: Video encoder settings used when re-encoding video segments (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). Examples: @@ -233,7 +233,7 @@ def split_dataset( video_metadata = None if dataset.meta.video_keys: video_metadata = _copy_and_reindex_videos( - dataset, new_meta, episode_mapping, camera_encoder_config + dataset, new_meta, episode_mapping, camera_encoder ) data_metadata = _copy_and_reindex_data(dataset, new_meta, episode_mapping) @@ -590,7 +590,7 @@ def _keep_episodes_from_video_with_av( output_path: Path, episodes_to_keep: list[tuple[int, int]], fps: float, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, ) -> None: """Keep only specified episodes from a video file using PyAV. 
@@ -604,11 +604,11 @@ def _keep_episodes_from_video_with_av( Ranges are half-open intervals: [start_frame, end_frame), where start_frame is inclusive and end_frame is exclusive. fps: Frame rate of the video. - camera_encoder_config: Video encoder settings + camera_encoder: Video encoder settings (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). """ - if camera_encoder_config is None: - camera_encoder_config = camera_encoder_defaults() + if camera_encoder is None: + camera_encoder = camera_encoder_defaults() from fractions import Fraction import av @@ -632,13 +632,13 @@ def _keep_episodes_from_video_with_av( # Convert fps to Fraction for PyAV compatibility. fps_fraction = Fraction(fps).limit_denominator(1000) - codec_options = camera_encoder_config.get_codec_options(as_strings=True) - v_out = out.add_stream(camera_encoder_config.vcodec, rate=fps_fraction, options=codec_options) + codec_options = camera_encoder.get_codec_options(as_strings=True) + v_out = out.add_stream(camera_encoder.vcodec, rate=fps_fraction, options=codec_options) # PyAV type stubs don't distinguish video streams from audio/subtitle streams. v_out.width = v_in.codec_context.width v_out.height = v_in.codec_context.height - v_out.pix_fmt = camera_encoder_config.pix_fmt + v_out.pix_fmt = camera_encoder.pix_fmt # Set time_base to match the frame rate for proper timestamp handling. v_out.time_base = Fraction(1, int(fps)) @@ -701,7 +701,7 @@ def _copy_and_reindex_videos( src_dataset: LeRobotDataset, dst_meta: LeRobotDatasetMetadata, episode_mapping: dict[int, int], - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, ) -> dict[int, dict]: """Copy and filter video files, only re-encoding files with deleted episodes. 
@@ -713,14 +713,14 @@ def _copy_and_reindex_videos( src_dataset: Source dataset to copy from dst_meta: Destination metadata object episode_mapping: Mapping from old episode indices to new indices - camera_encoder_config: Video encoder settings used when re-encoding segments + camera_encoder: Video encoder settings used when re-encoding segments (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). Returns: dict mapping episode index to its video metadata (chunk_index, file_index, timestamps) """ - if camera_encoder_config is None: - camera_encoder_config = camera_encoder_defaults() + if camera_encoder is None: + camera_encoder = camera_encoder_defaults() if src_dataset.meta.episodes is None: src_dataset.meta.episodes = load_episodes(src_dataset.meta.root) @@ -809,7 +809,7 @@ def _copy_and_reindex_videos( dst_video_path, episodes_to_keep_ranges, src_dataset.meta.fps, - camera_encoder_config, + camera_encoder, ) cumulative_ts = 0.0 @@ -1280,7 +1280,7 @@ def _estimate_frame_size_via_calibration( episode_indices: list[int], temp_dir: Path, fps: int, - camera_encoder_config: VideoEncoderConfig, + camera_encoder: VideoEncoderConfig, num_calibration_frames: int = 30, ) -> float: """Estimate MB per frame by encoding a small calibration sample. @@ -1294,7 +1294,7 @@ def _estimate_frame_size_via_calibration( episode_indices: List of episode indices being processed. temp_dir: Temporary directory for calibration files. fps: Frames per second for video encoding. - camera_encoder_config: Video encoder settings used for calibration encoding. + camera_encoder: Video encoder settings used for calibration encoding. num_calibration_frames: Number of frames to use for calibration (default: 30). 
Returns: @@ -1330,7 +1330,7 @@ def _estimate_frame_size_via_calibration( imgs_dir=calibration_dir, video_path=calibration_video_path, fps=fps, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, overwrite=True, ) @@ -1648,7 +1648,7 @@ def convert_image_to_video_dataset( dataset: LeRobotDataset, output_dir: Path | None = None, repo_id: str | None = None, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, episode_indices: list[int] | None = None, num_workers: int = 4, max_episodes_per_batch: int | None = None, @@ -1663,7 +1663,7 @@ def convert_image_to_video_dataset( dataset: The source LeRobot dataset with images output_dir: Root directory where the edited dataset will be stored. If not specified, defaults to $HF_LEROBOT_HOME/repo_id. Equivalent to new_root in EditDatasetConfig. repo_id: Edited dataset identifier. Equivalent to new_repo_id in EditDatasetConfig. - camera_encoder_config: Video encoder settings + camera_encoder: Video encoder settings (``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`). 
episode_indices: List of episode indices to convert (None = all episodes) num_workers: Number of threads for parallel processing (default: 4) @@ -1673,8 +1673,8 @@ def convert_image_to_video_dataset( Returns: New LeRobotDataset with images encoded as videos """ - if camera_encoder_config is None: - camera_encoder_config = camera_encoder_defaults() + if camera_encoder is None: + camera_encoder = camera_encoder_defaults() # Check that it's an image dataset if len(dataset.meta.video_keys) > 0: @@ -1700,8 +1700,8 @@ def convert_image_to_video_dataset( f"Converting {len(episode_indices)} episodes with {len(img_keys)} cameras from {dataset.repo_id}" ) logging.info( - f"Video codec: {camera_encoder_config.vcodec}, pixel format: {camera_encoder_config.pix_fmt}, " - f"GOP: {camera_encoder_config.g}, CRF: {camera_encoder_config.crf}" + f"Video codec: {camera_encoder.vcodec}, pixel format: {camera_encoder.pix_fmt}, " + f"GOP: {camera_encoder.g}, CRF: {camera_encoder.crf}" ) # Create new features dict, converting image features to video features @@ -1772,7 +1772,7 @@ def convert_image_to_video_dataset( episode_indices=episode_indices, temp_dir=temp_dir, fps=fps, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, ) logging.info(f"Processing camera: {img_key}") @@ -1814,7 +1814,7 @@ def convert_image_to_video_dataset( imgs_dir=imgs_dir, video_path=video_path, fps=fps, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, overwrite=True, ) @@ -1861,7 +1861,7 @@ def convert_image_to_video_dataset( video_key=img_key, chunk_index=0, file_index=0 ) new_meta.info.features[img_key]["info"] = get_video_info( - video_path, camera_encoder_config=camera_encoder_config + video_path, camera_encoder=camera_encoder ) write_info(new_meta.info, new_meta.root) diff --git a/src/lerobot/datasets/dataset_writer.py b/src/lerobot/datasets/dataset_writer.py index 76b740d73..6be63194f 100644 --- a/src/lerobot/datasets/dataset_writer.py +++ 
b/src/lerobot/datasets/dataset_writer.py @@ -67,7 +67,7 @@ def _encode_video_worker( episode_index: int, root: Path, fps: int, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, encoder_threads: int | None = None, ) -> Path: temp_path = Path(tempfile.mkdtemp(dir=root)) / f"{video_key}_{episode_index:03d}.mp4" @@ -77,7 +77,7 @@ def _encode_video_worker( img_dir, temp_path, fps, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, encoder_threads=encoder_threads, overwrite=True, ) @@ -96,7 +96,7 @@ class DatasetWriter: self, meta: LeRobotDatasetMetadata, root: Path, - camera_encoder_config: VideoEncoderConfig | None, + camera_encoder: VideoEncoderConfig | None, encoder_threads: int | None, batch_encoding_size: int, streaming_encoder: StreamingVideoEncoder | None = None, @@ -108,7 +108,7 @@ class DatasetWriter: meta: Dataset metadata instance (used for feature schema, chunk settings, and episode persistence). root: Local dataset root directory. - camera_encoder_config: Video encoder settings applied to all cameras. + camera_encoder: Video encoder settings applied to all cameras. ``None`` uses :func:`~lerobot.configs.camera_encoder_defaults`. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. 
@@ -120,7 +120,7 @@ class DatasetWriter: """ self._meta = meta self._root = root - self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults() + self._camera_encoder = camera_encoder or camera_encoder_defaults() self._encoder_threads = encoder_threads self._batch_encoding_size = batch_encoding_size self._streaming_encoder = streaming_encoder @@ -293,7 +293,7 @@ class DatasetWriter: episode_index, self._root, self._meta.fps, - self._camera_encoder_config, + self._camera_encoder, self._encoder_threads, ): video_key for video_key in self._meta.video_keys @@ -504,7 +504,7 @@ class DatasetWriter: # Update video info (only needed when first episode is encoded) if episode_index == 0: - self._meta.update_video_info(video_key, camera_encoder_config=self._camera_encoder_config) + self._meta.update_video_info(video_key, camera_encoder=self._camera_encoder) write_info(self._meta.info, self._meta.root) metadata = { @@ -577,7 +577,7 @@ class DatasetWriter: episode_index, self._root, self._meta.fps, - self._camera_encoder_config, + self._camera_encoder, self._encoder_threads, ) diff --git a/src/lerobot/datasets/lerobot_dataset.py b/src/lerobot/datasets/lerobot_dataset.py index d86bd3327..6c3bcf2c3 100644 --- a/src/lerobot/datasets/lerobot_dataset.py +++ b/src/lerobot/datasets/lerobot_dataset.py @@ -58,7 +58,7 @@ class LeRobotDataset(torch.utils.data.Dataset): video_backend: str | None = None, return_uint8: bool = False, batch_encoding_size: int = 1, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, encoder_threads: int | None = None, streaming_encoding: bool = False, encoder_queue_maxsize: int = 30, @@ -177,7 +177,7 @@ class LeRobotDataset(torch.utils.data.Dataset): You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision. 
batch_encoding_size (int, optional): Number of episodes to accumulate before batch encoding videos. Set to 1 for immediate encoding (default), or higher for batched encoding. Defaults to 1. - camera_encoder_config (VideoEncoderConfig | None, optional): Video encoder settings for cameras + camera_encoder (VideoEncoderConfig | None, optional): Video encoder settings for cameras (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used by the writer. encoder_threads (int | None, optional): Number of encoder threads (global). ``None`` lets the @@ -250,14 +250,14 @@ class LeRobotDataset(torch.utils.data.Dataset): if streaming_encoding and len(self.meta.video_keys) > 0: streaming_enc = self._build_streaming_encoder( self.meta.fps, - camera_encoder_config, + camera_encoder, encoder_queue_maxsize, encoder_threads, ) self.writer = DatasetWriter( meta=self.meta, root=self.root, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, encoder_threads=encoder_threads, batch_encoding_size=batch_encoding_size, streaming_encoder=streaming_enc, @@ -299,13 +299,13 @@ class LeRobotDataset(torch.utils.data.Dataset): @staticmethod def _build_streaming_encoder( fps: int, - camera_encoder_config: VideoEncoderConfig | None, + camera_encoder: VideoEncoderConfig | None, encoder_queue_maxsize: int, encoder_threads: int | None, ) -> StreamingVideoEncoder: return StreamingVideoEncoder( fps=fps, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, queue_maxsize=encoder_queue_maxsize, encoder_threads=encoder_threads, ) @@ -622,7 +622,7 @@ class LeRobotDataset(torch.utils.data.Dataset): image_writer_threads: int = 0, video_backend: str | None = None, batch_encoding_size: int = 1, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, metadata_buffer_size: int = 10, streaming_encoding: bool = False, encoder_queue_maxsize: int = 30, @@ -653,7 +653,7 
@@ class LeRobotDataset(torch.utils.data.Dataset): video_backend: Video decoding backend (used when reading back). batch_encoding_size: Number of episodes to accumulate before batch-encoding videos. ``1`` means encode immediately. - camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.). + camera_encoder: Video encoder settings for cameras (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. @@ -698,12 +698,12 @@ class LeRobotDataset(torch.utils.data.Dataset): streaming_enc = None if streaming_encoding and len(obj.meta.video_keys) > 0: streaming_enc = cls._build_streaming_encoder( - fps, camera_encoder_config, encoder_queue_maxsize, encoder_threads + fps, camera_encoder, encoder_queue_maxsize, encoder_threads ) obj.writer = DatasetWriter( meta=obj.meta, root=obj.root, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, encoder_threads=encoder_threads, batch_encoding_size=batch_encoding_size, streaming_encoder=streaming_enc, @@ -726,7 +726,7 @@ class LeRobotDataset(torch.utils.data.Dataset): force_cache_sync: bool = False, video_backend: str | None = None, batch_encoding_size: int = 1, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, encoder_threads: int | None = None, image_writer_processes: int = 0, image_writer_threads: int = 0, @@ -754,7 +754,7 @@ class LeRobotDataset(torch.utils.data.Dataset): video_backend: Video decoding backend for reading back data. batch_encoding_size: Number of episodes to accumulate before batch-encoding videos. - camera_encoder_config: Video encoder settings for cameras (codec, quality, etc.). + camera_encoder: Video encoder settings for cameras (codec, quality, etc.). When ``None``, :func:`~lerobot.configs.video.camera_encoder_defaults` is used. 
encoder_threads: Number of encoder threads (global). ``None`` lets the codec decide. @@ -802,12 +802,12 @@ class LeRobotDataset(torch.utils.data.Dataset): streaming_enc = None if streaming_encoding and len(obj.meta.video_keys) > 0: streaming_enc = cls._build_streaming_encoder( - obj.meta.fps, camera_encoder_config, encoder_queue_maxsize, encoder_threads + obj.meta.fps, camera_encoder, encoder_queue_maxsize, encoder_threads ) obj.writer = DatasetWriter( meta=obj.meta, root=obj.root, - camera_encoder_config=camera_encoder_config, + camera_encoder=camera_encoder, encoder_threads=encoder_threads, batch_encoding_size=batch_encoding_size, streaming_encoder=streaming_enc, diff --git a/src/lerobot/datasets/video_utils.py b/src/lerobot/datasets/video_utils.py index 3aca1d085..5d705ca72 100644 --- a/src/lerobot/datasets/video_utils.py +++ b/src/lerobot/datasets/video_utils.py @@ -335,17 +335,17 @@ def encode_video_frames( imgs_dir: Path | str, video_path: Path | str, fps: int, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, encoder_threads: int | None = None, *, log_level: int | None = av.logging.WARNING, overwrite: bool = False, ) -> None: """More info on ffmpeg arguments tuning on `benchmark/video/README.md`""" - if camera_encoder_config is None: - camera_encoder_config = camera_encoder_defaults() - vcodec = camera_encoder_config.vcodec - pix_fmt = camera_encoder_config.pix_fmt + if camera_encoder is None: + camera_encoder = camera_encoder_defaults() + vcodec = camera_encoder.vcodec + pix_fmt = camera_encoder.pix_fmt video_path = Path(video_path) imgs_dir = Path(imgs_dir) @@ -367,7 +367,7 @@ def encode_video_frames( with Image.open(input_list[0]) as dummy_image: width, height = dummy_image.size - video_options = camera_encoder_config.get_codec_options(encoder_threads, as_strings=True) + video_options = camera_encoder.get_codec_options(encoder_threads, as_strings=True) # Set logging level if log_level is not 
None: @@ -638,14 +638,14 @@ class StreamingVideoEncoder: def __init__( self, fps: int, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, queue_maxsize: int = 30, encoder_threads: int | None = None, ): """ Args: fps: Frames per second for the output videos. - camera_encoder_config: Video encoder settings applied to all cameras. + camera_encoder: Video encoder settings applied to all cameras. When ``None``, :func:`camera_encoder_defaults` is used. encoder_threads: Number of encoder threads (global setting). ``None`` lets the codec decide. @@ -653,7 +653,7 @@ class StreamingVideoEncoder: back-pressure drops frames. """ self.fps = fps - self._camera_encoder_config = camera_encoder_config or camera_encoder_defaults() + self._camera_encoder = camera_encoder or camera_encoder_defaults() self._encoder_threads = encoder_threads self.queue_maxsize = queue_maxsize @@ -686,15 +686,15 @@ class StreamingVideoEncoder: temp_video_dir = Path(tempfile.mkdtemp(dir=temp_dir)) video_path = temp_video_dir / f"{video_key.replace('/', '_')}_streaming.mp4" - vcodec = self._camera_encoder_config.vcodec - codec_options = self._camera_encoder_config.get_codec_options( + vcodec = self._camera_encoder.vcodec + codec_options = self._camera_encoder.get_codec_options( self._encoder_threads, as_strings=True ) encoder_thread = _CameraEncoderThread( video_path=video_path, fps=self.fps, vcodec=vcodec, - pix_fmt=self._camera_encoder_config.pix_fmt, + pix_fmt=self._camera_encoder.pix_fmt, codec_options=codec_options, frame_queue=frame_queue, result_queue=result_queue, @@ -905,13 +905,13 @@ def get_audio_info(video_path: Path | str) -> dict: def get_video_info( video_path: Path | str, - camera_encoder_config: VideoEncoderConfig | None = None, + camera_encoder: VideoEncoderConfig | None = None, ) -> dict: """Build the ``video.*`` / ``audio.*`` info dict persisted in ``info.json``. Args: video_path: Path to the encoded video file to probe. 
- camera_encoder_config: If provided, record the exact encoder settings used to encode this + camera_encoder: If provided, record the exact encoder settings used to encode this video. Stream-derived values take precedence — encoder fields are only written for keys not already populated from the video file itself. """ @@ -946,8 +946,8 @@ def get_video_info( video_info.update(**get_audio_info(video_path)) # Add additional encoder configuration if provided - if camera_encoder_config is not None: - for field_name, field_value in asdict(camera_encoder_config).items(): + if camera_encoder is not None: + for field_name, field_value in asdict(camera_encoder).items(): # vcodec is already populated from the video stream if field_name == "vcodec": continue diff --git a/src/lerobot/policies/eo1/modeling_eo1.py b/src/lerobot/policies/eo1/modeling_eo1.py index 27d609ec1..1c5860de5 100644 --- a/src/lerobot/policies/eo1/modeling_eo1.py +++ b/src/lerobot/policies/eo1/modeling_eo1.py @@ -28,11 +28,12 @@ import torch.nn.functional as F # noqa: N812 import torch.utils.checkpoint from torch import Tensor -from lerobot.policies.eo1.configuration_eo1 import EO1Config -from lerobot.policies.pretrained import PreTrainedPolicy from lerobot.utils.constants import ACTION, OBS_STATE from lerobot.utils.import_utils import _transformers_available, require_package +from ..pretrained import PreTrainedPolicy +from .configuration_eo1 import EO1Config + if TYPE_CHECKING or _transformers_available: from transformers.activations import ACT2FN from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration diff --git a/src/lerobot/policies/eo1/processor_eo1.py b/src/lerobot/policies/eo1/processor_eo1.py index 2d7bb48ae..b1f32756a 100644 --- a/src/lerobot/policies/eo1/processor_eo1.py +++ b/src/lerobot/policies/eo1/processor_eo1.py @@ -22,7 +22,6 @@ from typing import TYPE_CHECKING, Any import torch from lerobot.configs.types import FeatureType, PipelineFeatureType, PolicyFeature -from 
lerobot.policies.eo1.configuration_eo1 import EO1Config from lerobot.processor import ( AddBatchDimensionProcessorStep, ComplementaryDataProcessorStep, @@ -44,6 +43,8 @@ from lerobot.utils.constants import ( ) from lerobot.utils.import_utils import _transformers_available, require_package +from .configuration_eo1 import EO1Config + if TYPE_CHECKING or _transformers_available: from transformers.models.qwen2_5_vl import Qwen2_5_VLProcessor else: diff --git a/src/lerobot/rewards/classifier/modeling_classifier.py b/src/lerobot/rewards/classifier/modeling_classifier.py index 1d8057135..ca02b532f 100644 --- a/src/lerobot/rewards/classifier/modeling_classifier.py +++ b/src/lerobot/rewards/classifier/modeling_classifier.py @@ -17,10 +17,11 @@ import logging import torch from torch import Tensor, nn -from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig -from lerobot.rewards.pretrained import PreTrainedRewardModel from lerobot.utils.constants import OBS_IMAGE, REWARD +from ..pretrained import PreTrainedRewardModel +from .configuration_classifier import RewardClassifierConfig + class ClassifierOutput: """Wrapper for classifier outputs with additional metadata.""" diff --git a/src/lerobot/rewards/classifier/processor_classifier.py b/src/lerobot/rewards/classifier/processor_classifier.py index 056d7e91b..a5f609d0c 100644 --- a/src/lerobot/rewards/classifier/processor_classifier.py +++ b/src/lerobot/rewards/classifier/processor_classifier.py @@ -25,7 +25,8 @@ from lerobot.processor import ( policy_action_to_transition, transition_to_policy_action, ) -from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig + +from .configuration_classifier import RewardClassifierConfig def make_classifier_processor( diff --git a/src/lerobot/rewards/factory.py b/src/lerobot/rewards/factory.py index f6716f3fb..c173f44a5 100644 --- a/src/lerobot/rewards/factory.py +++ b/src/lerobot/rewards/factory.py @@ -22,9 +22,10 @@ import torch 
from lerobot.configs.rewards import RewardModelConfig from lerobot.processor import PolicyAction, PolicyProcessorPipeline -from lerobot.rewards.classifier.configuration_classifier import RewardClassifierConfig -from lerobot.rewards.pretrained import PreTrainedRewardModel -from lerobot.rewards.sarm.configuration_sarm import SARMConfig + +from .classifier.configuration_classifier import RewardClassifierConfig +from .pretrained import PreTrainedRewardModel +from .sarm.configuration_sarm import SARMConfig def get_reward_model_class(name: str) -> type[PreTrainedRewardModel]: diff --git a/src/lerobot/rewards/sarm/compute_rabc_weights.py b/src/lerobot/rewards/sarm/compute_rabc_weights.py index b1bf2e1f5..bdbb0d297 100644 --- a/src/lerobot/rewards/sarm/compute_rabc_weights.py +++ b/src/lerobot/rewards/sarm/compute_rabc_weights.py @@ -58,9 +58,10 @@ import torch from tqdm import tqdm from lerobot.datasets import LeRobotDataset -from lerobot.rewards.sarm.modeling_sarm import SARMRewardModel -from lerobot.rewards.sarm.processor_sarm import make_sarm_pre_post_processors -from lerobot.rewards.sarm.sarm_utils import normalize_stage_tau + +from .modeling_sarm import SARMRewardModel +from .processor_sarm import make_sarm_pre_post_processors +from .sarm_utils import normalize_stage_tau def get_reward_model_path_from_parquet(parquet_path: Path) -> str | None: diff --git a/src/lerobot/rewards/sarm/modeling_sarm.py b/src/lerobot/rewards/sarm/modeling_sarm.py index 365f519b2..5ebd42d30 100644 --- a/src/lerobot/rewards/sarm/modeling_sarm.py +++ b/src/lerobot/rewards/sarm/modeling_sarm.py @@ -32,13 +32,14 @@ import torch.nn as nn import torch.nn.functional as F # noqa: N812 from torch import Tensor -from lerobot.rewards.pretrained import PreTrainedRewardModel -from lerobot.rewards.sarm.configuration_sarm import SARMConfig -from lerobot.rewards.sarm.sarm_utils import ( +from lerobot.utils.constants import OBS_STR + +from ..pretrained import PreTrainedRewardModel +from .configuration_sarm 
import SARMConfig +from .sarm_utils import ( normalize_stage_tau, pad_state_to_max_dim, ) -from lerobot.utils.constants import OBS_STR class StageTransformer(nn.Module): diff --git a/src/lerobot/rewards/sarm/processor_sarm.py b/src/lerobot/rewards/sarm/processor_sarm.py index eaa5f66f5..37db374d4 100644 --- a/src/lerobot/rewards/sarm/processor_sarm.py +++ b/src/lerobot/rewards/sarm/processor_sarm.py @@ -58,15 +58,16 @@ from lerobot.processor import ( policy_action_to_transition, transition_to_policy_action, ) -from lerobot.rewards.sarm.configuration_sarm import SARMConfig -from lerobot.rewards.sarm.sarm_utils import ( +from lerobot.types import EnvTransition, PolicyAction, TransitionKey +from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME + +from .configuration_sarm import SARMConfig +from .sarm_utils import ( apply_rewind_augmentation, compute_absolute_indices, find_stage_and_tau, pad_state_to_max_dim, ) -from lerobot.types import EnvTransition, PolicyAction, TransitionKey -from lerobot.utils.constants import POLICY_POSTPROCESSOR_DEFAULT_NAME, POLICY_PREPROCESSOR_DEFAULT_NAME class SARMEncodingProcessorStep(ProcessorStep): diff --git a/src/lerobot/rollout/context.py b/src/lerobot/rollout/context.py index 07da355cb..bf5fa0fd4 100644 --- a/src/lerobot/rollout/context.py +++ b/src/lerobot/rollout/context.py @@ -332,7 +332,7 @@ def build_rollout_context( cfg.dataset.repo_id, root=cfg.dataset.root, batch_encoding_size=cfg.dataset.video_encoding_batch_size, - camera_encoder_config=cfg.dataset.camera_encoder_config, + camera_encoder=cfg.dataset.camera_encoder, streaming_encoding=cfg.dataset.streaming_encoding, encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize, encoder_threads=cfg.dataset.encoder_threads, @@ -367,7 +367,7 @@ def build_rollout_context( image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras if hasattr(robot, "cameras") else []), 
batch_encoding_size=cfg.dataset.video_encoding_batch_size, - camera_encoder_config=cfg.dataset.camera_encoder_config, + camera_encoder=cfg.dataset.camera_encoder, streaming_encoding=cfg.dataset.streaming_encoding, encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize, encoder_threads=cfg.dataset.encoder_threads, diff --git a/src/lerobot/scripts/lerobot_edit_dataset.py b/src/lerobot/scripts/lerobot_edit_dataset.py index 152c514e9..ca5a5d893 100644 --- a/src/lerobot/scripts/lerobot_edit_dataset.py +++ b/src/lerobot/scripts/lerobot_edit_dataset.py @@ -54,8 +54,8 @@ Delete episodes and re-encode video segments with h264: --repo_id lerobot/pusht \ --operation.type delete_episodes \ --operation.episode_indices "[0, 2, 5]" \ - --operation.camera_encoder_config.vcodec h264 \ - --operation.camera_encoder_config.crf 23 + --operation.camera_encoder.vcodec h264 \ + --operation.camera_encoder.crf 23 Split dataset by fractions (pusht_train, pusht_val): lerobot-edit-dataset \ @@ -87,8 +87,8 @@ Split dataset and re-encode video segments with h264: --repo_id lerobot/pusht \ --operation.type split \ --operation.splits '{"train": 0.8, "val": 0.2}' \ - --operation.camera_encoder_config.vcodec h264 \ - --operation.camera_encoder_config.crf 23 + --operation.camera_encoder.vcodec h264 \ + --operation.camera_encoder.crf 23 Merge multiple datasets: lerobot-edit-dataset \ @@ -208,8 +208,7 @@ from pathlib import Path import draccus -from lerobot.configs import parser -from lerobot.configs.video import VideoEncoderConfig, camera_encoder_defaults +from lerobot.configs import VideoEncoderConfig, camera_encoder_defaults, parser from lerobot.datasets import ( LeRobotDataset, convert_image_to_video_dataset, @@ -235,14 +234,14 @@ class OperationConfig(draccus.ChoiceRegistry, abc.ABC): @dataclass class DeleteEpisodesConfig(OperationConfig): episode_indices: list[int] | None = None - camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) + camera_encoder: 
VideoEncoderConfig = field(default_factory=camera_encoder_defaults) @OperationConfig.register_subclass("split") @dataclass class SplitConfig(OperationConfig): splits: dict[str, float | list[int]] | None = None - camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) + camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) @OperationConfig.register_subclass("merge") @@ -269,7 +268,7 @@ class ModifyTasksConfig(OperationConfig): @dataclass class ConvertImageToVideoConfig(OperationConfig): output_dir: str | None = None - camera_encoder_config: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) + camera_encoder: VideoEncoderConfig = field(default_factory=camera_encoder_defaults) episode_indices: list[int] | None = None num_workers: int = 4 max_episodes_per_batch: int | None = None @@ -371,7 +370,7 @@ def handle_delete_episodes(cfg: EditDatasetConfig) -> None: episode_indices=cfg.operation.episode_indices, output_dir=output_dir, repo_id=output_repo_id, - camera_encoder_config=cfg.operation.camera_encoder_config, + camera_encoder=cfg.operation.camera_encoder, ) logging.info(f"Dataset saved to {output_dir}") @@ -403,7 +402,7 @@ def handle_split(cfg: EditDatasetConfig) -> None: dataset, splits=cfg.operation.splits, output_dir=cfg.new_root, - camera_encoder_config=cfg.operation.camera_encoder_config, + camera_encoder=cfg.operation.camera_encoder, ) for split_name, split_ds in split_datasets.items(): @@ -574,7 +573,7 @@ def handle_convert_image_to_video(cfg: EditDatasetConfig) -> None: dataset=dataset, output_dir=output_dir, repo_id=output_repo_id, - camera_encoder_config=getattr(cfg.operation, "camera_encoder_config", None) + camera_encoder=getattr(cfg.operation, "camera_encoder", None) or camera_encoder_defaults(), episode_indices=getattr(cfg.operation, "episode_indices", None), num_workers=getattr(cfg.operation, "num_workers", 4), diff --git a/src/lerobot/scripts/lerobot_record.py 
b/src/lerobot/scripts/lerobot_record.py index b571bd5c3..c8419cb14 100644 --- a/src/lerobot/scripts/lerobot_record.py +++ b/src/lerobot/scripts/lerobot_record.py @@ -79,9 +79,9 @@ lerobot-record \\ --dataset.single_task="Grab the cube" \\ --dataset.streaming_encoding=true \\ --dataset.encoder_threads=2 \\ - --dataset.camera_encoder_config.vcodec=h264 \\ - --dataset.camera_encoder_config.preset=fast \\ - --dataset.camera_encoder_config.extra_options={"tune": "film", "profile:v": "high", "bf": 2} \\ + --dataset.camera_encoder.vcodec=h264 \\ + --dataset.camera_encoder.preset=fast \\ + --dataset.camera_encoder.extra_options={"tune": "film", "profile:v": "high", "bf": 2} \\ --display_data=true ``` """ @@ -398,7 +398,7 @@ def record( cfg.dataset.repo_id, root=cfg.dataset.root, batch_encoding_size=cfg.dataset.video_encoding_batch_size, - camera_encoder_config=cfg.dataset.camera_encoder_config, + camera_encoder=cfg.dataset.camera_encoder, encoder_threads=cfg.dataset.encoder_threads, streaming_encoding=cfg.dataset.streaming_encoding, encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize, @@ -427,7 +427,7 @@ def record( image_writer_processes=cfg.dataset.num_image_writer_processes, image_writer_threads=cfg.dataset.num_image_writer_threads_per_camera * len(robot.cameras), batch_encoding_size=cfg.dataset.video_encoding_batch_size, - camera_encoder_config=cfg.dataset.camera_encoder_config, + camera_encoder=cfg.dataset.camera_encoder, encoder_threads=cfg.dataset.encoder_threads, streaming_encoding=cfg.dataset.streaming_encoding, encoder_queue_maxsize=cfg.dataset.encoder_queue_maxsize, @@ -441,7 +441,7 @@ def record( if not cfg.dataset.streaming_encoding: logging.info( - "Streaming encoding is disabled. If you have capable hardware, consider enabling it for way faster episode saving. --dataset.streaming_encoding=true --dataset.encoder_threads=2 # --dataset.camera_encoder_config.vcodec=auto. 
More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding" + "Streaming encoding is disabled. If you have capable hardware, consider enabling it for way faster episode saving. --dataset.streaming_encoding=true --dataset.encoder_threads=2 # --dataset.camera_encoder.vcodec=auto. More info in the documentation: https://huggingface.co/docs/lerobot/streaming_video_encoding" ) with VideoEncodingManager(dataset): diff --git a/src/lerobot/scripts/lerobot_rollout.py b/src/lerobot/scripts/lerobot_rollout.py index 6a81563ee..7015e707c 100644 --- a/src/lerobot/scripts/lerobot_rollout.py +++ b/src/lerobot/scripts/lerobot_rollout.py @@ -120,6 +120,18 @@ Usage examples --dataset.repo_id=user/rollout_sentry_data \\ --dataset.single_task="patrol" \\ --resume=true + + # Rollout with custom video encoding parameters + lerobot-rollout \\ + --strategy.type=base \\ + --policy.path=lerobot/act_koch_real \\ + --robot.type=koch_follower \\ + --robot.port=/dev/ttyACM0 \\ + --task="pick up cube" --duration=60 \\ + --display_data=true \\ + --dataset.camera_encoder.vcodec=h264 \\ + --dataset.camera_encoder.preset=fast \\ + --dataset.camera_encoder.extra_options={"tune": "film", "profile:v": "high", "bf": 2} """ import logging diff --git a/src/lerobot/transport/utils.py b/src/lerobot/transport/utils.py index 8da338044..2ef63c2cc 100644 --- a/src/lerobot/transport/utils.py +++ b/src/lerobot/transport/utils.py @@ -25,9 +25,10 @@ from typing import Any import torch -from lerobot.transport import services_pb2 from lerobot.utils.transition import Transition +from . 
import services_pb2 + # FIX for protobuf: Assign the enum to a variable and ignore the type error once TransferState = services_pb2.TransferState # type: ignore[attr-defined] diff --git a/tests/datasets/test_dataset_tools.py b/tests/datasets/test_dataset_tools.py index 2e528a3c8..032fd4f7c 100644 --- a/tests/datasets/test_dataset_tools.py +++ b/tests/datasets/test_dataset_tools.py @@ -1247,7 +1247,7 @@ def test_convert_image_to_video_dataset(tmp_path): dataset=source_dataset, output_dir=output_dir, repo_id="lerobot/pusht_video", - camera_encoder_config=VideoEncoderConfig( + camera_encoder=VideoEncoderConfig( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, diff --git a/tests/datasets/test_dataset_writer.py b/tests/datasets/test_dataset_writer.py index 773a2ec05..8670aeebc 100644 --- a/tests/datasets/test_dataset_writer.py +++ b/tests/datasets/test_dataset_writer.py @@ -53,8 +53,8 @@ def _make_frame(features: dict, task: str = "Dummy task") -> dict: # ── Existing encode_video_worker tests ─────────────────────────────── -def test_encode_video_worker_forwards_camera_encoder_config(tmp_path): - """_encode_video_worker forwards camera_encoder_config to encode_video_frames.""" +def test_encode_video_worker_forwards_camera_encoder(tmp_path): + """_encode_video_worker forwards camera_encoder to encode_video_frames.""" video_key = "observation.images.laptop" fpath = DEFAULT_IMAGE_PATH.format(image_key=video_key, episode_index=0, frame_index=0) img_dir = tmp_path / Path(fpath).parent @@ -74,16 +74,16 @@ def test_encode_video_worker_forwards_camera_encoder_config(tmp_path): 0, tmp_path, fps=30, - camera_encoder_config=VideoEncoderConfig(vcodec="h264", preset=None), + camera_encoder=VideoEncoderConfig(vcodec="h264", preset=None), encoder_threads=4, ) - assert captured_kwargs["camera_encoder_config"].vcodec == "h264" + assert captured_kwargs["camera_encoder"].vcodec == "h264" assert captured_kwargs["encoder_threads"] == 4 -def 
test_encode_video_worker_default_camera_encoder_config(tmp_path): - """_encode_video_worker passes None camera_encoder_config which encode_video_frames defaults.""" +def test_encode_video_worker_default_camera_encoder(tmp_path): + """_encode_video_worker passes None camera_encoder which encode_video_frames defaults.""" video_key = "observation.images.laptop" fpath = DEFAULT_IMAGE_PATH.format(image_key=video_key, episode_index=0, frame_index=0) img_dir = tmp_path / Path(fpath).parent @@ -100,7 +100,7 @@ def test_encode_video_worker_default_camera_encoder_config(tmp_path): with patch("lerobot.datasets.dataset_writer.encode_video_frames", side_effect=mock_encode): _encode_video_worker(video_key, 0, tmp_path, fps=30) - assert captured_kwargs["camera_encoder_config"] is None + assert captured_kwargs["camera_encoder"] is None assert captured_kwargs["encoder_threads"] is None diff --git a/tests/datasets/test_streaming_video_encoder.py b/tests/datasets/test_streaming_video_encoder.py index a2f1e25e8..a2a494e8b 100644 --- a/tests/datasets/test_streaming_video_encoder.py +++ b/tests/datasets/test_streaming_video_encoder.py @@ -179,7 +179,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.laptop"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13 ), ) @@ -211,7 +211,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.laptop", f"{OBS_IMAGES}.phone"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 ), ) @@ -239,7 +239,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.cam"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, 
crf=30 ), ) @@ -267,7 +267,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.cam"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 ), ) @@ -315,7 +315,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.cam"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13 ), ) @@ -352,7 +352,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.cam1", f"{OBS_IMAGES}.cam2"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30 ), ) @@ -391,7 +391,7 @@ class TestStreamingVideoEncoder: ) encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=cfg, + camera_encoder=cfg, encoder_threads=2, ) assert encoder._encoder_threads == 2 @@ -430,7 +430,7 @@ class TestStreamingVideoEncoder: video_keys = [f"{OBS_IMAGES}.cam"] encoder = StreamingVideoEncoder( fps=30, - camera_encoder_config=self._make_encoder_config( + camera_encoder=self._make_encoder_config( vcodec="libsvtav1", pix_fmt="yuv420p", g=2, crf=30, preset=13 ), queue_maxsize=1, diff --git a/tests/datasets/test_video_encoding.py b/tests/datasets/test_video_encoding.py index da4e3b2ec..a9529d1d8 100644 --- a/tests/datasets/test_video_encoding.py +++ b/tests/datasets/test_video_encoding.py @@ -337,7 +337,7 @@ def _encode_video( ) -> Path: imgs_dir = path.parent / f"imgs_{path.stem}" _write_frames(imgs_dir, num_frames=num_frames) - encode_video_frames(imgs_dir, path, fps=fps, camera_encoder_config=cfg, overwrite=True) + encode_video_frames(imgs_dir, path, fps=fps, camera_encoder=cfg, overwrite=True) return path @@ -377,7 +377,7 @@ class TestGetVideoInfo: def 
test_merges_encoder_config_as_video_prefixed_entries(self): cfg = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12) - info = get_video_info(TEST_ARTIFACTS_DIR / "clip_4frames.mp4", camera_encoder_config=cfg) + info = get_video_info(TEST_ARTIFACTS_DIR / "clip_4frames.mp4", camera_encoder=cfg) assert info["video.g"] == 2 assert info["video.crf"] == 30 @@ -390,7 +390,7 @@ class TestGetVideoInfo: def test_stream_derived_keys_take_precedence_over_config(self): cfg = VideoEncoderConfig(vcodec="libsvtav1", pix_fmt="yuv420p") - info = get_video_info(TEST_ARTIFACTS_DIR / "clip_4frames.mp4", camera_encoder_config=cfg) + info = get_video_info(TEST_ARTIFACTS_DIR / "clip_4frames.mp4", camera_encoder=cfg) assert info["video.codec"] # populated from stream, not from config's vcodec assert info["video.pix_fmt"] == "yuv420p" @@ -453,7 +453,7 @@ class TestEncodeVideoFrames: cfg = VideoEncoderConfig(vcodec="libsvtav1", g=4, crf=25, preset=10) video_path = _encode_video(tmp_path / "out.mp4", num_frames=4, fps=30, cfg=cfg) - info = get_video_info(video_path, camera_encoder_config=cfg) + info = get_video_info(video_path, camera_encoder=cfg) # Stream-derived assert info["video.height"] == 64 @@ -535,7 +535,7 @@ class TestEncoderConfigPersistence: def test_first_episode_save_persists_encoder_config(self, tmp_path, empty_lerobot_dataset_factory): cfg = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12) dataset = empty_lerobot_dataset_factory( - root=tmp_path / "ds", features=VIDEO_FEATURES, use_videos=True, camera_encoder_config=cfg + root=tmp_path / "ds", features=VIDEO_FEATURES, use_videos=True, camera_encoder=cfg ) _add_frames(dataset, num_frames=4) @@ -558,7 +558,7 @@ class TestEncoderConfigPersistence: def test_second_episode_does_not_overwrite_encoder_fields(self, tmp_path, empty_lerobot_dataset_factory): cfg = VideoEncoderConfig(vcodec="libsvtav1", g=2, crf=30, preset=12) dataset = empty_lerobot_dataset_factory( - root=tmp_path / "ds", 
features=VIDEO_FEATURES, use_videos=True, camera_encoder_config=cfg + root=tmp_path / "ds", features=VIDEO_FEATURES, use_videos=True, camera_encoder=cfg ) _add_frames(dataset, num_frames=4)