mirror of
https://github.com/huggingface/lerobot.git
synced 2026-05-22 20:19:43 +00:00
nit
This commit is contained in:
@@ -74,11 +74,11 @@ class SmolVLAConfig(PreTrainedConfig):
|
|||||||
|
|
||||||
# Finetuning settings
|
# Finetuning settings
|
||||||
freeze_vision_encoder: bool = True
|
freeze_vision_encoder: bool = True
|
||||||
train_expert_only: bool = True
|
train_expert_only: bool = False
|
||||||
train_state_proj: bool = True
|
train_state_proj: bool = True
|
||||||
|
|
||||||
# Training presets
|
# Training presets
|
||||||
optimizer_lr: float = 1e-4
|
optimizer_lr: float = 2.5e-5 #1e-4
|
||||||
optimizer_betas: tuple[float, float] = (0.9, 0.95)
|
optimizer_betas: tuple[float, float] = (0.9, 0.95)
|
||||||
optimizer_eps: float = 1e-8
|
optimizer_eps: float = 1e-8
|
||||||
optimizer_weight_decay: float = 1e-10
|
optimizer_weight_decay: float = 1e-10
|
||||||
@@ -105,6 +105,7 @@ class SmolVLAConfig(PreTrainedConfig):
|
|||||||
|
|
||||||
num_expert_layers: int = -1 # Less or equal to 0 is the default where the action expert has the same number of layers of VLM. Otherwise the expert have less layers.
|
num_expert_layers: int = -1 # Less or equal to 0 is the default where the action expert has the same number of layers of VLM. Otherwise the expert have less layers.
|
||||||
num_vlm_layers: int = 16
|
num_vlm_layers: int = 16
|
||||||
|
past_obs_keys: str = f"image"
|
||||||
add_local_special_image_tokens: bool = False
|
add_local_special_image_tokens: bool = False
|
||||||
|
|
||||||
reverse_images_order: bool = False
|
reverse_images_order: bool = False
|
||||||
@@ -115,7 +116,7 @@ class SmolVLAConfig(PreTrainedConfig):
|
|||||||
causal_action_attention_mask: bool = False
|
causal_action_attention_mask: bool = False
|
||||||
|
|
||||||
self_attn_every_n_layers: int = -1# Number of layers used in the VLM (first num_vlm_layers layers)
|
self_attn_every_n_layers: int = -1# Number of layers used in the VLM (first num_vlm_layers layers)
|
||||||
self_attn_every_n_layers: int = 2 # Interleave SA layers each self_attn_every_n_layers
|
#self_attn_every_n_layers: int = 2 # Interleave SA layers each self_attn_every_n_layers
|
||||||
expert_width_multiplier: float = 0.75 # The action expert hidden size (wrt to the VLM)
|
expert_width_multiplier: float = 0.75 # The action expert hidden size (wrt to the VLM)
|
||||||
|
|
||||||
min_period: float = 4e-3 # sensitivity range for the timestep used in sine-cosine positional encoding
|
min_period: float = 4e-3 # sensitivity range for the timestep used in sine-cosine positional encoding
|
||||||
|
|||||||
Reference in New Issue
Block a user