model_name: molmo
llm:
  d_model: 3584
  n_heads: 28
  n_kv_heads: 4
  head_dim: null
  qkv_bias: true
  clip_qkv: null
  n_layers: 28
  mlp_ratio: 4
  mlp_hidden_size: 37888
  activation_type: swiglu
  block_type: sequential
  rope: true
  rope_full_precision: true
  rope_theta: 1000000.0
  rope_type: default
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: false
  attention_layer_norm_type: olmo
  residual_dropout: 0.1
  response_residual_dropout: 0.0
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: null
  norm_after: false
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  vocab_size: 152064
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 152064
  use_position_ids: true
  tokenizer:
    identifier: Qwen/Qwen2.5-7B
    tokenizer_dir: null
    depth_tokens: true
  init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: false
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null
vision_backbone:
  vit:
    image_model_type: siglip
    image_default_input_size:
    - 378
    - 378
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1152
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 27
    image_head_dim: 72
    image_mlp_dim: 4304
    image_mlp_activations: gelu_pytorch_tanh
    image_dropout_rate: 0.0
    image_num_pos: 729
    image_norm_eps: 1.0e-06
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
    resize_mode: siglip
    pad_value: 0.0
    normalize: siglip
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: null
  vit_layers:
  - -3
  - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks
data_formatter:
  prompt_templates: uber_model
  message_format: role
  system_prompt: demo_or_style
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0
mm_preprocessor:
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: 2
  max_multi_image_crops: 8
  pooling_w: 2
  pooling_h: 2
  overlap_margins:
  - 4
  - 4
  use_col_tokens: true
  loss_token_weighting: root_subsegments
  legacy_image_mask: false
  max_answer_len: null
  img_aug: true
bi_directional_attn: null
lora_enable: true
lora_rank: 32
lora_alpha: 16
lora_dropout: 0.0
lora_bias: none
norm_stats:
  libero_10_no_noops_modified:
    action:
      mean:
      - 0.01820324920117855
      - 0.05858374014496803
      - -0.05592384561896324
      - 0.004626928828656673
      - 0.00289608770981431
      - -0.007673131301999092
      - 0.5457824468612671
      std:
      - 0.2825464606285095
      - 0.35904666781425476
      - 0.3673802614212036
      - 0.03770702704787254
      - 0.05429719388484955
      - 0.08725254982709885
      - 0.49815231561660767
      max:
      - 0.9375
      - 0.9375
      - 0.9375
      - 0.30000001192092896
      - 0.29357144236564636
      - 0.375
      - 1.0
      min:
      - -0.9375
      - -0.9375
      - -0.9375
      - -0.23642857372760773
      - -0.3053571283817291
      - -0.3675000071525574
      - 0.0
      q01:
      - -0.6348214149475098
      - -0.7741071581840515
      - -0.7633928656578064
      - -0.09749999642372131
      - -0.14819999992847435
      - -0.2742857038974762
      - 0.0
      q99:
      - 0.7714285850524902
      - 0.8464285731315613
      - 0.9375
      - 0.13928571343421936
      - 0.15964286029338837
      - 0.3246428668498993
      - 1.0
    proprio:
      mean:
      - -0.04190658777952194
      - 0.03539430722594261
      - 0.8257141709327698
      - 2.908308267593384
      - -0.5562185049057007
      - -0.16649018228054047
      - 0.0
      - 0.028316624462604523
      - -0.028561657294631004
      std:
      - 0.10743364691734314
      - 0.14424669742584229
      - 0.2572328448295593
      - 0.3441362977027893
      - 1.234421730041504
      - 0.3579835891723633
      - 0.0
      - 0.013308707624673843
      - 0.013174631632864475
      max:
      - 0.21031762659549713
      - 0.39128610491752625
      - 1.3332009315490723
      - 3.6714255809783936
      - 3.560650587081909
      - 1.386339545249939
      - 0.0
      - 0.04160946607589722
      - 0.0013633022317662835
      min:
      - -0.4828203022480011
      - -0.3255046010017395
      - 0.445506751537323
      - 1.1321442127227783
      - -3.641430377960205
      - -1.842738389968872
      - 0.0
      - -0.0010040868073701859
      - -0.04111652821302414
      q01:
      - -0.3899900782108307
      - -0.2838300323486328
      - 0.44795057058334353
      - 1.8810229921340942
      - -2.886677579879761
      - -1.1599004411697387
      - 0.0
      - 0.002066459748893976
      - -0.04001387819647789
      q99:
      - 0.1530261474847791
      - 0.32915401458740223
      - 1.2546923208236693
      - 3.303542451858519
      - 2.7496529006957933
      - 0.6893712210655194
      - 0.0
      - 0.040048558115959164
      - -0.0017598449345678235
    num_transitions: 101469
    num_trajectories: 379
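For reference, a minimal sketch of reading this config with PyYAML and checking a couple of derived quantities. The file name config.yaml and the printed fields are illustrative assumptions, not part of the config itself.

import yaml  # PyYAML

with open("config.yaml") as f:  # file name is an assumption
    cfg = yaml.safe_load(f)

llm = cfg["llm"]
# head_dim is null above, meaning it is derived: 3584 / 28 = 128 per head.
head_dim = llm["d_model"] // llm["n_heads"]
print(cfg["model_name"], llm["n_layers"], head_dim)

if cfg["lora_enable"]:
    # Standard LoRA scaling is alpha / rank = 16 / 32 = 0.5.
    print("lora scaling:", cfg["lora_alpha"] / cfg["lora_rank"])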
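The q01/q99 entries under norm_stats are per-dimension 1st/99th percentiles of the 7-dim action (presumably end-effector deltas plus gripper) for libero_10_no_noops_modified. A common convention in OpenVLA-style pipelines, assumed here rather than stated by this file, is to map each action dimension into [-1, 1] with these bounds at training time and invert the map at inference:

import numpy as np

# Q01/Q99 copied from the action block above.
Q01 = np.array([-0.6348214149475098, -0.7741071581840515, -0.7633928656578064,
                -0.09749999642372131, -0.14819999992847435, -0.2742857038974762, 0.0])
Q99 = np.array([0.7714285850524902, 0.8464285731315613, 0.9375,
                0.13928571343421936, 0.15964286029338837, 0.3246428668498993, 1.0])

def normalize_action(a: np.ndarray) -> np.ndarray:
    # Map a raw action to [-1, 1] using quantile bounds (assumed convention).
    return np.clip(2.0 * (a - Q01) / (Q99 - Q01 + 1e-8) - 1.0, -1.0, 1.0)

def denormalize_action(a: np.ndarray) -> np.ndarray:
    # Invert normalize_action for model outputs in [-1, 1].
    return 0.5 * (a + 1.0) * (Q99 - Q01 + 1e-8) + Q01

Whether the gripper dimension (index 6) participates in this mapping is pipeline-specific; some implementations binarize it instead, so treat this sketch as the general pattern rather than the exact training-time transform.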