{ "adapter_config": { "attention_dropout": 0.0, "float32_attention": true, "head_dim": 72, "hidden_act": "silu", "hidden_size": 1152, "image_feature_dropout": 0.0, "image_padding_embed": null, "initializer_range": 0.02, "intermediate_size": 18944, "model_type": "", "num_attention_heads": 16, "num_key_value_heads": 16, "residual_dropout": 0.0, "text_hidden_size": 3584, "vit_layers": [ -3, -9 ] }, "architectures": [ "MolmoActForActionReasoning" ], "auto_map": { "AutoConfig": "configuration_molmoact.MolmoActConfig", "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning" }, "image_patch_id": 152066, "initializer_range": 0.02, "llm_config": { "additional_vocab_size": 128, "attention_dropout": 0.0, "embedding_dropout": 0.0, "head_dim": 128, "hidden_act": "silu", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 18944, "layer_norm_eps": 1e-06, "max_position_embeddings": 4096, "model_type": "molmoact_llm", "norm_after": false, "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "qk_norm_type": "olmo", "qkv_bias": true, "residual_dropout": 0.0, "rope_scaling": null, "rope_theta": 1000000.0, "use_cache": true, "use_qk_norm": false, "vocab_size": 152064 }, "model_type": "molmoact", "n_action_bins": 256, "norm_stats": { "libero_10_no_noops_modified": { "action": { "max": [ 0.9375, 0.9375, 0.9375, 0.30000001192092896, 0.29357144236564636, 0.375, 1.0 ], "mean": [ 0.01820324920117855, 0.05858374014496803, -0.05592384561896324, 0.004626928828656673, 0.00289608770981431, -0.007673131301999092, 0.5457824468612671 ], "min": [ -0.9375, -0.9375, -0.9375, -0.23642857372760773, -0.3053571283817291, -0.3675000071525574, 0.0 ], "q01": [ -0.6348214149475098, -0.7741071581840515, -0.7633928656578064, -0.09749999642372131, -0.14819999992847435, -0.2742857038974762, 0.0 ], "q99": [ 0.7714285850524902, 0.8464285731315613, 0.9375, 0.13928571343421936, 0.15964286029338837, 0.3246428668498993, 1.0 ], "std": [ 0.2825464606285095, 0.35904666781425476, 0.3673802614212036, 0.03770702704787254, 0.05429719388484955, 0.08725254982709885, 0.49815231561660767 ] }, "num_trajectories": 379, "num_transitions": 101469, "proprio": { "max": [ 0.21031762659549713, 0.39128610491752625, 1.3332009315490723, 3.6714255809783936, 3.560650587081909, 1.386339545249939, 0.0, 0.04160946607589722, 0.0013633022317662835 ], "mean": [ -0.04190658777952194, 0.03539430722594261, 0.8257141709327698, 2.908308267593384, -0.5562185049057007, -0.16649018228054047, 0.0, 0.028316624462604523, -0.028561657294631004 ], "min": [ -0.4828203022480011, -0.3255046010017395, 0.445506751537323, 1.1321442127227783, -3.641430377960205, -1.842738389968872, 0.0, -0.0010040868073701859, -0.04111652821302414 ], "q01": [ -0.3899900782108307, -0.2838300323486328, 0.44795057058334353, 1.8810229921340942, -2.886677579879761, -1.1599004411697387, 0.0, 0.002066459748893976, -0.04001387819647789 ], "q99": [ 0.1530261474847791, 0.32915401458740223, 1.2546923208236693, 3.303542451858519, 2.7496529006957933, 0.6893712210655194, 0.0, 0.040048558115959164, -0.0017598449345678235 ], "std": [ 0.10743364691734314, 0.14424669742584229, 0.2572328448295593, 0.3441362977027893, 1.234421730041504, 0.3579835891723633, 0.0, 0.013308707624673843, 0.013174631632864475 ] } } }, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.52.3", "use_cache": true, "vit_config": { "attention_dropout": 0.0, "float32_attention": true, "head_dim": 72, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_default_input_size": [ 378, 378 ], "image_num_pos": 729, "image_patch_size": 14, "initializer_range": 0.02, "intermediate_size": 4304, "layer_norm_eps": 1e-06, "model_type": "molmoact_vit", "num_attention_heads": 16, "num_hidden_layers": 27, "num_key_value_heads": 16, "patch_bias": true, "pre_layernorm": false, "residual_dropout": 0.0, "use_cls_token": false } }