{ "metadata": { "format": "safetensors-sharded" }, "weight_map": { "model-00001-of-00001.safetensors": [ "tok_emb.weight", "pos_emb.weight", "blocks.0.norm1.weight", "blocks.0.attn.qkv.weight", "blocks.0.attn.out_proj.weight", "blocks.0.attn.out_proj.bias", "blocks.0.attn.path_mix.weight", "blocks.0.attn.path_mix.bias", "blocks.0.norm2.weight", "blocks.0.ffn.fc1.weight", "blocks.0.ffn.fc1.bias", "blocks.0.ffn.fc2.weight", "blocks.0.ffn.fc2.bias", "blocks.0.ffn.fc_out.weight", "blocks.0.ffn.fc_out.bias", "blocks.1.norm1.weight", "blocks.1.attn.qkv.weight", "blocks.1.attn.out_proj.weight", "blocks.1.attn.out_proj.bias", "blocks.1.attn.path_mix.weight", "blocks.1.attn.path_mix.bias", "blocks.1.norm2.weight", "blocks.1.ffn.fc1.weight", "blocks.1.ffn.fc1.bias", "blocks.1.ffn.fc2.weight", "blocks.1.ffn.fc2.bias", "blocks.1.ffn.fc_out.weight", "blocks.1.ffn.fc_out.bias", "blocks.2.norm1.weight", "blocks.2.attn.qkv.weight", "blocks.2.attn.out_proj.weight", "blocks.2.attn.out_proj.bias", "blocks.2.attn.path_mix.weight", "blocks.2.attn.path_mix.bias", "blocks.2.norm2.weight", "blocks.2.ffn.fc1.weight", "blocks.2.ffn.fc1.bias", "blocks.2.ffn.fc2.weight", "blocks.2.ffn.fc2.bias", "blocks.2.ffn.fc_out.weight", "blocks.2.ffn.fc_out.bias", "blocks.3.norm1.weight", "blocks.3.attn.qkv.weight", "blocks.3.attn.out_proj.weight", "blocks.3.attn.out_proj.bias", "blocks.3.attn.path_mix.weight", "blocks.3.attn.path_mix.bias", "blocks.3.norm2.weight", "blocks.3.ffn.fc1.weight", "blocks.3.ffn.fc1.bias", "blocks.3.ffn.fc2.weight", "blocks.3.ffn.fc2.bias", "blocks.3.ffn.fc_out.weight", "blocks.3.ffn.fc_out.bias", "blocks.4.norm1.weight", "blocks.4.attn.qkv.weight", "blocks.4.attn.out_proj.weight", "blocks.4.attn.out_proj.bias", "blocks.4.attn.path_mix.weight", "blocks.4.attn.path_mix.bias", "blocks.4.norm2.weight", "blocks.4.ffn.fc1.weight", "blocks.4.ffn.fc1.bias", "blocks.4.ffn.fc2.weight", "blocks.4.ffn.fc2.bias", "blocks.4.ffn.fc_out.weight", "blocks.4.ffn.fc_out.bias", "blocks.5.norm1.weight", "blocks.5.attn.qkv.weight", "blocks.5.attn.out_proj.weight", "blocks.5.attn.out_proj.bias", "blocks.5.attn.path_mix.weight", "blocks.5.attn.path_mix.bias", "blocks.5.norm2.weight", "blocks.5.ffn.fc1.weight", "blocks.5.ffn.fc1.bias", "blocks.5.ffn.fc2.weight", "blocks.5.ffn.fc2.bias", "blocks.5.ffn.fc_out.weight", "blocks.5.ffn.fc_out.bias", "blocks.6.norm1.weight", "blocks.6.attn.qkv.weight", "blocks.6.attn.out_proj.weight", "blocks.6.attn.out_proj.bias", "blocks.6.attn.path_mix.weight", "blocks.6.attn.path_mix.bias", "blocks.6.norm2.weight", "blocks.6.ffn.fc1.weight", "blocks.6.ffn.fc1.bias", "blocks.6.ffn.fc2.weight", "blocks.6.ffn.fc2.bias", "blocks.6.ffn.fc_out.weight", "blocks.6.ffn.fc_out.bias", "blocks.7.norm1.weight", "blocks.7.attn.qkv.weight", "blocks.7.attn.out_proj.weight", "blocks.7.attn.out_proj.bias", "blocks.7.attn.path_mix.weight", "blocks.7.attn.path_mix.bias", "blocks.7.norm2.weight", "blocks.7.ffn.fc1.weight", "blocks.7.ffn.fc1.bias", "blocks.7.ffn.fc2.weight", "blocks.7.ffn.fc2.bias", "blocks.7.ffn.fc_out.weight", "blocks.7.ffn.fc_out.bias", "blocks.8.norm1.weight", "blocks.8.attn.qkv.weight", "blocks.8.attn.out_proj.weight", "blocks.8.attn.out_proj.bias", "blocks.8.attn.path_mix.weight", "blocks.8.attn.path_mix.bias", "blocks.8.norm2.weight", "blocks.8.ffn.fc1.weight", "blocks.8.ffn.fc1.bias", "blocks.8.ffn.fc2.weight", "blocks.8.ffn.fc2.bias", "blocks.8.ffn.fc_out.weight", "blocks.8.ffn.fc_out.bias", "blocks.9.norm1.weight", "blocks.9.attn.qkv.weight", "blocks.9.attn.out_proj.weight", "blocks.9.attn.out_proj.bias", "blocks.9.attn.path_mix.weight", "blocks.9.attn.path_mix.bias", "blocks.9.norm2.weight", "blocks.9.ffn.fc1.weight", "blocks.9.ffn.fc1.bias", "blocks.9.ffn.fc2.weight", "blocks.9.ffn.fc2.bias", "blocks.9.ffn.fc_out.weight", "blocks.9.ffn.fc_out.bias", "blocks.10.norm1.weight", "blocks.10.attn.qkv.weight", "blocks.10.attn.out_proj.weight", "blocks.10.attn.out_proj.bias", "blocks.10.attn.path_mix.weight", "blocks.10.attn.path_mix.bias", "blocks.10.norm2.weight", "blocks.10.ffn.fc1.weight", "blocks.10.ffn.fc1.bias", "blocks.10.ffn.fc2.weight", "blocks.10.ffn.fc2.bias", "blocks.10.ffn.fc_out.weight", "blocks.10.ffn.fc_out.bias", "blocks.11.norm1.weight", "blocks.11.attn.qkv.weight", "blocks.11.attn.out_proj.weight", "blocks.11.attn.out_proj.bias", "blocks.11.attn.path_mix.weight", "blocks.11.attn.path_mix.bias", "blocks.11.norm2.weight", "blocks.11.ffn.fc1.weight", "blocks.11.ffn.fc1.bias", "blocks.11.ffn.fc2.weight", "blocks.11.ffn.fc2.bias", "blocks.11.ffn.fc_out.weight", "blocks.11.ffn.fc_out.bias", "blocks.12.norm1.weight", "blocks.12.attn.qkv.weight", "blocks.12.attn.out_proj.weight", "blocks.12.attn.out_proj.bias", "blocks.12.attn.path_mix.weight", "blocks.12.attn.path_mix.bias", "blocks.12.norm2.weight", "blocks.12.ffn.fc1.weight", "blocks.12.ffn.fc1.bias", "blocks.12.ffn.fc2.weight", "blocks.12.ffn.fc2.bias", "blocks.12.ffn.fc_out.weight", "blocks.12.ffn.fc_out.bias", "blocks.13.norm1.weight", "blocks.13.attn.qkv.weight", "blocks.13.attn.out_proj.weight", "blocks.13.attn.out_proj.bias", "blocks.13.attn.path_mix.weight", "blocks.13.attn.path_mix.bias", "blocks.13.norm2.weight", "blocks.13.ffn.fc1.weight", "blocks.13.ffn.fc1.bias", "blocks.13.ffn.fc2.weight", "blocks.13.ffn.fc2.bias", "blocks.13.ffn.fc_out.weight", "blocks.13.ffn.fc_out.bias", "blocks.14.norm1.weight", "blocks.14.attn.qkv.weight", "blocks.14.attn.out_proj.weight", "blocks.14.attn.out_proj.bias", "blocks.14.attn.path_mix.weight", "blocks.14.attn.path_mix.bias", "blocks.14.norm2.weight", "blocks.14.ffn.fc1.weight", "blocks.14.ffn.fc1.bias", "blocks.14.ffn.fc2.weight", "blocks.14.ffn.fc2.bias", "blocks.14.ffn.fc_out.weight", "blocks.14.ffn.fc_out.bias", "blocks.15.norm1.weight", "blocks.15.attn.qkv.weight", "blocks.15.attn.out_proj.weight", "blocks.15.attn.out_proj.bias", "blocks.15.attn.path_mix.weight", "blocks.15.attn.path_mix.bias", "blocks.15.norm2.weight", "blocks.15.ffn.fc1.weight", "blocks.15.ffn.fc1.bias", "blocks.15.ffn.fc2.weight", "blocks.15.ffn.fc2.bias", "blocks.15.ffn.fc_out.weight", "blocks.15.ffn.fc_out.bias", "blocks.16.norm1.weight", "blocks.16.attn.qkv.weight", "blocks.16.attn.out_proj.weight", "blocks.16.attn.out_proj.bias", "blocks.16.attn.path_mix.weight", "blocks.16.attn.path_mix.bias", "blocks.16.norm2.weight", "blocks.16.ffn.fc1.weight", "blocks.16.ffn.fc1.bias", "blocks.16.ffn.fc2.weight", "blocks.16.ffn.fc2.bias", "blocks.16.ffn.fc_out.weight", "blocks.16.ffn.fc_out.bias", "blocks.17.norm1.weight", "blocks.17.attn.qkv.weight", "blocks.17.attn.out_proj.weight", "blocks.17.attn.out_proj.bias", "blocks.17.attn.path_mix.weight", "blocks.17.attn.path_mix.bias", "blocks.17.norm2.weight", "blocks.17.ffn.fc1.weight", "blocks.17.ffn.fc1.bias", "blocks.17.ffn.fc2.weight", "blocks.17.ffn.fc2.bias", "blocks.17.ffn.fc_out.weight", "blocks.17.ffn.fc_out.bias", "blocks.18.norm1.weight", "blocks.18.attn.qkv.weight", "blocks.18.attn.out_proj.weight", "blocks.18.attn.out_proj.bias", "blocks.18.attn.path_mix.weight", "blocks.18.attn.path_mix.bias", "blocks.18.norm2.weight", "blocks.18.ffn.fc1.weight", "blocks.18.ffn.fc1.bias", "blocks.18.ffn.fc2.weight", "blocks.18.ffn.fc2.bias", "blocks.18.ffn.fc_out.weight", "blocks.18.ffn.fc_out.bias", "blocks.19.norm1.weight", "blocks.19.attn.qkv.weight", "blocks.19.attn.out_proj.weight", "blocks.19.attn.out_proj.bias", "blocks.19.attn.path_mix.weight", "blocks.19.attn.path_mix.bias", "blocks.19.norm2.weight", "blocks.19.ffn.fc1.weight", "blocks.19.ffn.fc1.bias", "blocks.19.ffn.fc2.weight", "blocks.19.ffn.fc2.bias", "blocks.19.ffn.fc_out.weight", "blocks.19.ffn.fc_out.bias", "blocks.20.norm1.weight", "blocks.20.attn.qkv.weight", "blocks.20.attn.out_proj.weight", "blocks.20.attn.out_proj.bias", "blocks.20.attn.path_mix.weight", "blocks.20.attn.path_mix.bias", "blocks.20.norm2.weight", "blocks.20.ffn.fc1.weight", "blocks.20.ffn.fc1.bias", "blocks.20.ffn.fc2.weight", "blocks.20.ffn.fc2.bias", "blocks.20.ffn.fc_out.weight", "blocks.20.ffn.fc_out.bias", "blocks.21.norm1.weight", "blocks.21.attn.qkv.weight", "blocks.21.attn.out_proj.weight", "blocks.21.attn.out_proj.bias", "blocks.21.attn.path_mix.weight", "blocks.21.attn.path_mix.bias", "blocks.21.norm2.weight", "blocks.21.ffn.fc1.weight", "blocks.21.ffn.fc1.bias", "blocks.21.ffn.fc2.weight", "blocks.21.ffn.fc2.bias", "blocks.21.ffn.fc_out.weight", "blocks.21.ffn.fc_out.bias", "blocks.22.norm1.weight", "blocks.22.attn.qkv.weight", "blocks.22.attn.out_proj.weight", "blocks.22.attn.out_proj.bias", "blocks.22.attn.path_mix.weight", "blocks.22.attn.path_mix.bias", "blocks.22.norm2.weight", "blocks.22.ffn.fc1.weight", "blocks.22.ffn.fc1.bias", "blocks.22.ffn.fc2.weight", "blocks.22.ffn.fc2.bias", "blocks.22.ffn.fc_out.weight", "blocks.22.ffn.fc_out.bias", "blocks.23.norm1.weight", "blocks.23.attn.qkv.weight", "blocks.23.attn.out_proj.weight", "blocks.23.attn.out_proj.bias", "blocks.23.attn.path_mix.weight", "blocks.23.attn.path_mix.bias", "blocks.23.norm2.weight", "blocks.23.ffn.fc1.weight", "blocks.23.ffn.fc1.bias", "blocks.23.ffn.fc2.weight", "blocks.23.ffn.fc2.bias", "blocks.23.ffn.fc_out.weight", "blocks.23.ffn.fc_out.bias", "norm_f.weight", "lm_head.weight" ] } }