| { |
| "metadata": { |
| "ParamSize": 325, |
| "ParamBytes": 2149644288.0, |
| "BitsPerParam": 4.500600961055312 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 49250304, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 32064, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 49250304, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c9e354046db2a906d8794caaaaee850d" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4b283c7e90f84f5871b8383564b1ea72" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 23470080, |
| "records": [ |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 32064, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6156288, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.21.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 6156288 |
| }, |
| { |
| "name": "transformer.h.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 6162432 |
| }, |
| { |
| "name": "transformer.h.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 18745344 |
| }, |
| { |
| "name": "transformer.h.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 20318208 |
| }, |
| { |
| "name": "transformer.h.21.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 23463936 |
| } |
| ], |
| "md5sum": "c8cabb53c75d1f4815c50cde31324df0" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d5f4b37d33c671a5a4b9dcbb37020281" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.21.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.21.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.22.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.22.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "2604e8c55ed43faf77a4d21126eee0d6" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.22.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.22.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.22.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.22.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.23.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "94ac0ff50fa28b04cdd0919dd30ea721" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d9f2b9e22b94f4f9d5cceb4eb7afa70f" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.23.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.23.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.23.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "4dc3b6375d1a2ae962fe5b863b682d76" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.24.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f9a47ddd48acfdfb91d72d76b41ae57b" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.23.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.23.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.24.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.24.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.24.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "3bb92bd753f0ea516c9abbd528c2c48a" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.24.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.24.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.24.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.24.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.25.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "d6bffaa9f087578bf43667f8b3fbda0f" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.25.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aa9119b9950a5f945c86b5fc643c7060" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.25.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.25.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.25.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.25.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "6629af1e1e27dbe16f1a6d5de38df159" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.26.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e1f99e218b8d83f487dceab5980183aa" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.25.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.25.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.26.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.26.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.26.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "ed57c2bd7cee3d8a324b1bd99ce38e66" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.26.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.26.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.26.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.26.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.27.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "62bdac2c473c7416578d1a54ea77b8c7" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.27.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "64e63f78f23ad59e138a32293f5cc7f3" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.27.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.27.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.27.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.27.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "e6064b29b1fa2ae29d0730a2f7caa633" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.28.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "32c4d4ebf17d20a2c8da0c11b1e5b080" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.27.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.27.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.28.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.28.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.28.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "cae1134f31044047dc2f91b10dada0af" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.28.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.28.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.28.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.28.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.29.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "6d7ced2a2ef38b104299aed6adbcd499" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.29.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bd190b46e21615db26bc0667d2d028a9" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.29.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.29.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.29.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.29.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "2caa2e559b6e495db6549e1d8f6c95f0" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.30.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "20d24f9f3cc7a339ad5f99ca9caf4ccc" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.29.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.29.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.30.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.30.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.30.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "cc67d2379662797a94de4732d5d8bf96" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.30.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.30.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.30.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.30.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.31.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "8271a6efb48390bc676f891ddd9826b3" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.31.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e5bf7733ab694fa663ed7fa0a90085ae" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.31.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.31.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.31.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.31.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "bf3eac98211b7772cc2a1e8ba5ef56e3" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 49250304, |
| "records": [ |
| { |
| "name": "transformer.embd.q_weight", |
| "shape": [ |
| 32064, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 49250304, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e61f1b06e77de108c4a56240a97fbb3b" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 22093824, |
| "records": [ |
| { |
| "name": "transformer.h.31.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.31.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.norm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.embd.q_scale", |
| "shape": [ |
| 32064, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6156288, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.0.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 22087680 |
| } |
| ], |
| "md5sum": "46951bed8951c88c54aa1b44217f0d75" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "10d39b272ab4c6fd7289cac12ad41611" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.0.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.0.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.0.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "5f86f0f598cb4ca7b430547f81baf627" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "484741977565d1d71f8998a252b4dc36" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.0.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.0.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.1.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.1.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "499ba4e4630cd92695e34dcce8c0dd9f" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.1.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.1.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.1.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.1.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.10.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "e61f009ca335976aadee22021b6db74d" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "35c62867f472d42255810b0d849c087c" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.10.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.10.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.10.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "3fffb0aea41e5e1922461774cbf9f8cb" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5921d60f12a98545c09c047ff4c1bdad" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.10.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.10.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.11.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.11.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "94c24b2d3015d941ad50829f6e417120" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.11.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.11.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.11.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.11.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.12.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "499f0098e42351c5865b5e0ae71f1aa9" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1ba42809fa7c1f86703078e0f754dff2" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.12.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.12.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.12.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "3269d7577a33b56e3db0c7c49ac04fe0" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "375017006e4bb207100bf697bb972e89" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.12.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.12.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.13.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.13.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "fbdc2a941157a9dcff0b69ed6a70237e" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.13.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.13.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.13.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.13.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.14.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "e6d6ad92fdf7aa8239ee58d80c954dd3" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "09257112ea5169733328c7131702c779" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.14.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.14.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.14.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "a73aa60ba509d31774c4dad59d30dedf" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "def11cc9fc9c9d7de5b84d11214299c5" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.14.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.14.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.15.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.15.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "a38ee2ce5e1c4ee55567d12e1bbf81fd" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.15.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.15.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.15.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.15.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.16.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "db0a8f15a63c0f835ad7360537c26012" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f7f896c7b4ff937d23b1e8090180d675" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.16.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.16.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.16.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "47fa48d1f8ba220049355ff19ee97044" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9a70479627c1d18fbddc1b9603d0de0a" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.16.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.16.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.17.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.17.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "c6c79f373639b46938388dda16b85282" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.17.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.17.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.17.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.17.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.18.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "7d67772852c6fbedc3daf501e7be89d0" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fa21743578b6672f11ff4a7372b6f312" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.18.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.18.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.18.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "f44e7ac43e25bd9a583ef44081ea0495" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "80eb979f8d20bd0ba1c636701040cef1" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.18.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.18.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.19.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.19.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "0593b4734c545448d30e0659a080cd81" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.19.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.19.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.19.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.19.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.2.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "4fde5f9e7a85b8f453e6e96348247612" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1fb5a4cf8b8e4550422c4392f05f712a" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.2.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.2.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.2.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "7e328c611048b7752c57246dcecc2fc4" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5c6113b6b67f4fe11a41bb7f94a4cc16" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.2.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.2.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.20.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.20.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "3da248097cebcbb8405cb9f8dfa8788b" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 26548224, |
| "records": [ |
| { |
| "name": "transformer.h.20.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.20.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.20.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.20.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.21.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 21233664 |
| }, |
| { |
| "name": "transformer.h.21.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 25952256 |
| }, |
| { |
| "name": "transformer.h.3.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 26542080 |
| } |
| ], |
| "md5sum": "f9c1f66e7e9a2c7ac2bfc55d89055594" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "33e6ae873b2c26c4c206812c3ed06a3f" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.3.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.3.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.3.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "6ae5ea60b9220a6c430da183826ddb1e" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cb7fad11cdb09239bf032c34615e20c9" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.3.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.3.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.4.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.4.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "7da0192b24840d143b45f17c540dc508" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.4.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.4.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.4.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.4.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.5.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "fb46533dda0bd4e575cf265ef9f52848" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "590c8f9e35987ceea65b776786893e9e" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.5.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.5.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.5.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "70958f1d94286459811141897b2c1260" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e2f6c5ac238728e25d739a74641adeb7" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.5.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.5.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.6.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.6.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "98f665b5b7f17e41d7a1fd1f255651a2" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.6.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.6.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.6.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.6.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.7.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "64129d5b01aded66f08537ed26077e93" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "60ff3dc3cb5a69eab0b6c49302619bb5" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.7.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.7.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.7.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "56c3809c33e596b25ebe079c967ada3d" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7a08bcb52123b2f5f20fae1bb5f2608f" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 33239040, |
| "records": [ |
| { |
| "name": "transformer.h.7.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.7.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.8.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 15925248 |
| }, |
| { |
| "name": "transformer.h.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 15931392 |
| }, |
| { |
| "name": "transformer.h.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 28514304 |
| }, |
| { |
| "name": "transformer.h.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 30087168 |
| }, |
| { |
| "name": "transformer.h.8.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 33232896 |
| } |
| ], |
| "md5sum": "6ed6ac9e03d979e2774ffef6d0ca6016" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 21239808, |
| "records": [ |
| { |
| "name": "transformer.h.8.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.8.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "transformer.h.8.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 5308416 |
| }, |
| { |
| "name": "transformer.h.8.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 19464192 |
| }, |
| { |
| "name": "transformer.h.9.ln.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 21233664 |
| } |
| ], |
| "md5sum": "ff4f097c28464cf739694531e2526137" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.h.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d95e88dce2bac245d38de0b9c5f7401" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 22616064, |
| "records": [ |
| { |
| "name": "transformer.h.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 3072, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 12582912, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 3072, |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "transformer.h.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 14155776 |
| }, |
| { |
| "name": "transformer.h.9.post_attention_layernorm.weight", |
| "shape": [ |
| 3072 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6144, |
| "byteOffset": 17301504 |
| }, |
| { |
| "name": "transformer.h.9.mixer.out_proj.q_weight", |
| "shape": [ |
| 3072, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4718592, |
| "byteOffset": 17307648 |
| }, |
| { |
| "name": "transformer.h.9.mixer.out_proj.q_scale", |
| "shape": [ |
| 3072, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 589824, |
| "byteOffset": 22026240 |
| } |
| ], |
| "md5sum": "04a78074a5a07f4914515ac0729ec0ae" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 15925248, |
| "records": [ |
| { |
| "name": "transformer.h.9.mixer.qkv_proj.q_weight", |
| "shape": [ |
| 9216, |
| 384 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 14155776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.9.mixer.qkv_proj.q_scale", |
| "shape": [ |
| 9216, |
| 96 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1769472, |
| "byteOffset": 14155776 |
| } |
| ], |
| "md5sum": "e7efa75fa7df03e39f04cd59d087037c" |
| } |
| ] |
| } |