|
{ |
|
"metadata": { |
|
"ParamSize": 485, |
|
"ParamBytes": 18982134784.0, |
|
"BitsPerParam": 4.5002708090210115 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131076096, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
32001, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131076096, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce8d5dce4e14895164b9accf91f38e4d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "016856341a8a2cee93f8545389d55ecf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "709083325f6bb9de1817b2367a8664d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27705856, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
32001, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384512, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16384512 |
|
}, |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 16400896 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27673088 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27689472 |
|
} |
|
], |
|
"md5sum": "3043f0b9949b1db50ba8aa86b206d213" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a2192e79fb92ce8ea9ae3146c02101d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f64e3e4d1074fadd78069b0b0aee52e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2a6d889c101e0aae9ec019b441d5d83a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0db6ce1cc5d43630e92bdb36f8f087f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d19431f02b69f83df63521601e89957a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2088cf4fd752b0833ec5b03e1d4b7169" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee14507ab71546525d46689c6f2d1d97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23d1281688324cb0275ee8f75acf1cd7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "1aa880b24f2af0d8dee4562b2ddb77f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9bd608cddeb1ab68c8336ab8d66581e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26915f031c12293d0d312c5d9e98bc56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74a8a9fd34c3c89f426ad2fae09f06e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28cd89bafa047ad04e96a973f9e03c30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48128c2b95e662826acada932afc0c2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb3b28216f67a3099fb3cb78402a2f96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a301b6f75aa00465636885728a22b84e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "3e57714c09147de405992c2ef9ed82ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "572be513823914b7250ec69d5f0fcfb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "11c33050393668fe04cc896345eba65e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23875a4d24699f98f5fdf7100bef42db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81b087e177a03f929b23dabf2157a7ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d0ab1c69f1bbfd55d9009b2f40b4cf7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "283728038e0b03de0fc8cd668487928e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "169a3cf8fdc4de710ec0f04ea29539ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab46556ec8c4d13e3b7961c373269466" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.46.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.46.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "0bf90e7276b7d45589ada6c2540ca334" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d959bbbb5c579d2f61ba00b146478b57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72a77ae0966dd154e9914c9d3201d0a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "070334687df7eba7f8bb0756d7c4bd24" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "39eb99c90f5cd3d0731341517c564511" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5442ffddb8d3dc022bb24f2ac8d804c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "193935abac61a34c0b8e309153775652" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131076096, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
32001, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131076096, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd668a9efd79094484e0ec5075d1dc6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.47.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.47.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "783953056d8a5d3d4137c17d927a79f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a47b4cf73618c2cd003a8f4bf017cf1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d15980a2bc77740c4d6652c3c7d357a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63670390896cde904751ecd830cd6ea8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e7769319e00b64bb17247356f8221cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "41438192d974f1fdccdb13308512f924" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32932352, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
32001, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384512, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16384512 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 16400896 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27673088 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 27689472 |
|
} |
|
], |
|
"md5sum": "0d033da14d40a767e6652378eb305de8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb94fc60e2f211f081c39d399e590825" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4051cf6476e9cf55fa6eefcb94d959c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fa07fe49b2e582a3f1c0c4acb90a40f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c203019d69ee2e0e7ffaf90feca72f5b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e79f414d6cc6dd42c6bacffdd356acf9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67a9b45b672cb31f760ad41cb50f3f23" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24952832, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 4194304 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 4210688 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 15482880 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 15499264 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 24936448 |
|
} |
|
], |
|
"md5sum": "12a49704c778b6a31e8f1efb7c9e923e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "818cfcbf8531cf65a7d397a804468378" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "587f643935715b665a0e7f9396801ee9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7749a2b7b61962595807e3b2a3a93fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2d9ed7acb38b3da704a3ddf48eda8b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de3849c66602043661245ebbea64db7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "49032aba6edb6b799dc6e5e38952c714" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f7727b0a02e1ebbc44064efb0ee295c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "816674667ad61f0da84e74e82e4f8020" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "8b7635fb3a865e741f1f5e95430e9400" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c1b2b596d04b8c176c572e510ae778f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "920fd981060345f7a5621d1ff20f1d83" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43ef41abe17891ca8d5b1a861d2acd10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "669a79fa07d91f1f6581caaf4f3eb827" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79b39648b67acf36ec4dc4efc2db1e18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ded61e49111dc67a58a31aa55aa1cdc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5250d9ae4c095b9517eb026376763640" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "a086294931beb0e7f833b92bff52f51a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "670c64a915deee1c7dafa4722c58a0e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d027f1bc3e6d8d651882a0c6fa0dc5f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6bdd48243493ce610c61601790f0e309" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9ff946ef084411488fb74367c5c4b68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c1b2e4f4603b2bb50a762343b8baa2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8b27e71004c65791464132cc089a287" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d7a14a76bde296d01dc7dd7c4f9ac57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50a1626483aead9e11ecfe695b9bd42f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32cf1e876ea31c91e7b31ef63bee9e6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25968640 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30162944 |
|
} |
|
], |
|
"md5sum": "4e062d758c95c462e0ee18b5cc8fb93b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3adeafc5c849fe487bbb08868255e8ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01dd312e7596c7d6035bf5ec7cd67462" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0bc70b4863db47e1d2e4b689c22524c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56069fa7349ddc8e14ca7fb0f491d2b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de3b12819d84560ab0331667c4597f33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c37b8ddc28950b8e807573905dd9cca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1d10dbe165b65b43a844fbcf70777a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "373fd7768d9d90f396741b0baa176f08" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "b68234bd761bfe4bcf80a2e42537319b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4fe6008f964d4626aa3c1e28894f0536" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "59a18fe3088902d8cee532131e83837d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "756e8a98bf630fef8bd724311dd99a9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aff786611ed65110257cd2c06ea374d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc00da699319ee7a2464b4f25bb43c4d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dac004907afed8b4bb0852aece3afb00" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "604e9714563b9468305a4a5eac88f07a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46dfc4371269e1e42604edaa4a999b52" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "035d61171ffece8c38a88565e7a7e9e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
} |
|
], |
|
"md5sum": "b67a37667c1e845945dbb50ac6463b06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3cd163792d3e42f303ba27a764b8712d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20d02af420839973b9836745ba79073a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "655c25cc1b2ff033668d1b67c853981a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4da18e569618cb9fc5d803294cea427b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7b31c523d661c6589d412cef72d9233" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4645b7d61388817dd904a13dfc161f8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20758528 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "e4e6ff92168156f32fe4747e4192b7a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64b0657c588b2963439f0cd3a2e55c43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "59f211420efe31204f9f59ccdcf96ab7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21f09d9496cae9f7962e7219bd75f1d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5de3799cd01c1871295a59c0dbbd277" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9b6710f0bcd437c9f35b23d232765d9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66e92e11342d553389db02199af3cfa6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80718b8846a5e1c2e0b134c66b5c8a86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "1ede28c7a85fe8fc552810860d5937df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13df5cc233f060882245296fc6e08a65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44e126b920fc7d41324e364beb16ee52" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "104f95355ce16092c0351c7b20f3c8ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f2f9111ceff1a505d8a17571521bbd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f632349ad4a3dd10249630cf9669168" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09d53e021054b3d40e2f62cd284f4f7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "1b0a0d8b371accfd743218e8c21c37e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a613dc8b74173b89a94ac285c0ae630f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e3ed1516b3909c06df0a937aeccacc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5bee65c1bb54bc1ef465e69e3bf08dfa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "082af15877046a3d7b8e66dc9a4b4600" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "da33113672e3043f8c091440b6ab69c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f14002a8589cbe6485b5483b673a8192" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43ef55c5f793b862188c154e98634a3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "233101aca570045c56d3e22b1c5b3f9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "446e58dee2f62cef6d9f85197e0ebc69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "efbec170186ac924f381dcae22493290" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43e84e9f3641cc42b588c106abde79eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f684d58f5fdc3b9a4ba82766178d7f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86c271074a9fb90c42abc02d8484ee17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a0f39f615c389305d80b9cfcd0169169" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b4853b9c05547857503aa7b3869eddc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5ad98a4f53751ba4f05a5449063b6b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "8d6a67f603f49864f9c70d306434d5bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "33b2563a867018a48c352619e87e608b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c64586f6a68d86e1ed112b0f7e7935a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b33e6d96ffeaef495cf5152a3f2af9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23c7f453d65d9431ef7a8a1fb26e91a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1348663d62f9656e2aea9d8eb9626a19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4c384f24dde9f6f2364384a0b64c6e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "95e1be1912f556ac81a577404584f96b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6804bbb340afd153bb43fe0b371e7838" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "f4243df377212548f5aeb65b10c513ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a83cb83c2ade5189040542d9a1aaed3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0f43fea67156e27ebb00f879db9f793" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a50ef1e70814a1d36c97c51743a0b7f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82b3f43f9fab681d0d96c651f798e02e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2910c9316ef3786ae1ee78713e94515d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e187cce2c2aa88b59abaeb1c0086ef2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "17b92f94025ec5fa0efab668a1917353" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f33c169f82ef2f620b22f0fdc664cd7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ce48169abc82f022b54cf9dad5e9a0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
} |
|
], |
|
"md5sum": "fb68802795e165edb0a6012da8e4fde4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "964dcd09f2353aabbd213fd8f289dfd4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b69cfcd8405e0cd0b1071c146b809c41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "affae6e41d14097be0f1f24b3468ab20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bcc9bdad778f5700812a9e3b68d87a13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc08f89a961ca49cac6a50451bf82964" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f4d815ad2c8d29f1da8a47ab94fe5c1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20758528 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "974cfdd58dfc70d2d767e5768f998296" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91abd4c55116a46654ed73510ca4c843" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "909bf878793040ec6a4e0fda148e0781" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34c960dcd82556ce4a69c26cc80c5bbd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "03539b79f84f3b80b009e11a626938e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3add3feab898524115600007535e76cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6451eeaf85040ac11571f00cf35cd402" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66ced6c83c59559a77c35bc43d595dfb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "2c6ea24b7a49d169b85bf4533f6c026e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74ade524b99e887eedaecbc5fe596520" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a26cf5507a2db8ac64986ac5dc2fc3d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c574cc741c3ada51833911c3ae20430" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4e837c4df1566a277a65cc1bcadb859" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "da7de7702eef8e6df70622c17a2cdfd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be5635ea848fddd2a1da52e96911e500" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f964cf9b5a64fca54f251815e9272ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "043297009f15b6b2cfded30315a63902" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "4ead52b8dbe462c8ad805126121ba5cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6782c57d9dbb6ed181a16a02d6a63fae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e60b98b300a6fc7e49c68fbd7b6ba1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21fedba317847a73da97e6baee3f6ef2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb989e56e2f0d4f82dc25b0d129a0544" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e328922ede239cf1b7f844e659b4401f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8f95f1f65b203e23a71eca8af4585bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7260d01a7ceebb08799d864920923e6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "cbd910125308c0798261b147149efb7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3323d8ae6a4002fca716e5012106f489" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7eff2f7fb0027dcc56af4831e60813ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b98b3d80780c4b11211bcb74462a876" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d86db4e59932a067cc4a544a90fb47e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "364b691339c7e78658878bee711d4a01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b6c393aafaddb53d90f001424404eb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a653decc84d22ad2154809dda4a391eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34e97690e1cc300814d86f6af8f6617e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f7cee9a2b90d6ee2922ab7507f3ca36" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25968640 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30162944 |
|
} |
|
], |
|
"md5sum": "e73655f035b28aa81478ea2e80d5fc2d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34dea69f579474827868a6fe19c8c41e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6de5f1c3ced4a3a4491a544a93b9fa10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8c232c8f3f0d9f721d706ce5f465b7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db75bd519e582ae60dd0be998c8fbbe0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2c8e55cc6ad0ad2a86eecc8cd57651d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f4e5ff7e5828d7dc1cecbc0de5869ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 11304960 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 22577152 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 22593536 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 27836416 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "b3f32cbe4cff8863f4e21ded6824161f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d8e0e751fd8a223912e5c79dc428593" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32a9bfc6935786a7436a314ca284cbd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "342dad2a9e7a92d2fe6135d648b3feef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0dc6ca92099fab488e00fd4af5214b8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "42c7f5e9e5024f5357b89c039bb0e55a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e261cad06e7398cd71996862df1ec7ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eb792ba24fd59ce376e5778d7c228101" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f77c0cae72072ad5981bed9f06bbb980" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "5111a8d65755cd1f323cb15c4df60077" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "508dbb3f23bb7d1152cd269b448d3492" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67f89e94283051b80e73dd1dcd3575c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "808247ebd47de2da2633fd14d79ff286" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "944568005b88716de2382ab9983435d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47f2b2b1b5ef2760f1151d343b222d11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad57f9fe9ec7f98731626a10ebeabcdc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "25da8de9906baa74937656a8ba0af3a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "b345d2a575799783259b6be3d02b5232" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53ca51898590bd67af23658db2349756" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f39de6f590b12af18e7ead18cfa4f607" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a816555eb2b9bddbe19e76f64834974d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ad2c99ffd9ff828a659ce1abf1c16f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "698bfd080b625ce71fdafd9ccaac218b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7fd9bdd68478da8ce3eda2352f4f018" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "59571d0ccd7dbed15aeb3f47bf1f3676" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b39a42234915bfa48e2792e76c979fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "68b37e6b26b2429eaf3340a7d69b0052" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f703e975fcb8f76ad8cef96d338a6df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f115ac7c2a452a549aee3d378784b6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0837ccc73993ad6f63e44ad9cd905286" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31981568, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 9437184 |
|
} |
|
], |
|
"md5sum": "f8bfe91180ce0352e7e4a150cd8c12cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e098055e78889bba34e54d709f8ed84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee49ac7cdc254f6ceff88fe847a41d09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2110f481b846f26d65907b72d782e98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "740928be4ebedacfa284002741564498" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e101625a9a22c4c894967724f595e1ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "171edf5c6b339c1939d72cb73e425b7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20758528 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "d992dd076400bb6e190e1f48951c940b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a97409e208908c0a8695f96b6a1d7a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_242.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9a244d0d44b08daac46e3f49541ace94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_243.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f5c0cdd9adcd5070503008437832bbcb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_244.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b555415ac064fc432d356e5f3ae93ba2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_245.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7a3153d4792175a12adddf13363ca6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_246.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52aa47514bb10c0750802030f72bfb40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_247.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6739b3be349305e723b94077579238c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_248.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "10112ba47e8c56b0c5178945fa65f71e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_249.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78a988c760c335f08c561631bd979585" |
|
}, |
|
{ |
|
"dataPath": "params_shard_250.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "baca70b12cd72f546ef45d05959d55d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_251.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed6c2d38092dad62689dba0431a51e75" |
|
}, |
|
{ |
|
"dataPath": "params_shard_252.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "782ecf4c5a914f05b8a134caa43c0420" |
|
}, |
|
{ |
|
"dataPath": "params_shard_253.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1545916e65e30d31dc02fdf4c4acd2aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_254.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a69a11c58a7154432ce95e8ea60de6d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_255.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b3810e521bd7ab53bffa21129875e3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_256.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5eadefc0b6cb8d867369eaa65a89d53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_257.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "86a06a240eecaa470802ada7253173a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_258.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e03d3ededa52bbbb2b301da0193888b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_259.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6157ef3dfeac66fb82c1b6bb796e83c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_260.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "196640fdc3f825f91c4f53cde02c2394" |
|
}, |
|
{ |
|
"dataPath": "params_shard_261.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "563c02eded945acdc600b694dc0eee66" |
|
}, |
|
{ |
|
"dataPath": "params_shard_262.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1035d5d12645264434cf12ce6f48f5a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_263.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb59e9fe975019a517358f45363c3ada" |
|
}, |
|
{ |
|
"dataPath": "params_shard_264.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8098889bbaabcd36b16fd5d3f8a8e7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_265.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "afe1f94bf3e01b1a6007a91025b316fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_266.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "756c35a7958e5af0e276fb414d322768" |
|
}, |
|
{ |
|
"dataPath": "params_shard_267.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "41802b18e131dcd7a0023c70a02b99a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_268.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c389bf112c345558db7cd9a43afabbd6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_269.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee3c5d7cb561c122cd52154428055be1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_270.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c585ebf21c93f1e50a55b9a3a5e7dbc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_271.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "acab732c62415e11755d9cfd402dfca9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_272.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d5e304ff5bc4da6bd5d5ac9252d446a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_273.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d7ba43ae447bd361572872330a18c2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_274.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30162944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25968640 |
|
} |
|
], |
|
"md5sum": "f2b33828e0546e030c8d04aaa0d85909" |
|
} |
|
] |
|
} |