junrushao's picture
Initial commit
ce9f3e6
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 18982134784.0,
"BitsPerParam": 4.5002708090210115
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131076096,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32001,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131076096,
"byteOffset": 0
}
],
"md5sum": "ce8d5dce4e14895164b9accf91f38e4d"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "016856341a8a2cee93f8545389d55ecf"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "709083325f6bb9de1817b2367a8664d2"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27705856,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32001,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384512,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16384512
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 16400896
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27673088
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27689472
}
],
"md5sum": "3043f0b9949b1db50ba8aa86b206d213"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4a2192e79fb92ce8ea9ae3146c02101d"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8f64e3e4d1074fadd78069b0b0aee52e"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2a6d889c101e0aae9ec019b441d5d83a"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0db6ce1cc5d43630e92bdb36f8f087f3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d19431f02b69f83df63521601e89957a"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "2088cf4fd752b0833ec5b03e1d4b7169"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ee14507ab71546525d46689c6f2d1d97"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "23d1281688324cb0275ee8f75acf1cd7"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "1aa880b24f2af0d8dee4562b2ddb77f5"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e9bd608cddeb1ab68c8336ab8d66581e"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "26915f031c12293d0d312c5d9e98bc56"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "74a8a9fd34c3c89f426ad2fae09f06e4"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "28cd89bafa047ad04e96a973f9e03c30"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "48128c2b95e662826acada932afc0c2f"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cb3b28216f67a3099fb3cb78402a2f96"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a301b6f75aa00465636885728a22b84e"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "3e57714c09147de405992c2ef9ed82ca"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "572be513823914b7250ec69d5f0fcfb3"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "11c33050393668fe04cc896345eba65e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "23875a4d24699f98f5fdf7100bef42db"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "81b087e177a03f929b23dabf2157a7ff"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3d0ab1c69f1bbfd55d9009b2f40b4cf7"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "283728038e0b03de0fc8cd668487928e"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "169a3cf8fdc4de710ec0f04ea29539ec"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ab46556ec8c4d13e3b7961c373269466"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "0bf90e7276b7d45589ada6c2540ca334"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d959bbbb5c579d2f61ba00b146478b57"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "72a77ae0966dd154e9914c9d3201d0a8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "070334687df7eba7f8bb0756d7c4bd24"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "39eb99c90f5cd3d0731341517c564511"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5442ffddb8d3dc022bb24f2ac8d804c3"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "193935abac61a34c0b8e309153775652"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 131076096,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32001,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131076096,
"byteOffset": 0
}
],
"md5sum": "cd668a9efd79094484e0ec5075d1dc6b"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "783953056d8a5d3d4137c17d927a79f2"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8a47b4cf73618c2cd003a8f4bf017cf1"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d15980a2bc77740c4d6652c3c7d357a2"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "63670390896cde904751ecd830cd6ea8"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3e7769319e00b64bb17247356f8221cd"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "41438192d974f1fdccdb13308512f924"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32932352,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32001,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384512,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16384512
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 16400896
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27673088
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 27689472
}
],
"md5sum": "0d033da14d40a767e6652378eb305de8"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fb94fc60e2f211f081c39d399e590825"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4051cf6476e9cf55fa6eefcb94d959c8"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6fa07fe49b2e582a3f1c0c4acb90a40f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c203019d69ee2e0e7ffaf90feca72f5b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e79f414d6cc6dd42c6bacffdd356acf9"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "67a9b45b672cb31f760ad41cb50f3f23"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 24952832,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 4210688
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 15482880
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 15499264
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 20742144
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24936448
}
],
"md5sum": "12a49704c778b6a31e8f1efb7c9e923e"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "818cfcbf8531cf65a7d397a804468378"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "587f643935715b665a0e7f9396801ee9"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d7749a2b7b61962595807e3b2a3a93fa"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c2d9ed7acb38b3da704a3ddf48eda8b5"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "de3849c66602043661245ebbea64db7d"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "49032aba6edb6b799dc6e5e38952c714"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3f7727b0a02e1ebbc44064efb0ee295c"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "816674667ad61f0da84e74e82e4f8020"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "8b7635fb3a865e741f1f5e95430e9400"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7c1b2b596d04b8c176c572e510ae778f"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "920fd981060345f7a5621d1ff20f1d83"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "43ef41abe17891ca8d5b1a861d2acd10"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "669a79fa07d91f1f6581caaf4f3eb827"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "79b39648b67acf36ec4dc4efc2db1e18"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ded61e49111dc67a58a31aa55aa1cdc0"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5250d9ae4c095b9517eb026376763640"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "a086294931beb0e7f833b92bff52f51a"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "670c64a915deee1c7dafa4722c58a0e2"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5d027f1bc3e6d8d651882a0c6fa0dc5f"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6bdd48243493ce610c61601790f0e309"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e9ff946ef084411488fb74367c5c4b68"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "9c1b2e4f4603b2bb50a762343b8baa2e"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b8b27e71004c65791464132cc089a287"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6d7a14a76bde296d01dc7dd7c4f9ac57"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "50a1626483aead9e11ecfe695b9bd42f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "32cf1e876ea31c91e7b31ef63bee9e6c"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "4e062d758c95c462e0ee18b5cc8fb93b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3adeafc5c849fe487bbb08868255e8ab"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "01dd312e7596c7d6035bf5ec7cd67462"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0bc70b4863db47e1d2e4b689c22524c8"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "56069fa7349ddc8e14ca7fb0f491d2b6"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "de3b12819d84560ab0331667c4597f33"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "9c37b8ddc28950b8e807573905dd9cca"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c1d10dbe165b65b43a844fbcf70777a0"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "373fd7768d9d90f396741b0baa176f08"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "b68234bd761bfe4bcf80a2e42537319b"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4fe6008f964d4626aa3c1e28894f0536"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "59a18fe3088902d8cee532131e83837d"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "756e8a98bf630fef8bd724311dd99a9d"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "aff786611ed65110257cd2c06ea374d9"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "cc00da699319ee7a2464b4f25bb43c4d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "dac004907afed8b4bb0852aece3afb00"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "604e9714563b9468305a4a5eac88f07a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "46dfc4371269e1e42604edaa4a999b52"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "035d61171ffece8c38a88565e7a7e9e2"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "b67a37667c1e845945dbb50ac6463b06"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3cd163792d3e42f303ba27a764b8712d"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "20d02af420839973b9836745ba79073a"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "655c25cc1b2ff033668d1b67c853981a"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4da18e569618cb9fc5d803294cea427b"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d7b31c523d661c6589d412cef72d9233"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4645b7d61388817dd904a13dfc161f8f"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "e4e6ff92168156f32fe4747e4192b7a2"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "64b0657c588b2963439f0cd3a2e55c43"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "59f211420efe31204f9f59ccdcf96ab7"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "21f09d9496cae9f7962e7219bd75f1d4"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d5de3799cd01c1871295a59c0dbbd277"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9b6710f0bcd437c9f35b23d232765d9c"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "66e92e11342d553389db02199af3cfa6"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "80718b8846a5e1c2e0b134c66b5c8a86"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "1ede28c7a85fe8fc552810860d5937df"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "13df5cc233f060882245296fc6e08a65"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "44e126b920fc7d41324e364beb16ee52"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "104f95355ce16092c0351c7b20f3c8ff"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8f2f9111ceff1a505d8a17571521bbd1"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2f632349ad4a3dd10249630cf9669168"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "09d53e021054b3d40e2f62cd284f4f7d"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "1b0a0d8b371accfd743218e8c21c37e9"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a613dc8b74173b89a94ac285c0ae630f"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6e3ed1516b3909c06df0a937aeccacc8"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5bee65c1bb54bc1ef465e69e3bf08dfa"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "082af15877046a3d7b8e66dc9a4b4600"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "da33113672e3043f8c091440b6ab69c5"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f14002a8589cbe6485b5483b673a8192"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "43ef55c5f793b862188c154e98634a3a"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "233101aca570045c56d3e22b1c5b3f9a"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "446e58dee2f62cef6d9f85197e0ebc69"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "efbec170186ac924f381dcae22493290"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "43e84e9f3641cc42b588c106abde79eb"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5f684d58f5fdc3b9a4ba82766178d7f8"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "86c271074a9fb90c42abc02d8484ee17"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a0f39f615c389305d80b9cfcd0169169"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7b4853b9c05547857503aa7b3869eddc"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b5ad98a4f53751ba4f05a5449063b6b1"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "8d6a67f603f49864f9c70d306434d5bc"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "33b2563a867018a48c352619e87e608b"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c64586f6a68d86e1ed112b0f7e7935a6"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6b33e6d96ffeaef495cf5152a3f2af9b"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "23c7f453d65d9431ef7a8a1fb26e91a1"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1348663d62f9656e2aea9d8eb9626a19"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d4c384f24dde9f6f2364384a0b64c6e3"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "95e1be1912f556ac81a577404584f96b"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6804bbb340afd153bb43fe0b371e7838"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "f4243df377212548f5aeb65b10c513ea"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a83cb83c2ade5189040542d9a1aaed3e"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e0f43fea67156e27ebb00f879db9f793"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a50ef1e70814a1d36c97c51743a0b7f4"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "82b3f43f9fab681d0d96c651f798e02e"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2910c9316ef3786ae1ee78713e94515d"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2e187cce2c2aa88b59abaeb1c0086ef2"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "17b92f94025ec5fa0efab668a1917353"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f33c169f82ef2f620b22f0fdc664cd7e"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6ce48169abc82f022b54cf9dad5e9a0f"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "fb68802795e165edb0a6012da8e4fde4"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "964dcd09f2353aabbd213fd8f289dfd4"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b69cfcd8405e0cd0b1071c146b809c41"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "affae6e41d14097be0f1f24b3468ab20"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "bcc9bdad778f5700812a9e3b68d87a13"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "cc08f89a961ca49cac6a50451bf82964"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f4d815ad2c8d29f1da8a47ab94fe5c1a"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "974cfdd58dfc70d2d767e5768f998296"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "91abd4c55116a46654ed73510ca4c843"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "909bf878793040ec6a4e0fda148e0781"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "34c960dcd82556ce4a69c26cc80c5bbd"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "03539b79f84f3b80b009e11a626938e1"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3add3feab898524115600007535e76cc"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6451eeaf85040ac11571f00cf35cd402"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "66ced6c83c59559a77c35bc43d595dfb"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "2c6ea24b7a49d169b85bf4533f6c026e"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "74ade524b99e887eedaecbc5fe596520"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a26cf5507a2db8ac64986ac5dc2fc3d9"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "7c574cc741c3ada51833911c3ae20430"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e4e837c4df1566a277a65cc1bcadb859"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "da7de7702eef8e6df70622c17a2cdfd1"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "be5635ea848fddd2a1da52e96911e500"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "0f964cf9b5a64fca54f251815e9272ea"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "043297009f15b6b2cfded30315a63902"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "4ead52b8dbe462c8ad805126121ba5cf"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6782c57d9dbb6ed181a16a02d6a63fae"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5e60b98b300a6fc7e49c68fbd7b6ba1a"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "21fedba317847a73da97e6baee3f6ef2"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "bb989e56e2f0d4f82dc25b0d129a0544"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e328922ede239cf1b7f844e659b4401f"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e8f95f1f65b203e23a71eca8af4585bd"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "7260d01a7ceebb08799d864920923e6a"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "cbd910125308c0798261b147149efb7a"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3323d8ae6a4002fca716e5012106f489"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7eff2f7fb0027dcc56af4831e60813ad"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4b98b3d80780c4b11211bcb74462a876"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6d86db4e59932a067cc4a544a90fb47e"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "364b691339c7e78658878bee711d4a01"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4b6c393aafaddb53d90f001424404eb1"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a653decc84d22ad2154809dda4a391eb"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "34e97690e1cc300814d86f6af8f6617e"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1f7cee9a2b90d6ee2922ab7507f3ca36"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "e73655f035b28aa81478ea2e80d5fc2d"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "34dea69f579474827868a6fe19c8c41e"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6de5f1c3ced4a3a4491a544a93b9fa10"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f8c232c8f3f0d9f721d706ce5f465b7c"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "db75bd519e582ae60dd0be998c8fbbe0"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b2c8e55cc6ad0ad2a86eecc8cd57651d"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0f4e5ff7e5828d7dc1cecbc0de5869ee"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11288576
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 11304960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22577152
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 22593536
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 27836416
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "b3f32cbe4cff8863f4e21ded6824161f"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5d8e0e751fd8a223912e5c79dc428593"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "32a9bfc6935786a7436a314ca284cbd1"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "342dad2a9e7a92d2fe6135d648b3feef"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0dc6ca92099fab488e00fd4af5214b8e"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "42c7f5e9e5024f5357b89c039bb0e55a"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e261cad06e7398cd71996862df1ec7ea"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "eb792ba24fd59ce376e5778d7c228101"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f77c0cae72072ad5981bed9f06bbb980"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "5111a8d65755cd1f323cb15c4df60077"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "508dbb3f23bb7d1152cd269b448d3492"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "67f89e94283051b80e73dd1dcd3575c1"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "808247ebd47de2da2633fd14d79ff286"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "944568005b88716de2382ab9983435d0"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "47f2b2b1b5ef2760f1151d343b222d11"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ad57f9fe9ec7f98731626a10ebeabcdc"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "25da8de9906baa74937656a8ba0af3a4"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "b345d2a575799783259b6be3d02b5232"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "53ca51898590bd67af23658db2349756"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f39de6f590b12af18e7ead18cfa4f607"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a816555eb2b9bddbe19e76f64834974d"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9ad2c99ffd9ff828a659ce1abf1c16f4"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "698bfd080b625ce71fdafd9ccaac218b"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f7fd9bdd68478da8ce3eda2352f4f018"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "59571d0ccd7dbed15aeb3f47bf1f3676"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2b39a42234915bfa48e2792e76c979fe"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "68b37e6b26b2429eaf3340a7d69b0052"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0f703e975fcb8f76ad8cef96d338a6df"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3f115ac7c2a452a549aee3d378784b6c"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0837ccc73993ad6f63e44ad9cd905286"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 31981568,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9437184
}
],
"md5sum": "f8bfe91180ce0352e7e4a150cd8c12cb"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3e098055e78889bba34e54d709f8ed84"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ee49ac7cdc254f6ceff88fe847a41d09"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a2110f481b846f26d65907b72d782e98"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "740928be4ebedacfa284002741564498"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e101625a9a22c4c894967724f595e1ad"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "171edf5c6b339c1939d72cb73e425b7d"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "d992dd076400bb6e190e1f48951c940b"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6a97409e208908c0a8695f96b6a1d7a0"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "9a244d0d44b08daac46e3f49541ace94"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f5c0cdd9adcd5070503008437832bbcb"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b555415ac064fc432d356e5f3ae93ba2"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c7a3153d4792175a12adddf13363ca6c"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "52aa47514bb10c0750802030f72bfb40"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "6739b3be349305e723b94077579238c2"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "10112ba47e8c56b0c5178945fa65f71e"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "78a988c760c335f08c561631bd979585"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "baca70b12cd72f546ef45d05959d55d0"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ed6c2d38092dad62689dba0431a51e75"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "782ecf4c5a914f05b8a134caa43c0420"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1545916e65e30d31dc02fdf4c4acd2aa"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a69a11c58a7154432ce95e8ea60de6d8"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "0b3810e521bd7ab53bffa21129875e3c"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e5eadefc0b6cb8d867369eaa65a89d53"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "86a06a240eecaa470802ada7253173a6"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4e03d3ededa52bbbb2b301da0193888b"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d6157ef3dfeac66fb82c1b6bb796e83c"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "196640fdc3f825f91c4f53cde02c2394"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "563c02eded945acdc600b694dc0eee66"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1035d5d12645264434cf12ce6f48f5a6"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bb59e9fe975019a517358f45363c3ada"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e8098889bbaabcd36b16fd5d3f8a8e7f"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "afe1f94bf3e01b1a6007a91025b316fc"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "756c35a7958e5af0e276fb414d322768"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "41802b18e131dcd7a0023c70a02b99a6"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c389bf112c345558db7cd9a43afabbd6"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ee3c5d7cb561c122cd52154428055be1"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "c585ebf21c93f1e50a55b9a3a5e7dbc0"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "acab732c62415e11755d9cfd402dfca9"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3d5e304ff5bc4da6bd5d5ac9252d446a"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3d7ba43ae447bd361572872330a18c2f"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 30162944,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
}
],
"md5sum": "f2b33828e0546e030c8d04aaa0d85909"
}
]
}