junrushao's picture
Initial commit
3248360
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 15201296596.0,
"BitsPerParam": 3.6039124212684372
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 104963280,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32001,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104963280,
"byteOffset": 0
}
],
"md5sum": "93c92d3034461698963b3433c6d89f57"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "59e0b2a395ff9b526d5896b7b55965e7"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f0b0b4b2a602cc8034e5fbdbf7e9e26a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "5c56d80f283337f5ea618a044eac9550"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "c1ab2872ce208d1859a02b9cbdff6bae"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "52b9a5fd070b69457b0c82778fc931a6"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 31241114,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32001,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13120410,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 13120410
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 13136794
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22164378
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22180762
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 22197146
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31224730
}
],
"md5sum": "52233f53643bd49b51077c7ed81bfce9"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "6ade92bb1a2a6af81106e8214237dc01"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "49b3d1d541e2eebce7971c2847a00797"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "c99d7a4cb0b53ef2a795fe2736b31234"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "2ae46bc770c53547191afe2353550c0f"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "04f64778f2fb1ecb1e7b29506b25767f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "8c1821e589e55fd8d79fd7d1fa061aa1"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "fe971b41ea9fcf1a6c04707fc123eb45"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "9570d390e80fe582779dc5537c1c37dd"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "6a82c3ee78ce7a65887db0b3191028ca"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "c17e0972cdb1749fdf13075985e534e8"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "606f6efd8c03b6e392882687bd45f645"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "2230f942c64b3fe5e54b24f6830b7d45"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "83211957771c08c0f6fb443d5005944e"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "e6ed220e2747f1710b13e2467fcfa9ef"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "16ada9c07eecc437278b4a866b7d9065"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "030ac8a136406772b6ff229ff3c28a27"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "396e6f63f50763912ddf3e7e383d6f2a"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "36d9508ea73a7459fe8b36d0f4745145"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "e9f58d03bdec42a8e21494020880ddf0"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "32ac8f262b419b409d53ead8ebc079ef"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "d4bb3babe576f2cac2132b49b36dacdb"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "b7909641c867db545761db0d98eedbfc"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "8cb0c700cd526d5071afd71b5b5afd4d"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "42afbf52975887a5435c12f1a067479d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "b128700377210ea82ed26ab685525758"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "fdf614629a18a5381da9a96047fd926f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 104963280,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32001,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104963280,
"byteOffset": 0
}
],
"md5sum": "79c5421bdf07b40173055e6ae9df1f20"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "27fa72d3c4acf57d46da21a43ccd5e85"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "d50e666af4349d8b3ef80538b3d1b19b"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "4f5fbae4dd655034c60614a9fd1c71db"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "440a787e513321a0a03f6bd65e4a1656"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "4fed16912ae2fffb438ccedafcc9711a"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "9699e3fd2a34e1921b49315121d7ce15"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33129370,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32001,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13120410,
"byteOffset": 3375104
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16495514
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 16511898
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25539482
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 25555866
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 29754266
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 33112986
}
],
"md5sum": "da4b63b57d2f5733055653c6599c43c6"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "339b7f8f87b818144b31b595b2c22e0b"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "d813918b3472f92bc9f7d251bba0fb0b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "03725b2336fba22a9ba5f5688295d2f4"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "df18f34ffda694586c72ae96cb15a2ae"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e3aafb2bc69892a9dccb724350f9a35e"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "5262ff768083fef4fb42ab6db012022f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "110b879edcf171d29219e6b62a038c68"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "79bea5f0ca691a29bffc020cfbeccbeb"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "aa81961df9b1f81e1e60a1592031d9c9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "bec7abf879806e308606a8958192ca02"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "66cc3448c539e7675716ab75621203ae"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "09da626999963ddc4ef2344c0fb931a4"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "036b8fc18528f004ab5cdf18fd65bd2e"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "d04225390be085073a3cc1959e51b4c2"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "8377e7454d8268c060bc67e5321d43a9"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "d74fd1da33b2eda821efbb02298f4304"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "0ff6c3469c431d3c820273748c8a1b8b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "644bbc33ecbc342d82d01f6d02ca7c5e"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "d2f904fc3fe7b24383250a1bc57faaa2"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e15edd85d31ec6336aed25bd0847dcd1"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "3209a41c6ad6a270ac5d7482a0a6a2ad"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "297af47bb67e1c27a5e67ba51e25e044"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "73ad9ec11d1f78bca3590a4eae394053"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "7d27bf3babccd8fa4de5509e13360bb8"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "348c05e6c2aacf6efca32696b32c022a"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "67ef99f2355dbbbbea4aa6f02d714e73"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "9259154b7abc8a9d763fc210dd15feba"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "b28bf60a6687eeadae91267a7316f5e1"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "861706a83746dfcd33aba8fd9ba5633d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "58683c9d095544e3cd19c99a06a0a710"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "ba511ec74bcb652ce532387a246fc63c"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "4ce5b688a10c06eba486f75b5ef650e2"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "3201db0708b31e82943fc5420397ae90"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "10fc2df0d953e574683eda6450cd509b"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "1c2faf0abbff58a937d8622aed4a4d3a"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "5519bd567aeab86213cc72727f71812e"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "3cc6c3412346c5d989ee89a0e84f4c0b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "4a78300c97fc19cf8cc2838635591838"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "4f7f381821a558a41dc6b9093031aa42"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "192ed3ad89f221bb7212f2636990e7cc"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "4db4cdf117406af641e1a9665db978e6"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "775ce60b47259552eb4e77971ea2b705"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "bdcccdfbad7c825fcacf7a04e425c345"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "18a2d5c0c8d0ff81546b42ed8963c15d"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "d07ddb3947b73cac15155dbc76a88989"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "7b0c680fe96bda01a0c1ee4d36356363"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "a1e63444ea256ac1d357201ebfd1bf93"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "1447ca738b4454a05489236de9cbe4e5"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "a5410fcece138ac573270cf92e28b341"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "0ab67be4bda0ebba66840ca2b03b941c"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "530559f9849e3bad3272fcf87fbee89d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "9a87a570723568ab78113893eeea5aee"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "b3e1df5a3904f4637d5c4d971f2dc76d"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "36e23ea62e1d6475980c406cb672ddd0"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "abad539d35fe6f85048da8e04631ad87"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "34ff4b5e0491aeb553ea2d46455b5f93"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "ac54be5de2833fd7c03d08a728ff231a"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "a4f2623ce558341506cc496ad5e1f1bf"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "b6caac17760cec7348b4f07f4fced33c"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "9241e235ba0023a08154b06e39919949"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "8b03b5d803be3d4a633890cdf2a49727"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "f5c96fb9aba15f5ff0774d50a94b3ef4"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "cfbba49b38c8653a66ead63432d5182c"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "88cdc9facfc8af39be7ee6016fc3c045"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "a96305b17fb5d3590624ae53e8235267"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "82838994172fd628fdf384dce3af887f"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "add436cfbe2236b3f757ee5bc9665540"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "f2580d5b381e928f411a3546675385c1"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "1c311a4e48bd588848d005e4d812e9a5"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "2815fd93d3baff3f2622eae2b89113ed"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "721bcd1accb2c54f6d107d60716e2885"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 29052928,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12402688
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12419072
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 12435456
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 21463040
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21479424
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25677824
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29036544
}
],
"md5sum": "16d48c27048bb3db3b8673ee9437d625"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "1053bdb89ea523c9d73744a3e8a5ce23"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "6eb3f4cbe22d88dd292a7beffaaec174"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "2e577b3e7d1b08744b1a852214aa4bef"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "d00a04c7a299ed1df0c7bf09b28db406"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "35d74363a0566fbab2599eb17d2f74b6"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "f4aeb92d5089a837e9840370f58afef0"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "ddb6eb01e95c4881918f0c81f50bdcda"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "fb08dbb414ec9e3ca62b2ccc0bb66f18"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "8e467605208ebac08d9712f7c67041c4"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "4ee5494d936186972a5052a6b1eceb2a"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "8b171f406d1d7feb7903dde2d40db905"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "bec0ba9f7c63f646766a8485c50a91fa"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "516d15ed3569394bb5ac134820aa751a"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "585b414ed7e396035294a945d71fd39f"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e45eb4962ba091709109693b8821caf4"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "c8e4852b5e7b90f7cc013201e918a99d"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "47e5158f91d5c86fa27830b2451881c7"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "0998c27cc590ed818f87d73e26c9c28f"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "d7a4546c410ccb02895f89fc77de0a64"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "18dd11037fae63de90d20cdd47553910"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "04ad399c92ae4223ec39b8bb56bc69f4"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "612a66e66220dcaaca45b2810c4e4676"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "ec59c3de5511062f0bc60d5bd00d9717"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "159d5e0f06d228dedb1d267a9b9963bf"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "307feaab755c381a27367cf8a42c5152"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "bd405ea1517dd2d6fa97bd34d487affc"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "5fabc486c5c8a7fc5d4aec2423b143e6"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "bd1f27b94cd4a9e8914a1736e228593a"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "7a34f6a8b1860602e9a912c9a84244a2"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "a52981fdbb0c97f87e626db10584ddb8"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "18615a5eb1c8ba6be4b73a5f9039b9a2"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "0a410fa190586382b47ca35fa6e31784"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c611032e523e5f76c9ee93e1fb877768"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "10cd93a4ff7332bfd8537009cd15ae4f"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f60aee4288b8426f5fbaa13f433f8c93"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "a89eb0ca04b78c79b230622489586f56"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "c8513ab4cb7d940d8161b845bbadcd91"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "3b5aac4087a82ca4e18a5fdbe206a128"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "aecbdec3cd057526c7da1513a0c61a72"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "6023b710b0e60eb46505f3826c72c670"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "98d2e2ab20b9e2ccacf17ed843d2cff7"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "8a4f29ba4e85483bffc12bf5097c0121"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "9410654818d52001abf8fcfd26549f54"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "de2a9c1217b59d589522f00a62a72b2a"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "31b3ef3ca83a5a244c08d4ededd643c2"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "6eb8271bd5f713a5db2a7896c06691c1"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "62320add52cc7aecb5e9fc25faa80c26"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "ccae2f031d96fdb312cd18830bf71f9d"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "11fe3007148d1b0521b867debd56828f"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "a46a92ff45d6bbbb2c14e0dd7196e0f1"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "43efb54ab61f9ae0f29b159084e68a44"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "5a7926c96432f840ce9f8a6e4be6d106"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "0f72b3177dad9e5cfda1d9dedf93f585"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "fadc1e570af0f969f3ae486870487fea"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "a45f276b94fd2bf56530b2383fb8a024"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "794f99223a88baaf19a2a3bbc27657f2"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "a9d4e49f0bd328a0fd50ecf959f2f539"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "1b5031e28440ac857386bc3e3825eb84"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "c44abf605ccbbc4c49416b93f47ec227"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "71ef79513ada66d49a159b596de3cffa"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "12ffe2ef7d77bb077b9e15d04ab999f4"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "e8c02ee194c5e3d209fe4bb04136a627"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "37d7783b3ccf7211a36ff81b363e336a"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "3f5a1db73f3b46ff22a97bedbbab77d6"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "a9d652b57dff20b3d2fd07c8277440d0"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "1800aa40b04ceca84c0bbe040f241317"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "c5bf1b02b6e9efd985bc40bab603166f"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "bd2b77b1d99501e4c42a38c029e5f815"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "f18dbdf5b5cccb0f7d24b52d591f9952"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "6f5918cdc46bacbc868f3df759c5ecdd"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f3b422f58387868148e3129f8a542295"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "55f7d38d01b8c6a20f5ad32d526a78eb"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "409ce0f0a60dac21c03bd8b1b5854d90"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "9f98bff014ea410bb9ce370d2473e3d1"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "4a6d152d18345da193761932e7ce2b0b"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "bf2c82cafe502acb05d11e1778031df2"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "01911751f99032f7a57aa8ec05ec98a8"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "62c8b8bf9376e1024363abdcfae26b91"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "f83b1c62eff763dcccabd3cef29addc1"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "826fb46be468ff8259cb054d2d41e2ca"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "aa8d82e61c2fbd438baeefdf5a4cce81"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "c55e95a7b993db51a668d299286d2d40"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "ea6c730e77a5431b5e9379dcc1ac705c"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "85201eea9ee1e405bb55c537ed16309d"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "6d2211ca49540bb219a48d36aa814931"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "c1438a5663b8e282cf6ad9a59df0c3b0"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "9e705d1c1c994cfdf7d47d8d060db763"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "a4689895c97d006b0b6e0d8829fbd94f"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "30eb02152d296f09487a02fcf6f985a6"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "8dacb0687ca953b8f449fadcb7fc536d"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "22999d95d3312718ed61af0ff559897b"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "418c53c9eb7d4b0bcf022587e532663d"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "612c64f639326d777b3909a9f3cf93cb"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "8a73f5d082a19a5a3bb4081a28d75046"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "1ec45ac983a5505ce73be63d0c149d1e"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "bee1164161710d26782842baf2d1a939"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "88d2f8dbf2f8373749851bf6ce782b75"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "f112f0b2d088a1fd89951f3a4d603e06"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "450fd789371847cba2a3bb98a56bfaa4"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "4b4ecbb8cd7c285869a2cc1b3e8069ee"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "d7804525118ed624a22ab7c672b9d223"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "90518705fe1f2d40b9f3c7c5038ccdb5"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "1ff8b044dfb74e27e5ed1c5ffd7070c8"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "a5c6d32514674ea4d38d7dc99dfd46e0"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "a2b32b9b3913c8ff49f63c3490b63d16"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "fb37c8f5212a5765abb292bc994eaac9"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "1fd8e965aa476a9b3b31d559c3c3d152"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "4c71cd9e5d0a47e018bc2ba7fd7eb791"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "ecfaa1be1b1b44a6f47eb34940ff7e66"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "7c743c389fd9bf55a2592a41da9d4af7"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "140c022bd65070b5557e25d414d35b66"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "d19f3ca5b7c9486bd72704a89a49f110"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "c304d2de82cda67aa547409848c25bd4"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "0e621f3c255b82187b7ec2731c636ba8"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "382a2678e3d6ccd50ad940e0736c0f8d"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "365c7758f6d455be588244f6f332215e"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "da779bfc2e476c525a420f237d146682"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "2db2ac480646d5d9f1197ba133144d06"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "7fdacec5bc69495133aa1b4de522bdc2"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "f67476e7a9aca7be016043ebdb6814e6"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "4bbe8bd74c18427392099bfa7052fe12"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "c04eb0ec6f78bbb8550faa4746d0847f"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "01341f3bd5f6bb97a69c4ef0b2f1cdef"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "f48a0bd291a15729ad513de9cdaae892"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "80763779b76f4bb7ef9f688aed9d0508"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "13ff20c7609bed19931e4ce869308baa"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "133ac77d2f4f9e4b293b49ce24382e86"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "3c9b1910baca3f30e8d1da48b77fa5b8"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "66e7a46bfd3ff6c670c42a7e1004f245"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "4433b4a9c32b21a68fde5c63d40c64b9"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "639d2819b2ff6522d98397f8b9fb11d6"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "199bf3ea65247df12d9a3d7cc82be493"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "253b430a56018a209765f580238572f1"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "62b156fe7866d016b02e84e5e3f53a59"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "5f8ce8af62c7a61c33f125862f61b5c9"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 28968960,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
}
],
"md5sum": "d727d4112c48b39279b1833343c23444"
}
]
}