leafspark committed
Commit de84e35 · verified · 1 Parent(s): 764b82e

model: add config and update index
config.json ADDED
@@ -0,0 +1,27 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 29568,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 80,
+  "model_type": "qwen2",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}
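As a quick sanity check, the attention geometry follows directly from the config fields above: 64 query heads over a hidden size of 8192 gives a head dimension of 128, and 8 key/value heads means grouped-query attention with 8 query heads per KV head. A minimal sketch, assuming config.json sits in the current directory:

import json

# Load the config committed above (path is an assumption).
with open("config.json") as f:
    cfg = json.load(f)

head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]            # 8192 / 64 = 128
kv_dim = cfg["num_key_value_heads"] * head_dim                         # 8 * 128 = 1024
group_size = cfg["num_attention_heads"] // cfg["num_key_value_heads"]  # 64 / 8 = 8

print(f"head_dim={head_dim}, kv projection width={kv_dim}, GQA group size={group_size}")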
generate_index.py ADDED
@@ -0,0 +1,46 @@
+import os
+import json
+from safetensors import safe_open
+import torch
+from typing import Dict
+
+def generate_index(directory: str) -> Dict:
+    index = {
+        "metadata": {"total_size": 0},
+        "weight_map": {}
+    }
+
+    safetensors_files = [f for f in os.listdir(directory) if f.endswith('.safetensors')]
+    safetensors_files.sort()  # Ensure consistent ordering
+
+    for filename in safetensors_files:
+        filepath = os.path.join(directory, filename)
+        try:
+            with safe_open(filepath, framework="pt") as f:
+                for key in f.keys():
+                    if key in index["weight_map"]:
+                        print(f"Warning: Duplicate key '{key}' found in {filename}. Using the last occurrence.")
+                    index["weight_map"][key] = filename
+
+                    tensor = f.get_tensor(key)
+                    tensor_size = tensor.numel() * tensor.element_size()
+                    index["metadata"]["total_size"] += tensor_size
+        except Exception as e:
+            print(f"Error processing {filename}: {str(e)}")
+
+    return index
+
+def save_index(index: Dict, output_file: str):
+    with open(output_file, 'w') as f:
+        json.dump(index, f, indent=2)
+
+if __name__ == "__main__":
+    current_dir = os.getcwd()
+    output_file = "model.safetensors.index.json"
+
+    index = generate_index(current_dir)
+    save_index(index, output_file)
+
+    print(f"Index generated with {len(index['weight_map'])} tensors.")
+    print(f"Total size: {index['metadata']['total_size']} bytes")
+    print(f"Index saved to {output_file}")
generation_config.json ADDED
@@ -0,0 +1,14 @@
+{
+  "bos_token_id": 151643,
+  "pad_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_p": 0.8,
+  "top_k": 20,
+  "transformers_version": "4.37.0"
+}
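With this file in the checkpoint directory, generate() samples by default with temperature 0.7, top-p 0.8, top-k 20, and a 1.05 repetition penalty, stopping on either <|im_end|> (151645) or <|endoftext|> (151643). A sketch of passing the same values explicitly, assuming the model path points at this repository's checkpoint:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./"  # assumption: directory containing this checkpoint
tok = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype="auto", device_map="auto"  # device_map needs accelerate
)

inputs = tok("Hello", return_tensors="pt").to(model.device)
out = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.05,
    max_new_tokens=64,
)
print(tok.decode(out[0], skip_special_tokens=True))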
model.safetensors.index.json CHANGED
@@ -1,8 +1,10 @@
 {
   "metadata": {
-    "total_size": 103868809216
+    "total_size": 145412407296
   },
   "weight_map": {
+    "lm_head.weight": "model-00031-of-00031.safetensors",
+    "model.embed_tokens.weight": "model_embed_tokens_weight.safetensors",
     "model.layers.0.input_layernorm.weight": "model_layers_0_input_layernorm_weight.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model_layers_0_mlp_down_proj_weight.safetensors",
     "model.layers.0.mlp.gate_proj.weight": "model_layers_0_mlp_gate_proj_weight.safetensors",
@@ -651,6 +653,11 @@
     "model.layers.58.self_attn.q_proj.weight": "model_layers_58_self_attn_q_proj_weight.safetensors",
     "model.layers.58.self_attn.v_proj.bias": "model_layers_58_self_attn_v_proj_bias.safetensors",
     "model.layers.58.self_attn.v_proj.weight": "model_layers_58_self_attn_v_proj_weight.safetensors",
+    "model.layers.59.input_layernorm.weight": "model_layers_59_input_layernorm_weight.safetensors",
+    "model.layers.59.mlp.down_proj.weight": "model_layers_59_mlp_down_proj_weight.safetensors",
+    "model.layers.59.mlp.gate_proj.weight": "model_layers_59_mlp_gate_proj_weight.safetensors",
+    "model.layers.59.mlp.up_proj.weight": "model_layers_59_mlp_up_proj_weight.safetensors",
+    "model.layers.59.post_attention_layernorm.weight": "model_layers_59_post_attention_layernorm_weight.safetensors",
     "model.layers.59.self_attn.k_proj.bias": "model_layers_59_self_attn_k_proj_bias.safetensors",
     "model.layers.59.self_attn.k_proj.weight": "model_layers_59_self_attn_k_proj_weight.safetensors",
     "model.layers.59.self_attn.o_proj.weight": "model_layers_59_self_attn_o_proj_weight.safetensors",
@@ -670,6 +677,126 @@
     "model.layers.5.self_attn.q_proj.weight": "model_layers_5_self_attn_q_proj_weight.safetensors",
     "model.layers.5.self_attn.v_proj.bias": "model_layers_5_self_attn_v_proj_bias.safetensors",
     "model.layers.5.self_attn.v_proj.weight": "model_layers_5_self_attn_v_proj_weight.safetensors",
+    "model.layers.60.input_layernorm.weight": "model_layers_60_input_layernorm_weight.safetensors",
+    "model.layers.60.mlp.down_proj.weight": "model_layers_60_mlp_down_proj_weight.safetensors",
+    "model.layers.60.mlp.gate_proj.weight": "model_layers_60_mlp_gate_proj_weight.safetensors",
+    "model.layers.60.mlp.up_proj.weight": "model_layers_60_mlp_up_proj_weight.safetensors",
+    "model.layers.60.post_attention_layernorm.weight": "model_layers_60_post_attention_layernorm_weight.safetensors",
+    "model.layers.60.self_attn.k_proj.bias": "model_layers_60_self_attn_k_proj_bias.safetensors",
+    "model.layers.60.self_attn.k_proj.weight": "model_layers_60_self_attn_k_proj_weight.safetensors",
+    "model.layers.60.self_attn.o_proj.weight": "model_layers_60_self_attn_o_proj_weight.safetensors",
+    "model.layers.60.self_attn.q_proj.bias": "model_layers_60_self_attn_q_proj_bias.safetensors",
+    "model.layers.60.self_attn.q_proj.weight": "model_layers_60_self_attn_q_proj_weight.safetensors",
+    "model.layers.60.self_attn.v_proj.bias": "model_layers_60_self_attn_v_proj_bias.safetensors",
+    "model.layers.60.self_attn.v_proj.weight": "model_layers_60_self_attn_v_proj_weight.safetensors",
+    "model.layers.61.input_layernorm.weight": "model_layers_61_input_layernorm_weight.safetensors",
+    "model.layers.61.mlp.down_proj.weight": "model_layers_61_mlp_down_proj_weight.safetensors",
+    "model.layers.61.mlp.gate_proj.weight": "model_layers_61_mlp_gate_proj_weight.safetensors",
+    "model.layers.61.mlp.up_proj.weight": "model_layers_61_mlp_up_proj_weight.safetensors",
+    "model.layers.61.post_attention_layernorm.weight": "model_layers_61_post_attention_layernorm_weight.safetensors",
+    "model.layers.61.self_attn.k_proj.bias": "model_layers_61_self_attn_k_proj_bias.safetensors",
+    "model.layers.61.self_attn.k_proj.weight": "model_layers_61_self_attn_k_proj_weight.safetensors",
+    "model.layers.61.self_attn.o_proj.weight": "model_layers_61_self_attn_o_proj_weight.safetensors",
+    "model.layers.61.self_attn.q_proj.bias": "model_layers_61_self_attn_q_proj_bias.safetensors",
+    "model.layers.61.self_attn.q_proj.weight": "model_layers_61_self_attn_q_proj_weight.safetensors",
+    "model.layers.61.self_attn.v_proj.bias": "model_layers_61_self_attn_v_proj_bias.safetensors",
+    "model.layers.61.self_attn.v_proj.weight": "model_layers_61_self_attn_v_proj_weight.safetensors",
+    "model.layers.62.input_layernorm.weight": "model_layers_62_input_layernorm_weight.safetensors",
+    "model.layers.62.mlp.down_proj.weight": "model_layers_62_mlp_down_proj_weight.safetensors",
+    "model.layers.62.mlp.gate_proj.weight": "model_layers_62_mlp_gate_proj_weight.safetensors",
+    "model.layers.62.mlp.up_proj.weight": "model_layers_62_mlp_up_proj_weight.safetensors",
+    "model.layers.62.post_attention_layernorm.weight": "model_layers_62_post_attention_layernorm_weight.safetensors",
+    "model.layers.62.self_attn.k_proj.bias": "model_layers_62_self_attn_k_proj_bias.safetensors",
+    "model.layers.62.self_attn.k_proj.weight": "model_layers_62_self_attn_k_proj_weight.safetensors",
+    "model.layers.62.self_attn.o_proj.weight": "model_layers_62_self_attn_o_proj_weight.safetensors",
+    "model.layers.62.self_attn.q_proj.bias": "model_layers_62_self_attn_q_proj_bias.safetensors",
+    "model.layers.62.self_attn.q_proj.weight": "model_layers_62_self_attn_q_proj_weight.safetensors",
+    "model.layers.62.self_attn.v_proj.bias": "model_layers_62_self_attn_v_proj_bias.safetensors",
+    "model.layers.62.self_attn.v_proj.weight": "model_layers_62_self_attn_v_proj_weight.safetensors",
+    "model.layers.63.input_layernorm.weight": "model_layers_63_input_layernorm_weight.safetensors",
+    "model.layers.63.mlp.down_proj.weight": "model_layers_63_mlp_down_proj_weight.safetensors",
+    "model.layers.63.mlp.gate_proj.weight": "model_layers_63_mlp_gate_proj_weight.safetensors",
+    "model.layers.63.mlp.up_proj.weight": "model_layers_63_mlp_up_proj_weight.safetensors",
+    "model.layers.63.post_attention_layernorm.weight": "model_layers_63_post_attention_layernorm_weight.safetensors",
+    "model.layers.63.self_attn.k_proj.bias": "model_layers_63_self_attn_k_proj_bias.safetensors",
+    "model.layers.63.self_attn.k_proj.weight": "model_layers_63_self_attn_k_proj_weight.safetensors",
+    "model.layers.63.self_attn.o_proj.weight": "model_layers_63_self_attn_o_proj_weight.safetensors",
+    "model.layers.63.self_attn.q_proj.bias": "model_layers_63_self_attn_q_proj_bias.safetensors",
+    "model.layers.63.self_attn.q_proj.weight": "model_layers_63_self_attn_q_proj_weight.safetensors",
+    "model.layers.63.self_attn.v_proj.bias": "model_layers_63_self_attn_v_proj_bias.safetensors",
+    "model.layers.63.self_attn.v_proj.weight": "model_layers_63_self_attn_v_proj_weight.safetensors",
+    "model.layers.64.input_layernorm.weight": "model_layers_64_input_layernorm_weight.safetensors",
+    "model.layers.64.mlp.down_proj.weight": "model_layers_64_mlp_down_proj_weight.safetensors",
+    "model.layers.64.mlp.gate_proj.weight": "model_layers_64_mlp_gate_proj_weight.safetensors",
+    "model.layers.64.mlp.up_proj.weight": "model_layers_64_mlp_up_proj_weight.safetensors",
+    "model.layers.64.post_attention_layernorm.weight": "model_layers_64_post_attention_layernorm_weight.safetensors",
+    "model.layers.64.self_attn.k_proj.bias": "model_layers_64_self_attn_k_proj_bias.safetensors",
+    "model.layers.64.self_attn.k_proj.weight": "model_layers_64_self_attn_k_proj_weight.safetensors",
+    "model.layers.64.self_attn.o_proj.weight": "model_layers_64_self_attn_o_proj_weight.safetensors",
+    "model.layers.64.self_attn.q_proj.bias": "model_layers_64_self_attn_q_proj_bias.safetensors",
+    "model.layers.64.self_attn.q_proj.weight": "model_layers_64_self_attn_q_proj_weight.safetensors",
+    "model.layers.64.self_attn.v_proj.bias": "model_layers_64_self_attn_v_proj_bias.safetensors",
+    "model.layers.64.self_attn.v_proj.weight": "model_layers_64_self_attn_v_proj_weight.safetensors",
+    "model.layers.65.input_layernorm.weight": "model_layers_65_input_layernorm_weight.safetensors",
+    "model.layers.65.mlp.down_proj.weight": "model_layers_65_mlp_down_proj_weight.safetensors",
+    "model.layers.65.mlp.gate_proj.weight": "model_layers_65_mlp_gate_proj_weight.safetensors",
+    "model.layers.65.mlp.up_proj.weight": "model_layers_65_mlp_up_proj_weight.safetensors",
+    "model.layers.65.post_attention_layernorm.weight": "model_layers_65_post_attention_layernorm_weight.safetensors",
+    "model.layers.65.self_attn.k_proj.bias": "model_layers_65_self_attn_k_proj_bias.safetensors",
+    "model.layers.65.self_attn.k_proj.weight": "model_layers_65_self_attn_k_proj_weight.safetensors",
+    "model.layers.65.self_attn.o_proj.weight": "model_layers_65_self_attn_o_proj_weight.safetensors",
+    "model.layers.65.self_attn.q_proj.bias": "model_layers_65_self_attn_q_proj_bias.safetensors",
+    "model.layers.65.self_attn.q_proj.weight": "model_layers_65_self_attn_q_proj_weight.safetensors",
+    "model.layers.65.self_attn.v_proj.bias": "model_layers_65_self_attn_v_proj_bias.safetensors",
+    "model.layers.65.self_attn.v_proj.weight": "model_layers_65_self_attn_v_proj_weight.safetensors",
+    "model.layers.66.input_layernorm.weight": "model_layers_66_input_layernorm_weight.safetensors",
+    "model.layers.66.mlp.down_proj.weight": "model_layers_66_mlp_down_proj_weight.safetensors",
+    "model.layers.66.mlp.gate_proj.weight": "model_layers_66_mlp_gate_proj_weight.safetensors",
+    "model.layers.66.mlp.up_proj.weight": "model_layers_66_mlp_up_proj_weight.safetensors",
+    "model.layers.66.post_attention_layernorm.weight": "model_layers_66_post_attention_layernorm_weight.safetensors",
+    "model.layers.66.self_attn.k_proj.bias": "model_layers_66_self_attn_k_proj_bias.safetensors",
+    "model.layers.66.self_attn.k_proj.weight": "model_layers_66_self_attn_k_proj_weight.safetensors",
+    "model.layers.66.self_attn.o_proj.weight": "model_layers_66_self_attn_o_proj_weight.safetensors",
+    "model.layers.66.self_attn.q_proj.bias": "model_layers_66_self_attn_q_proj_bias.safetensors",
+    "model.layers.66.self_attn.q_proj.weight": "model_layers_66_self_attn_q_proj_weight.safetensors",
+    "model.layers.66.self_attn.v_proj.bias": "model_layers_66_self_attn_v_proj_bias.safetensors",
+    "model.layers.66.self_attn.v_proj.weight": "model_layers_66_self_attn_v_proj_weight.safetensors",
+    "model.layers.67.input_layernorm.weight": "model_layers_67_input_layernorm_weight.safetensors",
+    "model.layers.67.mlp.down_proj.weight": "model_layers_67_mlp_down_proj_weight.safetensors",
+    "model.layers.67.mlp.gate_proj.weight": "model_layers_67_mlp_gate_proj_weight.safetensors",
+    "model.layers.67.mlp.up_proj.weight": "model_layers_67_mlp_up_proj_weight.safetensors",
+    "model.layers.67.post_attention_layernorm.weight": "model_layers_67_post_attention_layernorm_weight.safetensors",
+    "model.layers.67.self_attn.k_proj.bias": "model_layers_67_self_attn_k_proj_bias.safetensors",
+    "model.layers.67.self_attn.k_proj.weight": "model_layers_67_self_attn_k_proj_weight.safetensors",
+    "model.layers.67.self_attn.o_proj.weight": "model_layers_67_self_attn_o_proj_weight.safetensors",
+    "model.layers.67.self_attn.q_proj.bias": "model_layers_67_self_attn_q_proj_bias.safetensors",
+    "model.layers.67.self_attn.q_proj.weight": "model_layers_67_self_attn_q_proj_weight.safetensors",
+    "model.layers.67.self_attn.v_proj.bias": "model_layers_67_self_attn_v_proj_bias.safetensors",
+    "model.layers.67.self_attn.v_proj.weight": "model_layers_67_self_attn_v_proj_weight.safetensors",
+    "model.layers.68.input_layernorm.weight": "model_layers_68_input_layernorm_weight.safetensors",
+    "model.layers.68.mlp.down_proj.weight": "model_layers_68_mlp_down_proj_weight.safetensors",
+    "model.layers.68.mlp.gate_proj.weight": "model_layers_68_mlp_gate_proj_weight.safetensors",
+    "model.layers.68.mlp.up_proj.weight": "model_layers_68_mlp_up_proj_weight.safetensors",
+    "model.layers.68.post_attention_layernorm.weight": "model_layers_68_post_attention_layernorm_weight.safetensors",
+    "model.layers.68.self_attn.k_proj.bias": "model_layers_68_self_attn_k_proj_bias.safetensors",
+    "model.layers.68.self_attn.k_proj.weight": "model_layers_68_self_attn_k_proj_weight.safetensors",
+    "model.layers.68.self_attn.o_proj.weight": "model_layers_68_self_attn_o_proj_weight.safetensors",
+    "model.layers.68.self_attn.q_proj.bias": "model_layers_68_self_attn_q_proj_bias.safetensors",
+    "model.layers.68.self_attn.q_proj.weight": "model_layers_68_self_attn_q_proj_weight.safetensors",
+    "model.layers.68.self_attn.v_proj.bias": "model_layers_68_self_attn_v_proj_bias.safetensors",
+    "model.layers.68.self_attn.v_proj.weight": "model_layers_68_self_attn_v_proj_weight.safetensors",
+    "model.layers.69.input_layernorm.weight": "model_layers_69_input_layernorm_weight.safetensors",
+    "model.layers.69.mlp.down_proj.weight": "model_layers_69_mlp_down_proj_weight.safetensors",
+    "model.layers.69.mlp.gate_proj.weight": "model_layers_69_mlp_gate_proj_weight.safetensors",
+    "model.layers.69.mlp.up_proj.weight": "model_layers_69_mlp_up_proj_weight.safetensors",
+    "model.layers.69.post_attention_layernorm.weight": "model_layers_69_post_attention_layernorm_weight.safetensors",
+    "model.layers.69.self_attn.k_proj.bias": "model_layers_69_self_attn_k_proj_bias.safetensors",
+    "model.layers.69.self_attn.k_proj.weight": "model_layers_69_self_attn_k_proj_weight.safetensors",
+    "model.layers.69.self_attn.o_proj.weight": "model_layers_69_self_attn_o_proj_weight.safetensors",
+    "model.layers.69.self_attn.q_proj.bias": "model_layers_69_self_attn_q_proj_bias.safetensors",
+    "model.layers.69.self_attn.q_proj.weight": "model_layers_69_self_attn_q_proj_weight.safetensors",
+    "model.layers.69.self_attn.v_proj.bias": "model_layers_69_self_attn_v_proj_bias.safetensors",
+    "model.layers.69.self_attn.v_proj.weight": "model_layers_69_self_attn_v_proj_weight.safetensors",
     "model.layers.6.input_layernorm.weight": "model_layers_6_input_layernorm_weight.safetensors",
     "model.layers.6.mlp.down_proj.weight": "model_layers_6_mlp_down_proj_weight.safetensors",
     "model.layers.6.mlp.gate_proj.weight": "model_layers_6_mlp_gate_proj_weight.safetensors",
@@ -682,6 +809,126 @@
     "model.layers.6.self_attn.q_proj.weight": "model_layers_6_self_attn_q_proj_weight.safetensors",
     "model.layers.6.self_attn.v_proj.bias": "model_layers_6_self_attn_v_proj_bias.safetensors",
     "model.layers.6.self_attn.v_proj.weight": "model_layers_6_self_attn_v_proj_weight.safetensors",
+    "model.layers.70.input_layernorm.weight": "model_layers_70_input_layernorm_weight.safetensors",
+    "model.layers.70.mlp.down_proj.weight": "model_layers_70_mlp_down_proj_weight.safetensors",
+    "model.layers.70.mlp.gate_proj.weight": "model_layers_70_mlp_gate_proj_weight.safetensors",
+    "model.layers.70.mlp.up_proj.weight": "model_layers_70_mlp_up_proj_weight.safetensors",
+    "model.layers.70.post_attention_layernorm.weight": "model_layers_70_post_attention_layernorm_weight.safetensors",
+    "model.layers.70.self_attn.k_proj.bias": "model_layers_70_self_attn_k_proj_bias.safetensors",
+    "model.layers.70.self_attn.k_proj.weight": "model_layers_70_self_attn_k_proj_weight.safetensors",
+    "model.layers.70.self_attn.o_proj.weight": "model_layers_70_self_attn_o_proj_weight.safetensors",
+    "model.layers.70.self_attn.q_proj.bias": "model_layers_70_self_attn_q_proj_bias.safetensors",
+    "model.layers.70.self_attn.q_proj.weight": "model_layers_70_self_attn_q_proj_weight.safetensors",
+    "model.layers.70.self_attn.v_proj.bias": "model_layers_70_self_attn_v_proj_bias.safetensors",
+    "model.layers.70.self_attn.v_proj.weight": "model_layers_70_self_attn_v_proj_weight.safetensors",
+    "model.layers.71.input_layernorm.weight": "model_layers_71_input_layernorm_weight.safetensors",
+    "model.layers.71.mlp.down_proj.weight": "model_layers_71_mlp_down_proj_weight.safetensors",
+    "model.layers.71.mlp.gate_proj.weight": "model_layers_71_mlp_gate_proj_weight.safetensors",
+    "model.layers.71.mlp.up_proj.weight": "model_layers_71_mlp_up_proj_weight.safetensors",
+    "model.layers.71.post_attention_layernorm.weight": "model_layers_71_post_attention_layernorm_weight.safetensors",
+    "model.layers.71.self_attn.k_proj.bias": "model_layers_71_self_attn_k_proj_bias.safetensors",
+    "model.layers.71.self_attn.k_proj.weight": "model_layers_71_self_attn_k_proj_weight.safetensors",
+    "model.layers.71.self_attn.o_proj.weight": "model_layers_71_self_attn_o_proj_weight.safetensors",
+    "model.layers.71.self_attn.q_proj.bias": "model_layers_71_self_attn_q_proj_bias.safetensors",
+    "model.layers.71.self_attn.q_proj.weight": "model_layers_71_self_attn_q_proj_weight.safetensors",
+    "model.layers.71.self_attn.v_proj.bias": "model_layers_71_self_attn_v_proj_bias.safetensors",
+    "model.layers.71.self_attn.v_proj.weight": "model_layers_71_self_attn_v_proj_weight.safetensors",
+    "model.layers.72.input_layernorm.weight": "model_layers_72_input_layernorm_weight.safetensors",
+    "model.layers.72.mlp.down_proj.weight": "model_layers_72_mlp_down_proj_weight.safetensors",
+    "model.layers.72.mlp.gate_proj.weight": "model_layers_72_mlp_gate_proj_weight.safetensors",
+    "model.layers.72.mlp.up_proj.weight": "model_layers_72_mlp_up_proj_weight.safetensors",
+    "model.layers.72.post_attention_layernorm.weight": "model_layers_72_post_attention_layernorm_weight.safetensors",
+    "model.layers.72.self_attn.k_proj.bias": "model_layers_72_self_attn_k_proj_bias.safetensors",
+    "model.layers.72.self_attn.k_proj.weight": "model_layers_72_self_attn_k_proj_weight.safetensors",
+    "model.layers.72.self_attn.o_proj.weight": "model_layers_72_self_attn_o_proj_weight.safetensors",
+    "model.layers.72.self_attn.q_proj.bias": "model_layers_72_self_attn_q_proj_bias.safetensors",
+    "model.layers.72.self_attn.q_proj.weight": "model_layers_72_self_attn_q_proj_weight.safetensors",
+    "model.layers.72.self_attn.v_proj.bias": "model_layers_72_self_attn_v_proj_bias.safetensors",
+    "model.layers.72.self_attn.v_proj.weight": "model_layers_72_self_attn_v_proj_weight.safetensors",
+    "model.layers.73.input_layernorm.weight": "model_layers_73_input_layernorm_weight.safetensors",
+    "model.layers.73.mlp.down_proj.weight": "model_layers_73_mlp_down_proj_weight.safetensors",
+    "model.layers.73.mlp.gate_proj.weight": "model_layers_73_mlp_gate_proj_weight.safetensors",
+    "model.layers.73.mlp.up_proj.weight": "model_layers_73_mlp_up_proj_weight.safetensors",
+    "model.layers.73.post_attention_layernorm.weight": "model_layers_73_post_attention_layernorm_weight.safetensors",
+    "model.layers.73.self_attn.k_proj.bias": "model_layers_73_self_attn_k_proj_bias.safetensors",
+    "model.layers.73.self_attn.k_proj.weight": "model_layers_73_self_attn_k_proj_weight.safetensors",
+    "model.layers.73.self_attn.o_proj.weight": "model_layers_73_self_attn_o_proj_weight.safetensors",
+    "model.layers.73.self_attn.q_proj.bias": "model_layers_73_self_attn_q_proj_bias.safetensors",
+    "model.layers.73.self_attn.q_proj.weight": "model_layers_73_self_attn_q_proj_weight.safetensors",
+    "model.layers.73.self_attn.v_proj.bias": "model_layers_73_self_attn_v_proj_bias.safetensors",
+    "model.layers.73.self_attn.v_proj.weight": "model_layers_73_self_attn_v_proj_weight.safetensors",
+    "model.layers.74.input_layernorm.weight": "model_layers_74_input_layernorm_weight.safetensors",
+    "model.layers.74.mlp.down_proj.weight": "model_layers_74_mlp_down_proj_weight.safetensors",
+    "model.layers.74.mlp.gate_proj.weight": "model_layers_74_mlp_gate_proj_weight.safetensors",
+    "model.layers.74.mlp.up_proj.weight": "model_layers_74_mlp_up_proj_weight.safetensors",
+    "model.layers.74.post_attention_layernorm.weight": "model_layers_74_post_attention_layernorm_weight.safetensors",
+    "model.layers.74.self_attn.k_proj.bias": "model_layers_74_self_attn_k_proj_bias.safetensors",
+    "model.layers.74.self_attn.k_proj.weight": "model_layers_74_self_attn_k_proj_weight.safetensors",
+    "model.layers.74.self_attn.o_proj.weight": "model_layers_74_self_attn_o_proj_weight.safetensors",
+    "model.layers.74.self_attn.q_proj.bias": "model_layers_74_self_attn_q_proj_bias.safetensors",
+    "model.layers.74.self_attn.q_proj.weight": "model_layers_74_self_attn_q_proj_weight.safetensors",
+    "model.layers.74.self_attn.v_proj.bias": "model_layers_74_self_attn_v_proj_bias.safetensors",
+    "model.layers.74.self_attn.v_proj.weight": "model_layers_74_self_attn_v_proj_weight.safetensors",
+    "model.layers.75.input_layernorm.weight": "model_layers_75_input_layernorm_weight.safetensors",
+    "model.layers.75.mlp.down_proj.weight": "model_layers_75_mlp_down_proj_weight.safetensors",
+    "model.layers.75.mlp.gate_proj.weight": "model_layers_75_mlp_gate_proj_weight.safetensors",
+    "model.layers.75.mlp.up_proj.weight": "model_layers_75_mlp_up_proj_weight.safetensors",
+    "model.layers.75.post_attention_layernorm.weight": "model_layers_75_post_attention_layernorm_weight.safetensors",
+    "model.layers.75.self_attn.k_proj.bias": "model_layers_75_self_attn_k_proj_bias.safetensors",
+    "model.layers.75.self_attn.k_proj.weight": "model_layers_75_self_attn_k_proj_weight.safetensors",
+    "model.layers.75.self_attn.o_proj.weight": "model_layers_75_self_attn_o_proj_weight.safetensors",
+    "model.layers.75.self_attn.q_proj.bias": "model_layers_75_self_attn_q_proj_bias.safetensors",
+    "model.layers.75.self_attn.q_proj.weight": "model_layers_75_self_attn_q_proj_weight.safetensors",
+    "model.layers.75.self_attn.v_proj.bias": "model_layers_75_self_attn_v_proj_bias.safetensors",
+    "model.layers.75.self_attn.v_proj.weight": "model_layers_75_self_attn_v_proj_weight.safetensors",
+    "model.layers.76.input_layernorm.weight": "model_layers_76_input_layernorm_weight.safetensors",
+    "model.layers.76.mlp.down_proj.weight": "model_layers_76_mlp_down_proj_weight.safetensors",
+    "model.layers.76.mlp.gate_proj.weight": "model_layers_76_mlp_gate_proj_weight.safetensors",
+    "model.layers.76.mlp.up_proj.weight": "model_layers_76_mlp_up_proj_weight.safetensors",
+    "model.layers.76.post_attention_layernorm.weight": "model_layers_76_post_attention_layernorm_weight.safetensors",
+    "model.layers.76.self_attn.k_proj.bias": "model_layers_76_self_attn_k_proj_bias.safetensors",
+    "model.layers.76.self_attn.k_proj.weight": "model_layers_76_self_attn_k_proj_weight.safetensors",
+    "model.layers.76.self_attn.o_proj.weight": "model_layers_76_self_attn_o_proj_weight.safetensors",
+    "model.layers.76.self_attn.q_proj.bias": "model_layers_76_self_attn_q_proj_bias.safetensors",
+    "model.layers.76.self_attn.q_proj.weight": "model_layers_76_self_attn_q_proj_weight.safetensors",
+    "model.layers.76.self_attn.v_proj.bias": "model_layers_76_self_attn_v_proj_bias.safetensors",
+    "model.layers.76.self_attn.v_proj.weight": "model_layers_76_self_attn_v_proj_weight.safetensors",
+    "model.layers.77.input_layernorm.weight": "model_layers_77_input_layernorm_weight.safetensors",
+    "model.layers.77.mlp.down_proj.weight": "model_layers_77_mlp_down_proj_weight.safetensors",
+    "model.layers.77.mlp.gate_proj.weight": "model_layers_77_mlp_gate_proj_weight.safetensors",
+    "model.layers.77.mlp.up_proj.weight": "model_layers_77_mlp_up_proj_weight.safetensors",
+    "model.layers.77.post_attention_layernorm.weight": "model_layers_77_post_attention_layernorm_weight.safetensors",
+    "model.layers.77.self_attn.k_proj.bias": "model_layers_77_self_attn_k_proj_bias.safetensors",
+    "model.layers.77.self_attn.k_proj.weight": "model_layers_77_self_attn_k_proj_weight.safetensors",
+    "model.layers.77.self_attn.o_proj.weight": "model_layers_77_self_attn_o_proj_weight.safetensors",
+    "model.layers.77.self_attn.q_proj.bias": "model_layers_77_self_attn_q_proj_bias.safetensors",
+    "model.layers.77.self_attn.q_proj.weight": "model_layers_77_self_attn_q_proj_weight.safetensors",
+    "model.layers.77.self_attn.v_proj.bias": "model_layers_77_self_attn_v_proj_bias.safetensors",
+    "model.layers.77.self_attn.v_proj.weight": "model_layers_77_self_attn_v_proj_weight.safetensors",
+    "model.layers.78.input_layernorm.weight": "model_layers_78_input_layernorm_weight.safetensors",
+    "model.layers.78.mlp.down_proj.weight": "model_layers_78_mlp_down_proj_weight.safetensors",
+    "model.layers.78.mlp.gate_proj.weight": "model_layers_78_mlp_gate_proj_weight.safetensors",
+    "model.layers.78.mlp.up_proj.weight": "model_layers_78_mlp_up_proj_weight.safetensors",
+    "model.layers.78.post_attention_layernorm.weight": "model_layers_78_post_attention_layernorm_weight.safetensors",
+    "model.layers.78.self_attn.k_proj.bias": "model_layers_78_self_attn_k_proj_bias.safetensors",
+    "model.layers.78.self_attn.k_proj.weight": "model_layers_78_self_attn_k_proj_weight.safetensors",
+    "model.layers.78.self_attn.o_proj.weight": "model_layers_78_self_attn_o_proj_weight.safetensors",
+    "model.layers.78.self_attn.q_proj.bias": "model_layers_78_self_attn_q_proj_bias.safetensors",
+    "model.layers.78.self_attn.q_proj.weight": "model_layers_78_self_attn_q_proj_weight.safetensors",
+    "model.layers.78.self_attn.v_proj.bias": "model_layers_78_self_attn_v_proj_bias.safetensors",
+    "model.layers.78.self_attn.v_proj.weight": "model_layers_78_self_attn_v_proj_weight.safetensors",
+    "model.layers.79.input_layernorm.weight": "model_layers_79_input_layernorm_weight.safetensors",
+    "model.layers.79.mlp.down_proj.weight": "model_layers_79_mlp_down_proj_weight.safetensors",
+    "model.layers.79.mlp.gate_proj.weight": "model_layers_79_mlp_gate_proj_weight.safetensors",
+    "model.layers.79.mlp.up_proj.weight": "model_layers_79_mlp_up_proj_weight.safetensors",
+    "model.layers.79.post_attention_layernorm.weight": "model_layers_79_post_attention_layernorm_weight.safetensors",
+    "model.layers.79.self_attn.k_proj.bias": "model_layers_79_self_attn_k_proj_bias.safetensors",
+    "model.layers.79.self_attn.k_proj.weight": "model_layers_79_self_attn_k_proj_weight.safetensors",
+    "model.layers.79.self_attn.o_proj.weight": "model_layers_79_self_attn_o_proj_weight.safetensors",
+    "model.layers.79.self_attn.q_proj.bias": "model_layers_79_self_attn_q_proj_bias.safetensors",
+    "model.layers.79.self_attn.q_proj.weight": "model_layers_79_self_attn_q_proj_weight.safetensors",
+    "model.layers.79.self_attn.v_proj.bias": "model_layers_79_self_attn_v_proj_bias.safetensors",
+    "model.layers.79.self_attn.v_proj.weight": "model_layers_79_self_attn_v_proj_weight.safetensors",
     "model.layers.7.input_layernorm.weight": "model_layers_7_input_layernorm_weight.safetensors",
     "model.layers.7.mlp.down_proj.weight": "model_layers_7_mlp_down_proj_weight.safetensors",
     "model.layers.7.mlp.gate_proj.weight": "model_layers_7_mlp_gate_proj_weight.safetensors",
@@ -717,6 +964,7 @@
     "model.layers.9.self_attn.q_proj.bias": "model_layers_9_self_attn_q_proj_bias.safetensors",
     "model.layers.9.self_attn.q_proj.weight": "model_layers_9_self_attn_q_proj_weight.safetensors",
     "model.layers.9.self_attn.v_proj.bias": "model_layers_9_self_attn_v_proj_bias.safetensors",
-    "model.layers.9.self_attn.v_proj.weight": "model_layers_9_self_attn_v_proj_weight.safetensors"
+    "model.layers.9.self_attn.v_proj.weight": "model_layers_9_self_attn_v_proj_weight.safetensors",
+    "model.norm.weight": "model_norm_weight.safetensors"
   }
 }
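The updated total_size is exactly consistent with the config committed above: a bfloat16 Qwen2 model with 80 layers, hidden size 8192, intermediate size 29568, vocabulary 152064, and untied embeddings works out to 72,706,203,648 parameters, i.e. 145,412,407,296 bytes at 2 bytes each. A quick arithmetic check (all values taken from config.json):

# Recompute the expected byte size of the full bf16 checkpoint from config.json.
hidden, layers, inter, vocab = 8192, 80, 29568, 152064
kv_dim = 8 * (hidden // 64)        # 8 KV heads x head_dim 128 = 1024

attn = 2 * hidden * hidden         # q_proj + o_proj weights
attn += 2 * hidden * kv_dim        # k_proj + v_proj weights
attn += hidden + 2 * kv_dim        # q/k/v biases (o_proj has none)
mlp = 3 * hidden * inter           # gate_proj, up_proj, down_proj
norms = 2 * hidden                 # two RMSNorms per layer

params = layers * (attn + mlp + norms)
params += 2 * vocab * hidden       # embed_tokens + untied lm_head
params += hidden                   # final model.norm

print(params)                      # 72706203648 (~72.7B parameters)
print(params * 2)                  # 145412407296 bytes in bfloat16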
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
+  "bos_token": null,
+  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
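The chat_template above implements the ChatML format: it injects a default "You are a helpful assistant." system turn when the first message is not a system message, and wraps each turn in <|im_start|>/<|im_end|>. A minimal sketch of rendering it, assuming the tokenizer is loaded from this checkpoint's directory:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./")  # assumption: this checkpoint's directory

messages = [{"role": "user", "content": "Hello!"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant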
vocab.json ADDED
The diff for this file is too large to render. See raw diff