model: add config and update index
- config.json +27 -0
- generate_index.py +46 -0
- generation_config.json +14 -0
- model.safetensors.index.json +250 -2
- tokenizer.json +0 -0
- tokenizer_config.json +40 -0
- vocab.json +0 -0
config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 29568,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 80,
+  "model_type": "qwen2",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}
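
Editor's note: these hyperparameters describe a ~72B-parameter Qwen2 model (hidden_size 8192 over 64 attention heads gives a head dimension of 128; num_key_value_heads 8 means grouped-query attention). A minimal sanity-check sketch, assuming the file sits in the current working directory:

from transformers import AutoConfig

# model_type "qwen2" routes this config to Qwen2Config.
config = AutoConfig.from_pretrained(".")
assert config.num_hidden_layers == 80
assert config.vocab_size == 152064
print(config.architectures)  # ['Qwen2ForCausalLM']
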
generate_index.py
ADDED
@@ -0,0 +1,46 @@
+import os
+import json
+from safetensors import safe_open
+import torch
+from typing import Dict
+
+def generate_index(directory: str) -> Dict:
+    index = {
+        "metadata": {"total_size": 0},
+        "weight_map": {}
+    }
+
+    safetensors_files = [f for f in os.listdir(directory) if f.endswith('.safetensors')]
+    safetensors_files.sort()  # Ensure consistent ordering
+
+    for filename in safetensors_files:
+        filepath = os.path.join(directory, filename)
+        try:
+            with safe_open(filepath, framework="pt") as f:
+                for key in f.keys():
+                    if key in index["weight_map"]:
+                        print(f"Warning: Duplicate key '{key}' found in {filename}. Using the last occurrence.")
+                    index["weight_map"][key] = filename
+
+                    tensor = f.get_tensor(key)
+                    tensor_size = tensor.numel() * tensor.element_size()
+                    index["metadata"]["total_size"] += tensor_size
+        except Exception as e:
+            print(f"Error processing {filename}: {str(e)}")
+
+    return index
+
+def save_index(index: Dict, output_file: str):
+    with open(output_file, 'w') as f:
+        json.dump(index, f, indent=2)
+
+if __name__ == "__main__":
+    current_dir = os.getcwd()
+    output_file = "model.safetensors.index.json"
+
+    index = generate_index(current_dir)
+    save_index(index, output_file)
+
+    print(f"Index generated with {len(index['weight_map'])} tensors.")
+    print(f"Total size: {index['metadata']['total_size']} bytes")
+    print(f"Index saved to {output_file}")
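
Editor's note: the script is meant to be run from the checkpoint directory. It maps each tensor key to the file it lives in and sums tensor bytes into metadata.total_size; note that f.get_tensor(key) materializes every tensor in memory just to measure it, which is slow for a checkpoint this large. A follow-up check, sketched under the assumption that the index was just written to the same directory, compares the reported size against the shard files on disk:

import json
import os

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Sum on-disk sizes of the unique shard files referenced by the index.
shard_bytes = sum(os.path.getsize(p) for p in set(index["weight_map"].values()))

# File sizes include each file's safetensors JSON header, so they come
# out slightly larger than the raw tensor bytes in metadata.total_size.
print(index["metadata"]["total_size"], shard_bytes)
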
generation_config.json
ADDED
@@ -0,0 +1,14 @@
+{
+  "bos_token_id": 151643,
+  "pad_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_p": 0.8,
+  "top_k": 20,
+  "transformers_version": "4.37.0"
+}
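
Editor's note: these are the sampling defaults that transformers picks up automatically at generation time. A brief sketch, assuming the file is in the current directory:

from transformers import GenerationConfig

# model.generate() applies these values (sampling at temperature 0.7,
# top_p 0.8, top_k 20, repetition_penalty 1.05) unless overridden per call.
gen_cfg = GenerationConfig.from_pretrained(".")
print(gen_cfg.do_sample)     # True
print(gen_cfg.eos_token_id)  # [151645, 151643]
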
model.safetensors.index.json
CHANGED
@@ -1,8 +1,10 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 145412407296
   },
   "weight_map": {
+    "lm_head.weight": "model-00031-of-00031.safetensors",
+    "model.embed_tokens.weight": "model_embed_tokens_weight.safetensors",
     "model.layers.0.input_layernorm.weight": "model_layers_0_input_layernorm_weight.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model_layers_0_mlp_down_proj_weight.safetensors",
     "model.layers.0.mlp.gate_proj.weight": "model_layers_0_mlp_gate_proj_weight.safetensors",
@@ -651,6 +653,11 @@
     "model.layers.58.self_attn.q_proj.weight": "model_layers_58_self_attn_q_proj_weight.safetensors",
     "model.layers.58.self_attn.v_proj.bias": "model_layers_58_self_attn_v_proj_bias.safetensors",
     "model.layers.58.self_attn.v_proj.weight": "model_layers_58_self_attn_v_proj_weight.safetensors",
+    "model.layers.59.input_layernorm.weight": "model_layers_59_input_layernorm_weight.safetensors",
+    "model.layers.59.mlp.down_proj.weight": "model_layers_59_mlp_down_proj_weight.safetensors",
+    "model.layers.59.mlp.gate_proj.weight": "model_layers_59_mlp_gate_proj_weight.safetensors",
+    "model.layers.59.mlp.up_proj.weight": "model_layers_59_mlp_up_proj_weight.safetensors",
+    "model.layers.59.post_attention_layernorm.weight": "model_layers_59_post_attention_layernorm_weight.safetensors",
     "model.layers.59.self_attn.k_proj.bias": "model_layers_59_self_attn_k_proj_bias.safetensors",
     "model.layers.59.self_attn.k_proj.weight": "model_layers_59_self_attn_k_proj_weight.safetensors",
     "model.layers.59.self_attn.o_proj.weight": "model_layers_59_self_attn_o_proj_weight.safetensors",
@@ -670,6 +677,126 @@
     "model.layers.5.self_attn.q_proj.weight": "model_layers_5_self_attn_q_proj_weight.safetensors",
     "model.layers.5.self_attn.v_proj.bias": "model_layers_5_self_attn_v_proj_bias.safetensors",
     "model.layers.5.self_attn.v_proj.weight": "model_layers_5_self_attn_v_proj_weight.safetensors",
+    "model.layers.60.input_layernorm.weight": "model_layers_60_input_layernorm_weight.safetensors",
+    "model.layers.60.mlp.down_proj.weight": "model_layers_60_mlp_down_proj_weight.safetensors",
+    "model.layers.60.mlp.gate_proj.weight": "model_layers_60_mlp_gate_proj_weight.safetensors",
+    "model.layers.60.mlp.up_proj.weight": "model_layers_60_mlp_up_proj_weight.safetensors",
+    "model.layers.60.post_attention_layernorm.weight": "model_layers_60_post_attention_layernorm_weight.safetensors",
+    "model.layers.60.self_attn.k_proj.bias": "model_layers_60_self_attn_k_proj_bias.safetensors",
+    "model.layers.60.self_attn.k_proj.weight": "model_layers_60_self_attn_k_proj_weight.safetensors",
+    "model.layers.60.self_attn.o_proj.weight": "model_layers_60_self_attn_o_proj_weight.safetensors",
+    "model.layers.60.self_attn.q_proj.bias": "model_layers_60_self_attn_q_proj_bias.safetensors",
+    "model.layers.60.self_attn.q_proj.weight": "model_layers_60_self_attn_q_proj_weight.safetensors",
+    "model.layers.60.self_attn.v_proj.bias": "model_layers_60_self_attn_v_proj_bias.safetensors",
+    "model.layers.60.self_attn.v_proj.weight": "model_layers_60_self_attn_v_proj_weight.safetensors",
+    "model.layers.61.input_layernorm.weight": "model_layers_61_input_layernorm_weight.safetensors",
+    "model.layers.61.mlp.down_proj.weight": "model_layers_61_mlp_down_proj_weight.safetensors",
+    "model.layers.61.mlp.gate_proj.weight": "model_layers_61_mlp_gate_proj_weight.safetensors",
+    "model.layers.61.mlp.up_proj.weight": "model_layers_61_mlp_up_proj_weight.safetensors",
+    "model.layers.61.post_attention_layernorm.weight": "model_layers_61_post_attention_layernorm_weight.safetensors",
+    "model.layers.61.self_attn.k_proj.bias": "model_layers_61_self_attn_k_proj_bias.safetensors",
+    "model.layers.61.self_attn.k_proj.weight": "model_layers_61_self_attn_k_proj_weight.safetensors",
+    "model.layers.61.self_attn.o_proj.weight": "model_layers_61_self_attn_o_proj_weight.safetensors",
+    "model.layers.61.self_attn.q_proj.bias": "model_layers_61_self_attn_q_proj_bias.safetensors",
+    "model.layers.61.self_attn.q_proj.weight": "model_layers_61_self_attn_q_proj_weight.safetensors",
+    "model.layers.61.self_attn.v_proj.bias": "model_layers_61_self_attn_v_proj_bias.safetensors",
+    "model.layers.61.self_attn.v_proj.weight": "model_layers_61_self_attn_v_proj_weight.safetensors",
+    "model.layers.62.input_layernorm.weight": "model_layers_62_input_layernorm_weight.safetensors",
+    "model.layers.62.mlp.down_proj.weight": "model_layers_62_mlp_down_proj_weight.safetensors",
+    "model.layers.62.mlp.gate_proj.weight": "model_layers_62_mlp_gate_proj_weight.safetensors",
+    "model.layers.62.mlp.up_proj.weight": "model_layers_62_mlp_up_proj_weight.safetensors",
+    "model.layers.62.post_attention_layernorm.weight": "model_layers_62_post_attention_layernorm_weight.safetensors",
+    "model.layers.62.self_attn.k_proj.bias": "model_layers_62_self_attn_k_proj_bias.safetensors",
+    "model.layers.62.self_attn.k_proj.weight": "model_layers_62_self_attn_k_proj_weight.safetensors",
+    "model.layers.62.self_attn.o_proj.weight": "model_layers_62_self_attn_o_proj_weight.safetensors",
+    "model.layers.62.self_attn.q_proj.bias": "model_layers_62_self_attn_q_proj_bias.safetensors",
+    "model.layers.62.self_attn.q_proj.weight": "model_layers_62_self_attn_q_proj_weight.safetensors",
+    "model.layers.62.self_attn.v_proj.bias": "model_layers_62_self_attn_v_proj_bias.safetensors",
+    "model.layers.62.self_attn.v_proj.weight": "model_layers_62_self_attn_v_proj_weight.safetensors",
+    "model.layers.63.input_layernorm.weight": "model_layers_63_input_layernorm_weight.safetensors",
+    "model.layers.63.mlp.down_proj.weight": "model_layers_63_mlp_down_proj_weight.safetensors",
+    "model.layers.63.mlp.gate_proj.weight": "model_layers_63_mlp_gate_proj_weight.safetensors",
+    "model.layers.63.mlp.up_proj.weight": "model_layers_63_mlp_up_proj_weight.safetensors",
+    "model.layers.63.post_attention_layernorm.weight": "model_layers_63_post_attention_layernorm_weight.safetensors",
+    "model.layers.63.self_attn.k_proj.bias": "model_layers_63_self_attn_k_proj_bias.safetensors",
+    "model.layers.63.self_attn.k_proj.weight": "model_layers_63_self_attn_k_proj_weight.safetensors",
+    "model.layers.63.self_attn.o_proj.weight": "model_layers_63_self_attn_o_proj_weight.safetensors",
+    "model.layers.63.self_attn.q_proj.bias": "model_layers_63_self_attn_q_proj_bias.safetensors",
+    "model.layers.63.self_attn.q_proj.weight": "model_layers_63_self_attn_q_proj_weight.safetensors",
+    "model.layers.63.self_attn.v_proj.bias": "model_layers_63_self_attn_v_proj_bias.safetensors",
+    "model.layers.63.self_attn.v_proj.weight": "model_layers_63_self_attn_v_proj_weight.safetensors",
+    "model.layers.64.input_layernorm.weight": "model_layers_64_input_layernorm_weight.safetensors",
+    "model.layers.64.mlp.down_proj.weight": "model_layers_64_mlp_down_proj_weight.safetensors",
+    "model.layers.64.mlp.gate_proj.weight": "model_layers_64_mlp_gate_proj_weight.safetensors",
+    "model.layers.64.mlp.up_proj.weight": "model_layers_64_mlp_up_proj_weight.safetensors",
+    "model.layers.64.post_attention_layernorm.weight": "model_layers_64_post_attention_layernorm_weight.safetensors",
+    "model.layers.64.self_attn.k_proj.bias": "model_layers_64_self_attn_k_proj_bias.safetensors",
+    "model.layers.64.self_attn.k_proj.weight": "model_layers_64_self_attn_k_proj_weight.safetensors",
+    "model.layers.64.self_attn.o_proj.weight": "model_layers_64_self_attn_o_proj_weight.safetensors",
+    "model.layers.64.self_attn.q_proj.bias": "model_layers_64_self_attn_q_proj_bias.safetensors",
+    "model.layers.64.self_attn.q_proj.weight": "model_layers_64_self_attn_q_proj_weight.safetensors",
+    "model.layers.64.self_attn.v_proj.bias": "model_layers_64_self_attn_v_proj_bias.safetensors",
+    "model.layers.64.self_attn.v_proj.weight": "model_layers_64_self_attn_v_proj_weight.safetensors",
+    "model.layers.65.input_layernorm.weight": "model_layers_65_input_layernorm_weight.safetensors",
+    "model.layers.65.mlp.down_proj.weight": "model_layers_65_mlp_down_proj_weight.safetensors",
+    "model.layers.65.mlp.gate_proj.weight": "model_layers_65_mlp_gate_proj_weight.safetensors",
+    "model.layers.65.mlp.up_proj.weight": "model_layers_65_mlp_up_proj_weight.safetensors",
+    "model.layers.65.post_attention_layernorm.weight": "model_layers_65_post_attention_layernorm_weight.safetensors",
+    "model.layers.65.self_attn.k_proj.bias": "model_layers_65_self_attn_k_proj_bias.safetensors",
+    "model.layers.65.self_attn.k_proj.weight": "model_layers_65_self_attn_k_proj_weight.safetensors",
+    "model.layers.65.self_attn.o_proj.weight": "model_layers_65_self_attn_o_proj_weight.safetensors",
+    "model.layers.65.self_attn.q_proj.bias": "model_layers_65_self_attn_q_proj_bias.safetensors",
+    "model.layers.65.self_attn.q_proj.weight": "model_layers_65_self_attn_q_proj_weight.safetensors",
+    "model.layers.65.self_attn.v_proj.bias": "model_layers_65_self_attn_v_proj_bias.safetensors",
+    "model.layers.65.self_attn.v_proj.weight": "model_layers_65_self_attn_v_proj_weight.safetensors",
+    "model.layers.66.input_layernorm.weight": "model_layers_66_input_layernorm_weight.safetensors",
+    "model.layers.66.mlp.down_proj.weight": "model_layers_66_mlp_down_proj_weight.safetensors",
+    "model.layers.66.mlp.gate_proj.weight": "model_layers_66_mlp_gate_proj_weight.safetensors",
+    "model.layers.66.mlp.up_proj.weight": "model_layers_66_mlp_up_proj_weight.safetensors",
+    "model.layers.66.post_attention_layernorm.weight": "model_layers_66_post_attention_layernorm_weight.safetensors",
+    "model.layers.66.self_attn.k_proj.bias": "model_layers_66_self_attn_k_proj_bias.safetensors",
+    "model.layers.66.self_attn.k_proj.weight": "model_layers_66_self_attn_k_proj_weight.safetensors",
+    "model.layers.66.self_attn.o_proj.weight": "model_layers_66_self_attn_o_proj_weight.safetensors",
+    "model.layers.66.self_attn.q_proj.bias": "model_layers_66_self_attn_q_proj_bias.safetensors",
+    "model.layers.66.self_attn.q_proj.weight": "model_layers_66_self_attn_q_proj_weight.safetensors",
+    "model.layers.66.self_attn.v_proj.bias": "model_layers_66_self_attn_v_proj_bias.safetensors",
+    "model.layers.66.self_attn.v_proj.weight": "model_layers_66_self_attn_v_proj_weight.safetensors",
+    "model.layers.67.input_layernorm.weight": "model_layers_67_input_layernorm_weight.safetensors",
+    "model.layers.67.mlp.down_proj.weight": "model_layers_67_mlp_down_proj_weight.safetensors",
+    "model.layers.67.mlp.gate_proj.weight": "model_layers_67_mlp_gate_proj_weight.safetensors",
+    "model.layers.67.mlp.up_proj.weight": "model_layers_67_mlp_up_proj_weight.safetensors",
+    "model.layers.67.post_attention_layernorm.weight": "model_layers_67_post_attention_layernorm_weight.safetensors",
+    "model.layers.67.self_attn.k_proj.bias": "model_layers_67_self_attn_k_proj_bias.safetensors",
+    "model.layers.67.self_attn.k_proj.weight": "model_layers_67_self_attn_k_proj_weight.safetensors",
+    "model.layers.67.self_attn.o_proj.weight": "model_layers_67_self_attn_o_proj_weight.safetensors",
+    "model.layers.67.self_attn.q_proj.bias": "model_layers_67_self_attn_q_proj_bias.safetensors",
+    "model.layers.67.self_attn.q_proj.weight": "model_layers_67_self_attn_q_proj_weight.safetensors",
+    "model.layers.67.self_attn.v_proj.bias": "model_layers_67_self_attn_v_proj_bias.safetensors",
+    "model.layers.67.self_attn.v_proj.weight": "model_layers_67_self_attn_v_proj_weight.safetensors",
+    "model.layers.68.input_layernorm.weight": "model_layers_68_input_layernorm_weight.safetensors",
+    "model.layers.68.mlp.down_proj.weight": "model_layers_68_mlp_down_proj_weight.safetensors",
+    "model.layers.68.mlp.gate_proj.weight": "model_layers_68_mlp_gate_proj_weight.safetensors",
+    "model.layers.68.mlp.up_proj.weight": "model_layers_68_mlp_up_proj_weight.safetensors",
+    "model.layers.68.post_attention_layernorm.weight": "model_layers_68_post_attention_layernorm_weight.safetensors",
+    "model.layers.68.self_attn.k_proj.bias": "model_layers_68_self_attn_k_proj_bias.safetensors",
+    "model.layers.68.self_attn.k_proj.weight": "model_layers_68_self_attn_k_proj_weight.safetensors",
+    "model.layers.68.self_attn.o_proj.weight": "model_layers_68_self_attn_o_proj_weight.safetensors",
+    "model.layers.68.self_attn.q_proj.bias": "model_layers_68_self_attn_q_proj_bias.safetensors",
+    "model.layers.68.self_attn.q_proj.weight": "model_layers_68_self_attn_q_proj_weight.safetensors",
+    "model.layers.68.self_attn.v_proj.bias": "model_layers_68_self_attn_v_proj_bias.safetensors",
+    "model.layers.68.self_attn.v_proj.weight": "model_layers_68_self_attn_v_proj_weight.safetensors",
+    "model.layers.69.input_layernorm.weight": "model_layers_69_input_layernorm_weight.safetensors",
+    "model.layers.69.mlp.down_proj.weight": "model_layers_69_mlp_down_proj_weight.safetensors",
+    "model.layers.69.mlp.gate_proj.weight": "model_layers_69_mlp_gate_proj_weight.safetensors",
+    "model.layers.69.mlp.up_proj.weight": "model_layers_69_mlp_up_proj_weight.safetensors",
+    "model.layers.69.post_attention_layernorm.weight": "model_layers_69_post_attention_layernorm_weight.safetensors",
+    "model.layers.69.self_attn.k_proj.bias": "model_layers_69_self_attn_k_proj_bias.safetensors",
+    "model.layers.69.self_attn.k_proj.weight": "model_layers_69_self_attn_k_proj_weight.safetensors",
+    "model.layers.69.self_attn.o_proj.weight": "model_layers_69_self_attn_o_proj_weight.safetensors",
+    "model.layers.69.self_attn.q_proj.bias": "model_layers_69_self_attn_q_proj_bias.safetensors",
+    "model.layers.69.self_attn.q_proj.weight": "model_layers_69_self_attn_q_proj_weight.safetensors",
+    "model.layers.69.self_attn.v_proj.bias": "model_layers_69_self_attn_v_proj_bias.safetensors",
+    "model.layers.69.self_attn.v_proj.weight": "model_layers_69_self_attn_v_proj_weight.safetensors",
     "model.layers.6.input_layernorm.weight": "model_layers_6_input_layernorm_weight.safetensors",
     "model.layers.6.mlp.down_proj.weight": "model_layers_6_mlp_down_proj_weight.safetensors",
     "model.layers.6.mlp.gate_proj.weight": "model_layers_6_mlp_gate_proj_weight.safetensors",
@@ -682,6 +809,126 @@
     "model.layers.6.self_attn.q_proj.weight": "model_layers_6_self_attn_q_proj_weight.safetensors",
     "model.layers.6.self_attn.v_proj.bias": "model_layers_6_self_attn_v_proj_bias.safetensors",
     "model.layers.6.self_attn.v_proj.weight": "model_layers_6_self_attn_v_proj_weight.safetensors",
+    "model.layers.70.input_layernorm.weight": "model_layers_70_input_layernorm_weight.safetensors",
+    "model.layers.70.mlp.down_proj.weight": "model_layers_70_mlp_down_proj_weight.safetensors",
+    "model.layers.70.mlp.gate_proj.weight": "model_layers_70_mlp_gate_proj_weight.safetensors",
+    "model.layers.70.mlp.up_proj.weight": "model_layers_70_mlp_up_proj_weight.safetensors",
+    "model.layers.70.post_attention_layernorm.weight": "model_layers_70_post_attention_layernorm_weight.safetensors",
+    "model.layers.70.self_attn.k_proj.bias": "model_layers_70_self_attn_k_proj_bias.safetensors",
+    "model.layers.70.self_attn.k_proj.weight": "model_layers_70_self_attn_k_proj_weight.safetensors",
+    "model.layers.70.self_attn.o_proj.weight": "model_layers_70_self_attn_o_proj_weight.safetensors",
+    "model.layers.70.self_attn.q_proj.bias": "model_layers_70_self_attn_q_proj_bias.safetensors",
+    "model.layers.70.self_attn.q_proj.weight": "model_layers_70_self_attn_q_proj_weight.safetensors",
+    "model.layers.70.self_attn.v_proj.bias": "model_layers_70_self_attn_v_proj_bias.safetensors",
+    "model.layers.70.self_attn.v_proj.weight": "model_layers_70_self_attn_v_proj_weight.safetensors",
+    "model.layers.71.input_layernorm.weight": "model_layers_71_input_layernorm_weight.safetensors",
+    "model.layers.71.mlp.down_proj.weight": "model_layers_71_mlp_down_proj_weight.safetensors",
+    "model.layers.71.mlp.gate_proj.weight": "model_layers_71_mlp_gate_proj_weight.safetensors",
+    "model.layers.71.mlp.up_proj.weight": "model_layers_71_mlp_up_proj_weight.safetensors",
+    "model.layers.71.post_attention_layernorm.weight": "model_layers_71_post_attention_layernorm_weight.safetensors",
+    "model.layers.71.self_attn.k_proj.bias": "model_layers_71_self_attn_k_proj_bias.safetensors",
+    "model.layers.71.self_attn.k_proj.weight": "model_layers_71_self_attn_k_proj_weight.safetensors",
+    "model.layers.71.self_attn.o_proj.weight": "model_layers_71_self_attn_o_proj_weight.safetensors",
+    "model.layers.71.self_attn.q_proj.bias": "model_layers_71_self_attn_q_proj_bias.safetensors",
+    "model.layers.71.self_attn.q_proj.weight": "model_layers_71_self_attn_q_proj_weight.safetensors",
+    "model.layers.71.self_attn.v_proj.bias": "model_layers_71_self_attn_v_proj_bias.safetensors",
+    "model.layers.71.self_attn.v_proj.weight": "model_layers_71_self_attn_v_proj_weight.safetensors",
+    "model.layers.72.input_layernorm.weight": "model_layers_72_input_layernorm_weight.safetensors",
+    "model.layers.72.mlp.down_proj.weight": "model_layers_72_mlp_down_proj_weight.safetensors",
+    "model.layers.72.mlp.gate_proj.weight": "model_layers_72_mlp_gate_proj_weight.safetensors",
+    "model.layers.72.mlp.up_proj.weight": "model_layers_72_mlp_up_proj_weight.safetensors",
+    "model.layers.72.post_attention_layernorm.weight": "model_layers_72_post_attention_layernorm_weight.safetensors",
+    "model.layers.72.self_attn.k_proj.bias": "model_layers_72_self_attn_k_proj_bias.safetensors",
+    "model.layers.72.self_attn.k_proj.weight": "model_layers_72_self_attn_k_proj_weight.safetensors",
+    "model.layers.72.self_attn.o_proj.weight": "model_layers_72_self_attn_o_proj_weight.safetensors",
+    "model.layers.72.self_attn.q_proj.bias": "model_layers_72_self_attn_q_proj_bias.safetensors",
+    "model.layers.72.self_attn.q_proj.weight": "model_layers_72_self_attn_q_proj_weight.safetensors",
+    "model.layers.72.self_attn.v_proj.bias": "model_layers_72_self_attn_v_proj_bias.safetensors",
+    "model.layers.72.self_attn.v_proj.weight": "model_layers_72_self_attn_v_proj_weight.safetensors",
+    "model.layers.73.input_layernorm.weight": "model_layers_73_input_layernorm_weight.safetensors",
+    "model.layers.73.mlp.down_proj.weight": "model_layers_73_mlp_down_proj_weight.safetensors",
+    "model.layers.73.mlp.gate_proj.weight": "model_layers_73_mlp_gate_proj_weight.safetensors",
+    "model.layers.73.mlp.up_proj.weight": "model_layers_73_mlp_up_proj_weight.safetensors",
+    "model.layers.73.post_attention_layernorm.weight": "model_layers_73_post_attention_layernorm_weight.safetensors",
+    "model.layers.73.self_attn.k_proj.bias": "model_layers_73_self_attn_k_proj_bias.safetensors",
+    "model.layers.73.self_attn.k_proj.weight": "model_layers_73_self_attn_k_proj_weight.safetensors",
+    "model.layers.73.self_attn.o_proj.weight": "model_layers_73_self_attn_o_proj_weight.safetensors",
+    "model.layers.73.self_attn.q_proj.bias": "model_layers_73_self_attn_q_proj_bias.safetensors",
+    "model.layers.73.self_attn.q_proj.weight": "model_layers_73_self_attn_q_proj_weight.safetensors",
+    "model.layers.73.self_attn.v_proj.bias": "model_layers_73_self_attn_v_proj_bias.safetensors",
+    "model.layers.73.self_attn.v_proj.weight": "model_layers_73_self_attn_v_proj_weight.safetensors",
+    "model.layers.74.input_layernorm.weight": "model_layers_74_input_layernorm_weight.safetensors",
+    "model.layers.74.mlp.down_proj.weight": "model_layers_74_mlp_down_proj_weight.safetensors",
+    "model.layers.74.mlp.gate_proj.weight": "model_layers_74_mlp_gate_proj_weight.safetensors",
+    "model.layers.74.mlp.up_proj.weight": "model_layers_74_mlp_up_proj_weight.safetensors",
+    "model.layers.74.post_attention_layernorm.weight": "model_layers_74_post_attention_layernorm_weight.safetensors",
+    "model.layers.74.self_attn.k_proj.bias": "model_layers_74_self_attn_k_proj_bias.safetensors",
+    "model.layers.74.self_attn.k_proj.weight": "model_layers_74_self_attn_k_proj_weight.safetensors",
+    "model.layers.74.self_attn.o_proj.weight": "model_layers_74_self_attn_o_proj_weight.safetensors",
+    "model.layers.74.self_attn.q_proj.bias": "model_layers_74_self_attn_q_proj_bias.safetensors",
+    "model.layers.74.self_attn.q_proj.weight": "model_layers_74_self_attn_q_proj_weight.safetensors",
+    "model.layers.74.self_attn.v_proj.bias": "model_layers_74_self_attn_v_proj_bias.safetensors",
+    "model.layers.74.self_attn.v_proj.weight": "model_layers_74_self_attn_v_proj_weight.safetensors",
+    "model.layers.75.input_layernorm.weight": "model_layers_75_input_layernorm_weight.safetensors",
+    "model.layers.75.mlp.down_proj.weight": "model_layers_75_mlp_down_proj_weight.safetensors",
+    "model.layers.75.mlp.gate_proj.weight": "model_layers_75_mlp_gate_proj_weight.safetensors",
+    "model.layers.75.mlp.up_proj.weight": "model_layers_75_mlp_up_proj_weight.safetensors",
+    "model.layers.75.post_attention_layernorm.weight": "model_layers_75_post_attention_layernorm_weight.safetensors",
+    "model.layers.75.self_attn.k_proj.bias": "model_layers_75_self_attn_k_proj_bias.safetensors",
+    "model.layers.75.self_attn.k_proj.weight": "model_layers_75_self_attn_k_proj_weight.safetensors",
+    "model.layers.75.self_attn.o_proj.weight": "model_layers_75_self_attn_o_proj_weight.safetensors",
+    "model.layers.75.self_attn.q_proj.bias": "model_layers_75_self_attn_q_proj_bias.safetensors",
+    "model.layers.75.self_attn.q_proj.weight": "model_layers_75_self_attn_q_proj_weight.safetensors",
+    "model.layers.75.self_attn.v_proj.bias": "model_layers_75_self_attn_v_proj_bias.safetensors",
+    "model.layers.75.self_attn.v_proj.weight": "model_layers_75_self_attn_v_proj_weight.safetensors",
+    "model.layers.76.input_layernorm.weight": "model_layers_76_input_layernorm_weight.safetensors",
+    "model.layers.76.mlp.down_proj.weight": "model_layers_76_mlp_down_proj_weight.safetensors",
+    "model.layers.76.mlp.gate_proj.weight": "model_layers_76_mlp_gate_proj_weight.safetensors",
+    "model.layers.76.mlp.up_proj.weight": "model_layers_76_mlp_up_proj_weight.safetensors",
+    "model.layers.76.post_attention_layernorm.weight": "model_layers_76_post_attention_layernorm_weight.safetensors",
+    "model.layers.76.self_attn.k_proj.bias": "model_layers_76_self_attn_k_proj_bias.safetensors",
+    "model.layers.76.self_attn.k_proj.weight": "model_layers_76_self_attn_k_proj_weight.safetensors",
+    "model.layers.76.self_attn.o_proj.weight": "model_layers_76_self_attn_o_proj_weight.safetensors",
+    "model.layers.76.self_attn.q_proj.bias": "model_layers_76_self_attn_q_proj_bias.safetensors",
+    "model.layers.76.self_attn.q_proj.weight": "model_layers_76_self_attn_q_proj_weight.safetensors",
+    "model.layers.76.self_attn.v_proj.bias": "model_layers_76_self_attn_v_proj_bias.safetensors",
+    "model.layers.76.self_attn.v_proj.weight": "model_layers_76_self_attn_v_proj_weight.safetensors",
+    "model.layers.77.input_layernorm.weight": "model_layers_77_input_layernorm_weight.safetensors",
+    "model.layers.77.mlp.down_proj.weight": "model_layers_77_mlp_down_proj_weight.safetensors",
+    "model.layers.77.mlp.gate_proj.weight": "model_layers_77_mlp_gate_proj_weight.safetensors",
+    "model.layers.77.mlp.up_proj.weight": "model_layers_77_mlp_up_proj_weight.safetensors",
+    "model.layers.77.post_attention_layernorm.weight": "model_layers_77_post_attention_layernorm_weight.safetensors",
+    "model.layers.77.self_attn.k_proj.bias": "model_layers_77_self_attn_k_proj_bias.safetensors",
+    "model.layers.77.self_attn.k_proj.weight": "model_layers_77_self_attn_k_proj_weight.safetensors",
+    "model.layers.77.self_attn.o_proj.weight": "model_layers_77_self_attn_o_proj_weight.safetensors",
+    "model.layers.77.self_attn.q_proj.bias": "model_layers_77_self_attn_q_proj_bias.safetensors",
+    "model.layers.77.self_attn.q_proj.weight": "model_layers_77_self_attn_q_proj_weight.safetensors",
+    "model.layers.77.self_attn.v_proj.bias": "model_layers_77_self_attn_v_proj_bias.safetensors",
+    "model.layers.77.self_attn.v_proj.weight": "model_layers_77_self_attn_v_proj_weight.safetensors",
+    "model.layers.78.input_layernorm.weight": "model_layers_78_input_layernorm_weight.safetensors",
+    "model.layers.78.mlp.down_proj.weight": "model_layers_78_mlp_down_proj_weight.safetensors",
+    "model.layers.78.mlp.gate_proj.weight": "model_layers_78_mlp_gate_proj_weight.safetensors",
+    "model.layers.78.mlp.up_proj.weight": "model_layers_78_mlp_up_proj_weight.safetensors",
+    "model.layers.78.post_attention_layernorm.weight": "model_layers_78_post_attention_layernorm_weight.safetensors",
+    "model.layers.78.self_attn.k_proj.bias": "model_layers_78_self_attn_k_proj_bias.safetensors",
+    "model.layers.78.self_attn.k_proj.weight": "model_layers_78_self_attn_k_proj_weight.safetensors",
+    "model.layers.78.self_attn.o_proj.weight": "model_layers_78_self_attn_o_proj_weight.safetensors",
+    "model.layers.78.self_attn.q_proj.bias": "model_layers_78_self_attn_q_proj_bias.safetensors",
+    "model.layers.78.self_attn.q_proj.weight": "model_layers_78_self_attn_q_proj_weight.safetensors",
+    "model.layers.78.self_attn.v_proj.bias": "model_layers_78_self_attn_v_proj_bias.safetensors",
+    "model.layers.78.self_attn.v_proj.weight": "model_layers_78_self_attn_v_proj_weight.safetensors",
+    "model.layers.79.input_layernorm.weight": "model_layers_79_input_layernorm_weight.safetensors",
+    "model.layers.79.mlp.down_proj.weight": "model_layers_79_mlp_down_proj_weight.safetensors",
+    "model.layers.79.mlp.gate_proj.weight": "model_layers_79_mlp_gate_proj_weight.safetensors",
+    "model.layers.79.mlp.up_proj.weight": "model_layers_79_mlp_up_proj_weight.safetensors",
+    "model.layers.79.post_attention_layernorm.weight": "model_layers_79_post_attention_layernorm_weight.safetensors",
+    "model.layers.79.self_attn.k_proj.bias": "model_layers_79_self_attn_k_proj_bias.safetensors",
+    "model.layers.79.self_attn.k_proj.weight": "model_layers_79_self_attn_k_proj_weight.safetensors",
+    "model.layers.79.self_attn.o_proj.weight": "model_layers_79_self_attn_o_proj_weight.safetensors",
+    "model.layers.79.self_attn.q_proj.bias": "model_layers_79_self_attn_q_proj_bias.safetensors",
+    "model.layers.79.self_attn.q_proj.weight": "model_layers_79_self_attn_q_proj_weight.safetensors",
+    "model.layers.79.self_attn.v_proj.bias": "model_layers_79_self_attn_v_proj_bias.safetensors",
+    "model.layers.79.self_attn.v_proj.weight": "model_layers_79_self_attn_v_proj_weight.safetensors",
     "model.layers.7.input_layernorm.weight": "model_layers_7_input_layernorm_weight.safetensors",
     "model.layers.7.mlp.down_proj.weight": "model_layers_7_mlp_down_proj_weight.safetensors",
     "model.layers.7.mlp.gate_proj.weight": "model_layers_7_mlp_gate_proj_weight.safetensors",
@@ -717,6 +964,7 @@
     "model.layers.9.self_attn.q_proj.bias": "model_layers_9_self_attn_q_proj_bias.safetensors",
     "model.layers.9.self_attn.q_proj.weight": "model_layers_9_self_attn_q_proj_weight.safetensors",
     "model.layers.9.self_attn.v_proj.bias": "model_layers_9_self_attn_v_proj_bias.safetensors",
-    "model.layers.9.self_attn.v_proj.weight": "model_layers_9_self_attn_v_proj_weight.safetensors"
+    "model.layers.9.self_attn.v_proj.weight": "model_layers_9_self_attn_v_proj_weight.safetensors",
+    "model.norm.weight": "model_norm_weight.safetensors"
   }
 }
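
Editor's note: the update fills in total_size (145412407296 bytes, consistent with roughly 72.7B bfloat16 parameters) and adds the previously missing lm_head, embed_tokens, final norm, and layer 59-79 entries. One apparent leftover: lm_head.weight still points at a shard-style filename (model-00031-of-00031.safetensors) while every other entry uses the per-tensor naming scheme. Loaders resolve tensors through weight_map, as in this sketch:

import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# from_pretrained-style loading opens only the file that the requested
# key maps to, so any single tensor can be fetched without the rest.
print(index["weight_map"]["model.layers.60.mlp.up_proj.weight"])
# model_layers_60_mlp_up_proj_weight.safetensors
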
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,40 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
+  "bos_token": null,
+  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
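
Editor's note: this registers the ChatML control tokens and a chat_template that injects a default system prompt when the conversation does not start with one. A usage sketch, assuming the tokenizer files are in the current directory:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")
messages = [{"role": "user", "content": "Hello"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello<|im_end|>
# <|im_start|>assistant
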
vocab.json
ADDED
The diff for this file is too large to render.