lmmy commited on
Commit
2ef7c9f
·
verified ·
1 Parent(s): 78f058f

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ license_link: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct/blob/main/LICENSE
5
+ pipeline_tag: text-generation
6
+ tags:
7
+ - mlx
8
+ base_model: Qwen/Qwen3-Next-80B-A3B-Instruct
9
+ ---
10
+ ## 💫 Community Model> Qwen3-Next-80B-A3B-Instruct by Qwen
11
+
12
+ _👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)_.
13
+
14
+ **Model creator**: [Qwen](https://huggingface.co/Qwen)<br>
15
+ **Original model**: [Qwen3-Next-80B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct)<br>
16
+ **MLX quantization**: provided by [LM Studio team](https://x.com/lmstudio) using [mlx_lm](https://github.com/ml-explore/mlx-lm)<br>
17
+
18
+ ## Technical Details
19
+
20
+ 5-bit quantized version of Qwen3-Next-80B-A3B-Instruct using MLX, optimized for Apple Silicon.
21
+
22
+ ## Special thanks
23
+
24
+ 🙏 Special thanks to the [Apple Machine Learning Research](https://github.com/ml-explore) team for creating [MLX](https://github.com/ml-explore/mlx).
25
+
26
+ ## Disclaimers
27
+
28
+ LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, viruses-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
chat_template.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,821 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3NextForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "decoder_sparse_step": 1,
8
+ "eos_token_id": 151645,
9
+ "full_attention_interval": 4,
10
+ "head_dim": 256,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 5120,
15
+ "linear_conv_kernel_dim": 4,
16
+ "linear_key_head_dim": 128,
17
+ "linear_num_key_heads": 16,
18
+ "linear_num_value_heads": 32,
19
+ "linear_value_head_dim": 128,
20
+ "max_position_embeddings": 262144,
21
+ "mlp_only_layers": [],
22
+ "model_type": "qwen3_next",
23
+ "moe_intermediate_size": 512,
24
+ "norm_topk_prob": true,
25
+ "num_attention_heads": 16,
26
+ "num_experts": 512,
27
+ "num_experts_per_tok": 10,
28
+ "num_hidden_layers": 48,
29
+ "num_key_value_heads": 2,
30
+ "output_router_logits": false,
31
+ "partial_rotary_factor": 0.25,
32
+ "quantization": {
33
+ "group_size": 64,
34
+ "bits": 5,
35
+ "mode": "affine",
36
+ "model.layers.0.mlp.gate": {
37
+ "group_size": 64,
38
+ "bits": 8
39
+ },
40
+ "model.layers.0.mlp.shared_expert_gate": {
41
+ "group_size": 64,
42
+ "bits": 8
43
+ },
44
+ "model.layers.1.mlp.gate": {
45
+ "group_size": 64,
46
+ "bits": 8
47
+ },
48
+ "model.layers.1.mlp.shared_expert_gate": {
49
+ "group_size": 64,
50
+ "bits": 8
51
+ },
52
+ "model.layers.2.mlp.gate": {
53
+ "group_size": 64,
54
+ "bits": 8
55
+ },
56
+ "model.layers.2.mlp.shared_expert_gate": {
57
+ "group_size": 64,
58
+ "bits": 8
59
+ },
60
+ "model.layers.3.mlp.gate": {
61
+ "group_size": 64,
62
+ "bits": 8
63
+ },
64
+ "model.layers.3.mlp.shared_expert_gate": {
65
+ "group_size": 64,
66
+ "bits": 8
67
+ },
68
+ "model.layers.4.mlp.gate": {
69
+ "group_size": 64,
70
+ "bits": 8
71
+ },
72
+ "model.layers.4.mlp.shared_expert_gate": {
73
+ "group_size": 64,
74
+ "bits": 8
75
+ },
76
+ "model.layers.5.mlp.gate": {
77
+ "group_size": 64,
78
+ "bits": 8
79
+ },
80
+ "model.layers.5.mlp.shared_expert_gate": {
81
+ "group_size": 64,
82
+ "bits": 8
83
+ },
84
+ "model.layers.6.mlp.gate": {
85
+ "group_size": 64,
86
+ "bits": 8
87
+ },
88
+ "model.layers.6.mlp.shared_expert_gate": {
89
+ "group_size": 64,
90
+ "bits": 8
91
+ },
92
+ "model.layers.7.mlp.gate": {
93
+ "group_size": 64,
94
+ "bits": 8
95
+ },
96
+ "model.layers.7.mlp.shared_expert_gate": {
97
+ "group_size": 64,
98
+ "bits": 8
99
+ },
100
+ "model.layers.8.mlp.gate": {
101
+ "group_size": 64,
102
+ "bits": 8
103
+ },
104
+ "model.layers.8.mlp.shared_expert_gate": {
105
+ "group_size": 64,
106
+ "bits": 8
107
+ },
108
+ "model.layers.9.mlp.gate": {
109
+ "group_size": 64,
110
+ "bits": 8
111
+ },
112
+ "model.layers.9.mlp.shared_expert_gate": {
113
+ "group_size": 64,
114
+ "bits": 8
115
+ },
116
+ "model.layers.10.mlp.gate": {
117
+ "group_size": 64,
118
+ "bits": 8
119
+ },
120
+ "model.layers.10.mlp.shared_expert_gate": {
121
+ "group_size": 64,
122
+ "bits": 8
123
+ },
124
+ "model.layers.11.mlp.gate": {
125
+ "group_size": 64,
126
+ "bits": 8
127
+ },
128
+ "model.layers.11.mlp.shared_expert_gate": {
129
+ "group_size": 64,
130
+ "bits": 8
131
+ },
132
+ "model.layers.12.mlp.gate": {
133
+ "group_size": 64,
134
+ "bits": 8
135
+ },
136
+ "model.layers.12.mlp.shared_expert_gate": {
137
+ "group_size": 64,
138
+ "bits": 8
139
+ },
140
+ "model.layers.13.mlp.gate": {
141
+ "group_size": 64,
142
+ "bits": 8
143
+ },
144
+ "model.layers.13.mlp.shared_expert_gate": {
145
+ "group_size": 64,
146
+ "bits": 8
147
+ },
148
+ "model.layers.14.mlp.gate": {
149
+ "group_size": 64,
150
+ "bits": 8
151
+ },
152
+ "model.layers.14.mlp.shared_expert_gate": {
153
+ "group_size": 64,
154
+ "bits": 8
155
+ },
156
+ "model.layers.15.mlp.gate": {
157
+ "group_size": 64,
158
+ "bits": 8
159
+ },
160
+ "model.layers.15.mlp.shared_expert_gate": {
161
+ "group_size": 64,
162
+ "bits": 8
163
+ },
164
+ "model.layers.16.mlp.gate": {
165
+ "group_size": 64,
166
+ "bits": 8
167
+ },
168
+ "model.layers.16.mlp.shared_expert_gate": {
169
+ "group_size": 64,
170
+ "bits": 8
171
+ },
172
+ "model.layers.17.mlp.gate": {
173
+ "group_size": 64,
174
+ "bits": 8
175
+ },
176
+ "model.layers.17.mlp.shared_expert_gate": {
177
+ "group_size": 64,
178
+ "bits": 8
179
+ },
180
+ "model.layers.18.mlp.gate": {
181
+ "group_size": 64,
182
+ "bits": 8
183
+ },
184
+ "model.layers.18.mlp.shared_expert_gate": {
185
+ "group_size": 64,
186
+ "bits": 8
187
+ },
188
+ "model.layers.19.mlp.gate": {
189
+ "group_size": 64,
190
+ "bits": 8
191
+ },
192
+ "model.layers.19.mlp.shared_expert_gate": {
193
+ "group_size": 64,
194
+ "bits": 8
195
+ },
196
+ "model.layers.20.mlp.gate": {
197
+ "group_size": 64,
198
+ "bits": 8
199
+ },
200
+ "model.layers.20.mlp.shared_expert_gate": {
201
+ "group_size": 64,
202
+ "bits": 8
203
+ },
204
+ "model.layers.21.mlp.gate": {
205
+ "group_size": 64,
206
+ "bits": 8
207
+ },
208
+ "model.layers.21.mlp.shared_expert_gate": {
209
+ "group_size": 64,
210
+ "bits": 8
211
+ },
212
+ "model.layers.22.mlp.gate": {
213
+ "group_size": 64,
214
+ "bits": 8
215
+ },
216
+ "model.layers.22.mlp.shared_expert_gate": {
217
+ "group_size": 64,
218
+ "bits": 8
219
+ },
220
+ "model.layers.23.mlp.gate": {
221
+ "group_size": 64,
222
+ "bits": 8
223
+ },
224
+ "model.layers.23.mlp.shared_expert_gate": {
225
+ "group_size": 64,
226
+ "bits": 8
227
+ },
228
+ "model.layers.24.mlp.gate": {
229
+ "group_size": 64,
230
+ "bits": 8
231
+ },
232
+ "model.layers.24.mlp.shared_expert_gate": {
233
+ "group_size": 64,
234
+ "bits": 8
235
+ },
236
+ "model.layers.25.mlp.gate": {
237
+ "group_size": 64,
238
+ "bits": 8
239
+ },
240
+ "model.layers.25.mlp.shared_expert_gate": {
241
+ "group_size": 64,
242
+ "bits": 8
243
+ },
244
+ "model.layers.26.mlp.gate": {
245
+ "group_size": 64,
246
+ "bits": 8
247
+ },
248
+ "model.layers.26.mlp.shared_expert_gate": {
249
+ "group_size": 64,
250
+ "bits": 8
251
+ },
252
+ "model.layers.27.mlp.gate": {
253
+ "group_size": 64,
254
+ "bits": 8
255
+ },
256
+ "model.layers.27.mlp.shared_expert_gate": {
257
+ "group_size": 64,
258
+ "bits": 8
259
+ },
260
+ "model.layers.28.mlp.gate": {
261
+ "group_size": 64,
262
+ "bits": 8
263
+ },
264
+ "model.layers.28.mlp.shared_expert_gate": {
265
+ "group_size": 64,
266
+ "bits": 8
267
+ },
268
+ "model.layers.29.mlp.gate": {
269
+ "group_size": 64,
270
+ "bits": 8
271
+ },
272
+ "model.layers.29.mlp.shared_expert_gate": {
273
+ "group_size": 64,
274
+ "bits": 8
275
+ },
276
+ "model.layers.30.mlp.gate": {
277
+ "group_size": 64,
278
+ "bits": 8
279
+ },
280
+ "model.layers.30.mlp.shared_expert_gate": {
281
+ "group_size": 64,
282
+ "bits": 8
283
+ },
284
+ "model.layers.31.mlp.gate": {
285
+ "group_size": 64,
286
+ "bits": 8
287
+ },
288
+ "model.layers.31.mlp.shared_expert_gate": {
289
+ "group_size": 64,
290
+ "bits": 8
291
+ },
292
+ "model.layers.32.mlp.gate": {
293
+ "group_size": 64,
294
+ "bits": 8
295
+ },
296
+ "model.layers.32.mlp.shared_expert_gate": {
297
+ "group_size": 64,
298
+ "bits": 8
299
+ },
300
+ "model.layers.33.mlp.gate": {
301
+ "group_size": 64,
302
+ "bits": 8
303
+ },
304
+ "model.layers.33.mlp.shared_expert_gate": {
305
+ "group_size": 64,
306
+ "bits": 8
307
+ },
308
+ "model.layers.34.mlp.gate": {
309
+ "group_size": 64,
310
+ "bits": 8
311
+ },
312
+ "model.layers.34.mlp.shared_expert_gate": {
313
+ "group_size": 64,
314
+ "bits": 8
315
+ },
316
+ "model.layers.35.mlp.gate": {
317
+ "group_size": 64,
318
+ "bits": 8
319
+ },
320
+ "model.layers.35.mlp.shared_expert_gate": {
321
+ "group_size": 64,
322
+ "bits": 8
323
+ },
324
+ "model.layers.36.mlp.gate": {
325
+ "group_size": 64,
326
+ "bits": 8
327
+ },
328
+ "model.layers.36.mlp.shared_expert_gate": {
329
+ "group_size": 64,
330
+ "bits": 8
331
+ },
332
+ "model.layers.37.mlp.gate": {
333
+ "group_size": 64,
334
+ "bits": 8
335
+ },
336
+ "model.layers.37.mlp.shared_expert_gate": {
337
+ "group_size": 64,
338
+ "bits": 8
339
+ },
340
+ "model.layers.38.mlp.gate": {
341
+ "group_size": 64,
342
+ "bits": 8
343
+ },
344
+ "model.layers.38.mlp.shared_expert_gate": {
345
+ "group_size": 64,
346
+ "bits": 8
347
+ },
348
+ "model.layers.39.mlp.gate": {
349
+ "group_size": 64,
350
+ "bits": 8
351
+ },
352
+ "model.layers.39.mlp.shared_expert_gate": {
353
+ "group_size": 64,
354
+ "bits": 8
355
+ },
356
+ "model.layers.40.mlp.gate": {
357
+ "group_size": 64,
358
+ "bits": 8
359
+ },
360
+ "model.layers.40.mlp.shared_expert_gate": {
361
+ "group_size": 64,
362
+ "bits": 8
363
+ },
364
+ "model.layers.41.mlp.gate": {
365
+ "group_size": 64,
366
+ "bits": 8
367
+ },
368
+ "model.layers.41.mlp.shared_expert_gate": {
369
+ "group_size": 64,
370
+ "bits": 8
371
+ },
372
+ "model.layers.42.mlp.gate": {
373
+ "group_size": 64,
374
+ "bits": 8
375
+ },
376
+ "model.layers.42.mlp.shared_expert_gate": {
377
+ "group_size": 64,
378
+ "bits": 8
379
+ },
380
+ "model.layers.43.mlp.gate": {
381
+ "group_size": 64,
382
+ "bits": 8
383
+ },
384
+ "model.layers.43.mlp.shared_expert_gate": {
385
+ "group_size": 64,
386
+ "bits": 8
387
+ },
388
+ "model.layers.44.mlp.gate": {
389
+ "group_size": 64,
390
+ "bits": 8
391
+ },
392
+ "model.layers.44.mlp.shared_expert_gate": {
393
+ "group_size": 64,
394
+ "bits": 8
395
+ },
396
+ "model.layers.45.mlp.gate": {
397
+ "group_size": 64,
398
+ "bits": 8
399
+ },
400
+ "model.layers.45.mlp.shared_expert_gate": {
401
+ "group_size": 64,
402
+ "bits": 8
403
+ },
404
+ "model.layers.46.mlp.gate": {
405
+ "group_size": 64,
406
+ "bits": 8
407
+ },
408
+ "model.layers.46.mlp.shared_expert_gate": {
409
+ "group_size": 64,
410
+ "bits": 8
411
+ },
412
+ "model.layers.47.mlp.gate": {
413
+ "group_size": 64,
414
+ "bits": 8
415
+ },
416
+ "model.layers.47.mlp.shared_expert_gate": {
417
+ "group_size": 64,
418
+ "bits": 8
419
+ }
420
+ },
421
+ "quantization_config": {
422
+ "group_size": 64,
423
+ "bits": 5,
424
+ "mode": "affine",
425
+ "model.layers.0.mlp.gate": {
426
+ "group_size": 64,
427
+ "bits": 8
428
+ },
429
+ "model.layers.0.mlp.shared_expert_gate": {
430
+ "group_size": 64,
431
+ "bits": 8
432
+ },
433
+ "model.layers.1.mlp.gate": {
434
+ "group_size": 64,
435
+ "bits": 8
436
+ },
437
+ "model.layers.1.mlp.shared_expert_gate": {
438
+ "group_size": 64,
439
+ "bits": 8
440
+ },
441
+ "model.layers.2.mlp.gate": {
442
+ "group_size": 64,
443
+ "bits": 8
444
+ },
445
+ "model.layers.2.mlp.shared_expert_gate": {
446
+ "group_size": 64,
447
+ "bits": 8
448
+ },
449
+ "model.layers.3.mlp.gate": {
450
+ "group_size": 64,
451
+ "bits": 8
452
+ },
453
+ "model.layers.3.mlp.shared_expert_gate": {
454
+ "group_size": 64,
455
+ "bits": 8
456
+ },
457
+ "model.layers.4.mlp.gate": {
458
+ "group_size": 64,
459
+ "bits": 8
460
+ },
461
+ "model.layers.4.mlp.shared_expert_gate": {
462
+ "group_size": 64,
463
+ "bits": 8
464
+ },
465
+ "model.layers.5.mlp.gate": {
466
+ "group_size": 64,
467
+ "bits": 8
468
+ },
469
+ "model.layers.5.mlp.shared_expert_gate": {
470
+ "group_size": 64,
471
+ "bits": 8
472
+ },
473
+ "model.layers.6.mlp.gate": {
474
+ "group_size": 64,
475
+ "bits": 8
476
+ },
477
+ "model.layers.6.mlp.shared_expert_gate": {
478
+ "group_size": 64,
479
+ "bits": 8
480
+ },
481
+ "model.layers.7.mlp.gate": {
482
+ "group_size": 64,
483
+ "bits": 8
484
+ },
485
+ "model.layers.7.mlp.shared_expert_gate": {
486
+ "group_size": 64,
487
+ "bits": 8
488
+ },
489
+ "model.layers.8.mlp.gate": {
490
+ "group_size": 64,
491
+ "bits": 8
492
+ },
493
+ "model.layers.8.mlp.shared_expert_gate": {
494
+ "group_size": 64,
495
+ "bits": 8
496
+ },
497
+ "model.layers.9.mlp.gate": {
498
+ "group_size": 64,
499
+ "bits": 8
500
+ },
501
+ "model.layers.9.mlp.shared_expert_gate": {
502
+ "group_size": 64,
503
+ "bits": 8
504
+ },
505
+ "model.layers.10.mlp.gate": {
506
+ "group_size": 64,
507
+ "bits": 8
508
+ },
509
+ "model.layers.10.mlp.shared_expert_gate": {
510
+ "group_size": 64,
511
+ "bits": 8
512
+ },
513
+ "model.layers.11.mlp.gate": {
514
+ "group_size": 64,
515
+ "bits": 8
516
+ },
517
+ "model.layers.11.mlp.shared_expert_gate": {
518
+ "group_size": 64,
519
+ "bits": 8
520
+ },
521
+ "model.layers.12.mlp.gate": {
522
+ "group_size": 64,
523
+ "bits": 8
524
+ },
525
+ "model.layers.12.mlp.shared_expert_gate": {
526
+ "group_size": 64,
527
+ "bits": 8
528
+ },
529
+ "model.layers.13.mlp.gate": {
530
+ "group_size": 64,
531
+ "bits": 8
532
+ },
533
+ "model.layers.13.mlp.shared_expert_gate": {
534
+ "group_size": 64,
535
+ "bits": 8
536
+ },
537
+ "model.layers.14.mlp.gate": {
538
+ "group_size": 64,
539
+ "bits": 8
540
+ },
541
+ "model.layers.14.mlp.shared_expert_gate": {
542
+ "group_size": 64,
543
+ "bits": 8
544
+ },
545
+ "model.layers.15.mlp.gate": {
546
+ "group_size": 64,
547
+ "bits": 8
548
+ },
549
+ "model.layers.15.mlp.shared_expert_gate": {
550
+ "group_size": 64,
551
+ "bits": 8
552
+ },
553
+ "model.layers.16.mlp.gate": {
554
+ "group_size": 64,
555
+ "bits": 8
556
+ },
557
+ "model.layers.16.mlp.shared_expert_gate": {
558
+ "group_size": 64,
559
+ "bits": 8
560
+ },
561
+ "model.layers.17.mlp.gate": {
562
+ "group_size": 64,
563
+ "bits": 8
564
+ },
565
+ "model.layers.17.mlp.shared_expert_gate": {
566
+ "group_size": 64,
567
+ "bits": 8
568
+ },
569
+ "model.layers.18.mlp.gate": {
570
+ "group_size": 64,
571
+ "bits": 8
572
+ },
573
+ "model.layers.18.mlp.shared_expert_gate": {
574
+ "group_size": 64,
575
+ "bits": 8
576
+ },
577
+ "model.layers.19.mlp.gate": {
578
+ "group_size": 64,
579
+ "bits": 8
580
+ },
581
+ "model.layers.19.mlp.shared_expert_gate": {
582
+ "group_size": 64,
583
+ "bits": 8
584
+ },
585
+ "model.layers.20.mlp.gate": {
586
+ "group_size": 64,
587
+ "bits": 8
588
+ },
589
+ "model.layers.20.mlp.shared_expert_gate": {
590
+ "group_size": 64,
591
+ "bits": 8
592
+ },
593
+ "model.layers.21.mlp.gate": {
594
+ "group_size": 64,
595
+ "bits": 8
596
+ },
597
+ "model.layers.21.mlp.shared_expert_gate": {
598
+ "group_size": 64,
599
+ "bits": 8
600
+ },
601
+ "model.layers.22.mlp.gate": {
602
+ "group_size": 64,
603
+ "bits": 8
604
+ },
605
+ "model.layers.22.mlp.shared_expert_gate": {
606
+ "group_size": 64,
607
+ "bits": 8
608
+ },
609
+ "model.layers.23.mlp.gate": {
610
+ "group_size": 64,
611
+ "bits": 8
612
+ },
613
+ "model.layers.23.mlp.shared_expert_gate": {
614
+ "group_size": 64,
615
+ "bits": 8
616
+ },
617
+ "model.layers.24.mlp.gate": {
618
+ "group_size": 64,
619
+ "bits": 8
620
+ },
621
+ "model.layers.24.mlp.shared_expert_gate": {
622
+ "group_size": 64,
623
+ "bits": 8
624
+ },
625
+ "model.layers.25.mlp.gate": {
626
+ "group_size": 64,
627
+ "bits": 8
628
+ },
629
+ "model.layers.25.mlp.shared_expert_gate": {
630
+ "group_size": 64,
631
+ "bits": 8
632
+ },
633
+ "model.layers.26.mlp.gate": {
634
+ "group_size": 64,
635
+ "bits": 8
636
+ },
637
+ "model.layers.26.mlp.shared_expert_gate": {
638
+ "group_size": 64,
639
+ "bits": 8
640
+ },
641
+ "model.layers.27.mlp.gate": {
642
+ "group_size": 64,
643
+ "bits": 8
644
+ },
645
+ "model.layers.27.mlp.shared_expert_gate": {
646
+ "group_size": 64,
647
+ "bits": 8
648
+ },
649
+ "model.layers.28.mlp.gate": {
650
+ "group_size": 64,
651
+ "bits": 8
652
+ },
653
+ "model.layers.28.mlp.shared_expert_gate": {
654
+ "group_size": 64,
655
+ "bits": 8
656
+ },
657
+ "model.layers.29.mlp.gate": {
658
+ "group_size": 64,
659
+ "bits": 8
660
+ },
661
+ "model.layers.29.mlp.shared_expert_gate": {
662
+ "group_size": 64,
663
+ "bits": 8
664
+ },
665
+ "model.layers.30.mlp.gate": {
666
+ "group_size": 64,
667
+ "bits": 8
668
+ },
669
+ "model.layers.30.mlp.shared_expert_gate": {
670
+ "group_size": 64,
671
+ "bits": 8
672
+ },
673
+ "model.layers.31.mlp.gate": {
674
+ "group_size": 64,
675
+ "bits": 8
676
+ },
677
+ "model.layers.31.mlp.shared_expert_gate": {
678
+ "group_size": 64,
679
+ "bits": 8
680
+ },
681
+ "model.layers.32.mlp.gate": {
682
+ "group_size": 64,
683
+ "bits": 8
684
+ },
685
+ "model.layers.32.mlp.shared_expert_gate": {
686
+ "group_size": 64,
687
+ "bits": 8
688
+ },
689
+ "model.layers.33.mlp.gate": {
690
+ "group_size": 64,
691
+ "bits": 8
692
+ },
693
+ "model.layers.33.mlp.shared_expert_gate": {
694
+ "group_size": 64,
695
+ "bits": 8
696
+ },
697
+ "model.layers.34.mlp.gate": {
698
+ "group_size": 64,
699
+ "bits": 8
700
+ },
701
+ "model.layers.34.mlp.shared_expert_gate": {
702
+ "group_size": 64,
703
+ "bits": 8
704
+ },
705
+ "model.layers.35.mlp.gate": {
706
+ "group_size": 64,
707
+ "bits": 8
708
+ },
709
+ "model.layers.35.mlp.shared_expert_gate": {
710
+ "group_size": 64,
711
+ "bits": 8
712
+ },
713
+ "model.layers.36.mlp.gate": {
714
+ "group_size": 64,
715
+ "bits": 8
716
+ },
717
+ "model.layers.36.mlp.shared_expert_gate": {
718
+ "group_size": 64,
719
+ "bits": 8
720
+ },
721
+ "model.layers.37.mlp.gate": {
722
+ "group_size": 64,
723
+ "bits": 8
724
+ },
725
+ "model.layers.37.mlp.shared_expert_gate": {
726
+ "group_size": 64,
727
+ "bits": 8
728
+ },
729
+ "model.layers.38.mlp.gate": {
730
+ "group_size": 64,
731
+ "bits": 8
732
+ },
733
+ "model.layers.38.mlp.shared_expert_gate": {
734
+ "group_size": 64,
735
+ "bits": 8
736
+ },
737
+ "model.layers.39.mlp.gate": {
738
+ "group_size": 64,
739
+ "bits": 8
740
+ },
741
+ "model.layers.39.mlp.shared_expert_gate": {
742
+ "group_size": 64,
743
+ "bits": 8
744
+ },
745
+ "model.layers.40.mlp.gate": {
746
+ "group_size": 64,
747
+ "bits": 8
748
+ },
749
+ "model.layers.40.mlp.shared_expert_gate": {
750
+ "group_size": 64,
751
+ "bits": 8
752
+ },
753
+ "model.layers.41.mlp.gate": {
754
+ "group_size": 64,
755
+ "bits": 8
756
+ },
757
+ "model.layers.41.mlp.shared_expert_gate": {
758
+ "group_size": 64,
759
+ "bits": 8
760
+ },
761
+ "model.layers.42.mlp.gate": {
762
+ "group_size": 64,
763
+ "bits": 8
764
+ },
765
+ "model.layers.42.mlp.shared_expert_gate": {
766
+ "group_size": 64,
767
+ "bits": 8
768
+ },
769
+ "model.layers.43.mlp.gate": {
770
+ "group_size": 64,
771
+ "bits": 8
772
+ },
773
+ "model.layers.43.mlp.shared_expert_gate": {
774
+ "group_size": 64,
775
+ "bits": 8
776
+ },
777
+ "model.layers.44.mlp.gate": {
778
+ "group_size": 64,
779
+ "bits": 8
780
+ },
781
+ "model.layers.44.mlp.shared_expert_gate": {
782
+ "group_size": 64,
783
+ "bits": 8
784
+ },
785
+ "model.layers.45.mlp.gate": {
786
+ "group_size": 64,
787
+ "bits": 8
788
+ },
789
+ "model.layers.45.mlp.shared_expert_gate": {
790
+ "group_size": 64,
791
+ "bits": 8
792
+ },
793
+ "model.layers.46.mlp.gate": {
794
+ "group_size": 64,
795
+ "bits": 8
796
+ },
797
+ "model.layers.46.mlp.shared_expert_gate": {
798
+ "group_size": 64,
799
+ "bits": 8
800
+ },
801
+ "model.layers.47.mlp.gate": {
802
+ "group_size": 64,
803
+ "bits": 8
804
+ },
805
+ "model.layers.47.mlp.shared_expert_gate": {
806
+ "group_size": 64,
807
+ "bits": 8
808
+ }
809
+ },
810
+ "rms_norm_eps": 1e-06,
811
+ "rope_scaling": null,
812
+ "rope_theta": 10000000,
813
+ "router_aux_loss_coef": 0.001,
814
+ "shared_expert_intermediate_size": 512,
815
+ "tie_word_embeddings": false,
816
+ "torch_dtype": "bfloat16",
817
+ "transformers_version": "4.57.0.dev0",
818
+ "use_cache": true,
819
+ "use_sliding_window": false,
820
+ "vocab_size": 151936
821
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.7,
10
+ "top_k": 20,
11
+ "top_p": 0.8,
12
+ "transformers_version": "4.57.0.dev0"
13
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:190d929aa68634668aafc855295940a8fce3f593852226ac679cc4659bd26c39
3
+ size 5138145185
model-00002-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fcfe105bc2558d2d684e8046d083e4047df129b148da3e12caf3099585988a9
3
+ size 5295484498
model-00003-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e11a2064d9d3877a4144ca20435f2863f78c4cce71dba0803867e58b28c5ebf4
3
+ size 5268969275
model-00004-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdac2c0df1bd4677bdd1a3fafa72d48e179b0abce1c98cfebf4692c9b8b2e7d0
3
+ size 5295484681
model-00005-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b05769a6ba2a22b326b7ef7bc438e29af32de9c1e5a919e2394f324fe80ed0
3
+ size 5291004293
model-00006-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f57608f42405d357f2bc6b92f760e52b240cd539fa3a4bc3057c0de9db5cd7a
3
+ size 5268969280
model-00007-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495eb1215f90b033a030ec71873d94177f88840b116c68259718ad18cab51d0a
3
+ size 5295484759
model-00008-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39fd7f9f2ce1f7f7e8d23facefdd75c510f457495028a4445b681eef51018710
3
+ size 5295484631
model-00009-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6936e391b2d0b3598672eaca70e59f4f322f4d320c51db3ad837d38a3327963
3
+ size 5268969280
model-00010-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f388431933623429b622308fd858c481102bcfa4847e6f74e5b03b7909e81785
3
+ size 5295484657
model-00011-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75b54c0e6ea0e4192e0e0d2a50176a957233bc40af7def76f5517cf15cefc6b4
3
+ size 2083627441
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 1010000,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null,
239
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}"
240
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff