chenjiel committed on
Commit
2538ded
·
unverified ·
1 Parent(s): bfcfc53
.gitattributes CHANGED
File without changes
LICENSE CHANGED
File without changes
README.md CHANGED
File without changes
added_tokens.json CHANGED
File without changes
chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- if enable_thinking is defined and enable_thinking is false %}
87
+ {{- '<think>\n\n</think>\n\n' }}
88
+ {%- endif %}
89
+ {%- endif %}
config.json CHANGED
@@ -1,29 +1,37 @@
1
  {
2
  "architectures": [
3
- "Qwen3ForCausalLM"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
 
8
  "eos_token_id": 151645,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
- "hidden_size": 5120,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 25600,
14
  "max_position_embeddings": 40960,
15
- "max_window_layers": 64,
16
- "model_type": "qwen3",
17
- "num_attention_heads": 64,
18
- "num_hidden_layers": 64,
19
- "num_key_value_heads": 8,
 
 
 
 
 
 
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": null,
22
- "rope_theta": 1000000,
 
23
  "sliding_window": null,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "bfloat16",
26
- "transformers_version": "4.51.3",
27
  "use_cache": true,
28
  "use_sliding_window": false,
29
  "vocab_size": 151936,
@@ -48,13 +56,65 @@
48
  }
49
  },
50
  "ignore": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  "lm_head"
52
  ],
53
  "quant_algo": "NVFP4",
54
- "kv_cache_scheme": "FP8",
 
 
 
 
55
  "producer": {
56
  "name": "modelopt",
57
- "version": "0.32.1.dev10+gb3c37252c"
58
  },
59
  "quant_method": "modelopt"
60
  }
 
1
  {
2
  "architectures": [
3
+ "Qwen3MoeForCausalLM"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
8
+ "decoder_sparse_step": 1,
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
11
  "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
  "max_position_embeddings": 40960,
16
+ "max_window_layers": 48,
17
+ "mlp_only_layers": [],
18
+ "model_type": "qwen3_moe",
19
+ "moe_intermediate_size": 768,
20
+ "norm_topk_prob": true,
21
+ "num_attention_heads": 32,
22
+ "num_experts": 128,
23
+ "num_experts_per_tok": 8,
24
+ "num_hidden_layers": 48,
25
+ "num_key_value_heads": 4,
26
+ "output_router_logits": false,
27
  "rms_norm_eps": 1e-06,
28
  "rope_scaling": null,
29
+ "rope_theta": 1000000.0,
30
+ "router_aux_loss_coef": 0.001,
31
  "sliding_window": null,
32
  "tie_word_embeddings": false,
33
  "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.53.1",
35
  "use_cache": true,
36
  "use_sliding_window": false,
37
  "vocab_size": 151936,
 
56
  }
57
  },
58
  "ignore": [
59
+ "model.layers.0.mlp.gate",
60
+ "model.layers.1.mlp.gate",
61
+ "model.layers.10.mlp.gate",
62
+ "model.layers.11.mlp.gate",
63
+ "model.layers.12.mlp.gate",
64
+ "model.layers.13.mlp.gate",
65
+ "model.layers.14.mlp.gate",
66
+ "model.layers.15.mlp.gate",
67
+ "model.layers.16.mlp.gate",
68
+ "model.layers.17.mlp.gate",
69
+ "model.layers.18.mlp.gate",
70
+ "model.layers.19.mlp.gate",
71
+ "model.layers.2.mlp.gate",
72
+ "model.layers.20.mlp.gate",
73
+ "model.layers.21.mlp.gate",
74
+ "model.layers.22.mlp.gate",
75
+ "model.layers.23.mlp.gate",
76
+ "model.layers.24.mlp.gate",
77
+ "model.layers.25.mlp.gate",
78
+ "model.layers.26.mlp.gate",
79
+ "model.layers.27.mlp.gate",
80
+ "model.layers.28.mlp.gate",
81
+ "model.layers.29.mlp.gate",
82
+ "model.layers.3.mlp.gate",
83
+ "model.layers.30.mlp.gate",
84
+ "model.layers.31.mlp.gate",
85
+ "model.layers.32.mlp.gate",
86
+ "model.layers.33.mlp.gate",
87
+ "model.layers.34.mlp.gate",
88
+ "model.layers.35.mlp.gate",
89
+ "model.layers.36.mlp.gate",
90
+ "model.layers.37.mlp.gate",
91
+ "model.layers.38.mlp.gate",
92
+ "model.layers.39.mlp.gate",
93
+ "model.layers.4.mlp.gate",
94
+ "model.layers.40.mlp.gate",
95
+ "model.layers.41.mlp.gate",
96
+ "model.layers.42.mlp.gate",
97
+ "model.layers.43.mlp.gate",
98
+ "model.layers.44.mlp.gate",
99
+ "model.layers.45.mlp.gate",
100
+ "model.layers.46.mlp.gate",
101
+ "model.layers.47.mlp.gate",
102
+ "model.layers.5.mlp.gate",
103
+ "model.layers.6.mlp.gate",
104
+ "model.layers.7.mlp.gate",
105
+ "model.layers.8.mlp.gate",
106
+ "model.layers.9.mlp.gate",
107
  "lm_head"
108
  ],
109
  "quant_algo": "NVFP4",
110
+ "kv_cache_scheme": {
111
+ "dynamic": false,
112
+ "num_bits": 8,
113
+ "type": "float"
114
+ },
115
  "producer": {
116
  "name": "modelopt",
117
+ "version": "0.34.1.dev85+g7a72957d"
118
  },
119
  "quant_method": "modelopt"
120
  }
generation_config.json CHANGED
@@ -9,5 +9,5 @@
9
  "temperature": 0.6,
10
  "top_k": 20,
11
  "top_p": 0.95,
12
- "transformers_version": "4.51.3"
13
  }
 
9
  "temperature": 0.6,
10
  "top_k": 20,
11
  "top_p": 0.95,
12
+ "transformers_version": "4.53.1"
13
  }
hf_quant_config.json CHANGED
@@ -1,13 +1,61 @@
1
  {
2
  "producer": {
3
  "name": "modelopt",
4
- "version": "0.33.0"
5
  },
6
  "quantization": {
7
  "quant_algo": "NVFP4",
8
  "kv_cache_quant_algo": "FP8",
9
  "group_size": 16,
10
  "exclude_modules": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "lm_head"
12
  ]
13
  }
 
1
  {
2
  "producer": {
3
  "name": "modelopt",
4
+ "version": "0.34.1.dev85+g7a72957d"
5
  },
6
  "quantization": {
7
  "quant_algo": "NVFP4",
8
  "kv_cache_quant_algo": "FP8",
9
  "group_size": 16,
10
  "exclude_modules": [
11
+ "model.layers.0.mlp.gate",
12
+ "model.layers.1.mlp.gate",
13
+ "model.layers.10.mlp.gate",
14
+ "model.layers.11.mlp.gate",
15
+ "model.layers.12.mlp.gate",
16
+ "model.layers.13.mlp.gate",
17
+ "model.layers.14.mlp.gate",
18
+ "model.layers.15.mlp.gate",
19
+ "model.layers.16.mlp.gate",
20
+ "model.layers.17.mlp.gate",
21
+ "model.layers.18.mlp.gate",
22
+ "model.layers.19.mlp.gate",
23
+ "model.layers.2.mlp.gate",
24
+ "model.layers.20.mlp.gate",
25
+ "model.layers.21.mlp.gate",
26
+ "model.layers.22.mlp.gate",
27
+ "model.layers.23.mlp.gate",
28
+ "model.layers.24.mlp.gate",
29
+ "model.layers.25.mlp.gate",
30
+ "model.layers.26.mlp.gate",
31
+ "model.layers.27.mlp.gate",
32
+ "model.layers.28.mlp.gate",
33
+ "model.layers.29.mlp.gate",
34
+ "model.layers.3.mlp.gate",
35
+ "model.layers.30.mlp.gate",
36
+ "model.layers.31.mlp.gate",
37
+ "model.layers.32.mlp.gate",
38
+ "model.layers.33.mlp.gate",
39
+ "model.layers.34.mlp.gate",
40
+ "model.layers.35.mlp.gate",
41
+ "model.layers.36.mlp.gate",
42
+ "model.layers.37.mlp.gate",
43
+ "model.layers.38.mlp.gate",
44
+ "model.layers.39.mlp.gate",
45
+ "model.layers.4.mlp.gate",
46
+ "model.layers.40.mlp.gate",
47
+ "model.layers.41.mlp.gate",
48
+ "model.layers.42.mlp.gate",
49
+ "model.layers.43.mlp.gate",
50
+ "model.layers.44.mlp.gate",
51
+ "model.layers.45.mlp.gate",
52
+ "model.layers.46.mlp.gate",
53
+ "model.layers.47.mlp.gate",
54
+ "model.layers.5.mlp.gate",
55
+ "model.layers.6.mlp.gate",
56
+ "model.layers.7.mlp.gate",
57
+ "model.layers.8.mlp.gate",
58
+ "model.layers.9.mlp.gate",
59
  "lm_head"
60
  ]
61
  }
merges.txt CHANGED
File without changes
model-00001-of-00005.safetensors → model-00001-of-00004.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8a2eae555271d8b16b4ddd52b4bf23dc068e9565616c51e3577fba991f76ced
3
- size 4974154840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15bb083c92a763a643972134681a65e0953122df749fb0d236ca905e78e709bd
3
+ size 5002180600
model-00002-of-00005.safetensors → model-00002-of-00004.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66854e851fe054dc7b55bb8c1bbdfae9544d2838931e97bf80398959aa1e383b
3
- size 4937271816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab6edfed51af3b8c6ca57fc0020290f9da253e9eb2170d7ef051517a845c6b82
3
+ size 5002610584
model-00003-of-00005.safetensors → model-00003-of-00004.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:870d285637f49d5dba32fda2ff8fc89cf3bb59e863e2d19749953d50ecc822e0
3
- size 4937271816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57417b5d0bd8b092024608fa98ae8af722c05fe2abb3640a25326a9c231ad939
3
+ size 5001923016
model-00004-of-00005.safetensors → model-00004-of-00004.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c1a3a51c42e6507f7fbeeb754fd397cf0958070694ce63d3359aaff5b40f699
3
- size 4261880984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06c1a1a674d5cd7ccba0a1cd4c56921b573a8c874e36ef02549913daf3efa501
3
+ size 3089614888
model-00005-of-00005.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbd6a7104a886641f322332c121ef34cfaa312ed757e3cb019b14c26002a8fb6
3
- size 1555824768
 
 
 
 
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
File without changes
tokenizer.json CHANGED
File without changes
tokenizer_config.json CHANGED
@@ -227,7 +227,6 @@
227
  "<|video_pad|>"
228
  ],
229
  "bos_token": null,
230
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = 
message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
231
  "clean_up_tokenization_spaces": false,
232
  "eos_token": "<|endoftext|>",
233
  "errors": "replace",
 
227
  "<|video_pad|>"
228
  ],
229
  "bos_token": null,
 
230
  "clean_up_tokenization_spaces": false,
231
  "eos_token": "<|endoftext|>",
232
  "errors": "replace",
vocab.json CHANGED
File without changes