Commit fa8b829 (verified) · 1 Parent(s): 5976f46
cpatonn committed: Upload folder using huggingface_hub
chat_template.jinja CHANGED
@@ -82,5 +82,5 @@
 {%- endif %}
 {%- endfor %}
 {%- if add_generation_prompt %}
-{{- '<|im_start|>assistant\n<think>\n' }}
+{{- '<|im_start|>assistant\n' }}
 {%- endif %}
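This template change stops force-opening a <think> block in the generation prompt; the assistant turn now begins with a bare '<|im_start|>assistant\n'. A minimal sketch to inspect the rendered prompt, assuming transformers is installed; the repo id below is a placeholder for this model:

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual id of this repository.
tok = AutoTokenizer.from_pretrained("cpatonn/<this-model>")
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
# With the updated template the prompt ends with '<|im_start|>assistant\n'
# instead of also appending an opening '<think>' tag.
print(repr(prompt[-40:]))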
config.json CHANGED
@@ -6,6 +6,7 @@
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "decoder_sparse_step": 1,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -27,6 +28,7 @@
   "quantization_config": {
     "config_groups": {
       "group_0": {
+        "format": "pack-quantized",
         "input_activations": null,
         "output_activations": null,
         "targets": [
@@ -36,9 +38,9 @@
           "actorder": null,
           "block_structure": null,
           "dynamic": false,
-          "group_size": 128,
+          "group_size": 32,
           "num_bits": 4,
-          "observer": "minmax",
+          "observer": "mse",
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
@@ -101,7 +103,10 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.10.3.dev47+ge463fe6"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -109,8 +114,7 @@
   "router_aux_loss_coef": 0.001,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.52.4",
+  "transformers_version": "4.56.0.dev0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
generation_config.json CHANGED
@@ -6,8 +6,8 @@
     151643
   ],
   "pad_token_id": 151643,
-  "temperature": 0.6,
+  "temperature": 0.7,
   "top_k": 20,
-  "top_p": 0.95,
-  "transformers_version": "4.52.4"
+  "top_p": 0.8,
+  "transformers_version": "4.56.0.dev0"
 }
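The shipped sampling defaults change to temperature 0.7, top_p 0.8, top_k 20; generate() reads them from generation_config.json unless they are overridden per call. A self-contained sketch using the text-generation pipeline, with the same placeholder repo id:

from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="cpatonn/<this-model>",  # placeholder repo id
    torch_dtype="bfloat16",
    device_map="auto",
)
# do_sample=True lets the repo defaults (temperature=0.7, top_p=0.8, top_k=20) apply;
# pass temperature=... / top_p=... in the call to override them.
out = pipe("Summarize AWQ quantization in one sentence.", max_new_tokens=64, do_sample=True)
print(out[0]["generated_text"])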
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:091006aa2f15cb171cb1267ed2682f4604878ce94587d488e5dd3f47a0ff35a5
-size 5001524144
+oid sha256:76d48c3f8f76ac78fcc76e637d34d2232f28f6a9ccc903be56bc1ad032d7a703
+size 5001707008
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44467b69b8db9f9547341b83fa9a34895e701521941c25d78ae72939c55b1cd6
-size 5001803304
+oid sha256:c49c510b521eb6db97a782454aa295a59ea34c17546701e561c0615e201c9167
+size 5001283696
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa1fda8e2408bfec724ebf483b4c4887ce44c6d218bc9b9fd6e30d35f9fb16ed
-size 5002084152
+oid sha256:9d62f239adb4a61cf68b41e9732d0fe20197c06bc7dd68637819286d6a55de53
+size 5001283912
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae2bd088d46d56d0296e35b7ee2815c6d56cf8d2910d85445e4f22272d20bb30
-size 1687667728
+oid sha256:3ec04a666527d82063655688d0c5a2730cb6c993497248b3fd2e76b6c4d1b5b5
+size 3090232736
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -1,9 +1,26 @@
-default_stage:
-  default_modifiers:
+quant_stage:
+  quant_modifiers:
     AWQModifier:
+      config_groups:
+        group_0:
+          targets: [Linear]
+          weights:
+            num_bits: 4
+            type: int
+            symmetric: true
+            group_size: 32
+            strategy: group
+            block_structure: null
+            dynamic: false
+            actorder: null
+            observer: mse
+            observer_kwargs: {}
+            input_activations: null
+            output_activations: null
+            format: null
       targets: [Linear]
-      ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
-      scheme: W4A16
+      ignore: [lm_head, model.embed_tokens, 're:.*input_layernorm$', 're:.*post_attention_layernorm$',
+        model.norm, 're:.*mlp.gate$']
       mappings:
       - smooth_layer: re:.*input_layernorm$
         balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
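The recipe now spells out the weight scheme explicitly (int4, symmetric, group_size 32, mse observer) under config_groups instead of the earlier scheme: W4A16 shorthand, and broadens the ignore list to embeddings and norm layers. A hedged sketch of how a recipe like this is typically applied with llmcompressor's one-shot flow; the import path, calibration dataset, and base model id are assumptions that may differ by version:

from llmcompressor import oneshot  # older releases: from llmcompressor.transformers import oneshot

oneshot(
    model="Qwen/<base-bf16-checkpoint>",  # placeholder for the unquantized source model
    dataset="open_platypus",              # placeholder calibration dataset
    recipe="recipe.yaml",                 # the recipe shown in this diff
    max_seq_length=2048,
    num_calibration_samples=256,
    output_dir="awq-w4a16-g32",
)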
tokenizer_config.json CHANGED
@@ -231,7 +231,7 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 262144,
+  "model_max_length": 1010000,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",