cpatonn committed
Commit f58e71f · verified · 1 Parent(s): f0e2dc1

Upload folder using huggingface_hub
config.json CHANGED
@@ -6,6 +6,7 @@
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "decoder_sparse_step": 1,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -27,6 +28,7 @@
   "quantization_config": {
     "config_groups": {
       "group_0": {
+        "format": "pack-quantized",
         "input_activations": null,
         "output_activations": null,
         "targets": [
@@ -36,9 +38,9 @@
           "actorder": null,
           "block_structure": null,
           "dynamic": false,
-          "group_size": 128,
+          "group_size": 32,
           "num_bits": 4,
-          "observer": "minmax",
+          "observer": "mse",
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
@@ -101,7 +103,10 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.10.3.dev47+ge463fe6"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -109,8 +114,7 @@
   "router_aux_loss_coef": 0.001,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.54.1",
+  "transformers_version": "4.56.0.dev0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "temperature": 0.7,
   "top_k": 20,
   "top_p": 0.8,
-  "transformers_version": "4.54.1"
+  "transformers_version": "4.56.0.dev0"
 }
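The sampling defaults recorded here (temperature 0.7, top_k 20, top_p 0.8) are picked up automatically by generate(), but they can also be passed explicitly. A short sketch reusing the model and tokenizer from the loading example above; the prompt is illustrative.

messages = [{"role": "user", "content": "Summarize what AWQ quantization does."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output_ids = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.7,  # values from generation_config.json
    top_k=20,
    top_p=0.8,
    max_new_tokens=256,
)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))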
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:426c24404bc7dc9205ba3b6926164882c67690912402ebc2decd4d357e38ac21
-size 5001524144
+oid sha256:e60166d0bd6fa030bd248b01cdf89d1626d905ae4701a2f8b52279dbc18722c9
+size 5001707008
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e730b42ec8cdc491dba5764249dfe20e4ad87788c70a81132a42b50b68d6a980
-size 5001803304
+oid sha256:7a8400a155c96c23041c560894a29477cd3ee20ba2a38fe5ec58f21250350ef4
+size 5001283696
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:778ef0a625188686aeea5e971f81af14aefd2f10cab171815ce904826727f77f
-size 5002084152
+oid sha256:d956b31607b9d9ed81182a6db56cc837068e2cfa566001a880f7f870f1d830ad
+size 5001283912
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1edce07fe26507970c93dd769d88e4f026655b3d0680641c9b5613f2a22e660e
-size 1687667728
+oid sha256:cc7f890df5e46bce28631f3a9d428ec29419b7ee254f70be50279036a77e6fe8
+size 3090232736
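Each entry above is a Git LFS pointer: oid is the SHA-256 digest of the actual shard and size is its byte count (note the last shard grows from ~1.7 GB to ~3.1 GB). A small sketch for verifying a downloaded shard against its pointer, using the new values for the last shard:

import hashlib
import os

path = "model-00004-of-00004.safetensors"
expected_oid = "cc7f890df5e46bce28631f3a9d428ec29419b7ee254f70be50279036a77e6fe8"
expected_size = 3090232736

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")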
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -1,9 +1,26 @@
-default_stage:
-  default_modifiers:
+quant_stage:
+  quant_modifiers:
     AWQModifier:
+      config_groups:
+        group_0:
+          targets: [Linear]
+          weights:
+            num_bits: 4
+            type: int
+            symmetric: true
+            group_size: 32
+            strategy: group
+            block_structure: null
+            dynamic: false
+            actorder: null
+            observer: mse
+            observer_kwargs: {}
+          input_activations: null
+          output_activations: null
+          format: null
       targets: [Linear]
-      ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
-      scheme: W4A16
+      ignore: [lm_head, model.embed_tokens, 're:.*input_layernorm$', 're:.*post_attention_layernorm$',
+        model.norm, 're:.*mlp.gate$']
       mappings:
       - smooth_layer: re:.*input_layernorm$
         balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
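The rewritten recipe spells out the previous W4A16 shorthand as an explicit config_groups block (int4 symmetric weights, group size 32, MSE observer) and broadens the ignore list to embeddings, norms, and the MoE router gates. A minimal sketch of applying such a recipe with llm-compressor's oneshot entry point; the base model id, calibration dataset, and sample counts below are assumptions for illustration, not values taken from this commit.

from llmcompressor import oneshot

oneshot(
    model="<base-model-id>",   # assumed: the unquantized source checkpoint
    recipe="recipe.yaml",      # the recipe shown above
    dataset="open_platypus",   # illustrative calibration dataset
    num_calibration_samples=256,
    max_seq_length=2048,
    output_dir="./awq-w4a16-g32",
)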
tokenizer_config.json CHANGED
@@ -231,7 +231,7 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 262144,
+  "model_max_length": 1010000,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",