Commit fa8b829 (verified) · 1 Parent(s): 5976f46
cpatonn committed: Upload folder using huggingface_hub
chat_template.jinja CHANGED
@@ -82,5 +82,5 @@
 {%- endif %}
 {%- endfor %}
 {%- if add_generation_prompt %}
-{{- '<|im_start|>assistant\n<think>\n' }}
+{{- '<|im_start|>assistant\n' }}
 {%- endif %}
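This template change stops force-opening a <think> block in the generation prompt; the assistant turn now begins with a bare '<|im_start|>assistant\n'. A minimal sketch to inspect the rendered prompt, assuming transformers is installed; the repo id below is a placeholder for this model:

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual id of this repository.
tok = AutoTokenizer.from_pretrained("cpatonn/<this-model>")
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
# With the updated template the prompt ends with '<|im_start|>assistant\n'
# instead of also appending an opening '<think>' tag.
print(repr(prompt[-40:]))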
config.json CHANGED
@@ -6,6 +6,7 @@
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "decoder_sparse_step": 1,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -27,6 +28,7 @@
   "quantization_config": {
     "config_groups": {
       "group_0": {
+        "format": "pack-quantized",
         "input_activations": null,
         "output_activations": null,
         "targets": [
@@ -36,9 +38,9 @@
           "actorder": null,
           "block_structure": null,
           "dynamic": false,
-          "group_size": 128,
+          "group_size": 32,
           "num_bits": 4,
-          "observer": "minmax",
+          "observer": "mse",
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
@@ -101,7 +103,10 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.10.3.dev47+ge463fe6"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -109,8 +114,7 @@
   "router_aux_loss_coef": 0.001,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.52.4",
+  "transformers_version": "4.56.0.dev0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
generation_config.json CHANGED
@@ -6,8 +6,8 @@
     151643
   ],
   "pad_token_id": 151643,
-  "temperature": 0.6,
+  "temperature": 0.7,
   "top_k": 20,
-  "top_p": 0.95,
-  "transformers_version": "4.52.4"
+  "top_p": 0.8,
+  "transformers_version": "4.56.0.dev0"
 }
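The shipped sampling defaults change to temperature 0.7, top_p 0.8, top_k 20; generate() reads them from generation_config.json unless they are overridden per call. A self-contained sketch using the text-generation pipeline, with the same placeholder repo id:

from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="cpatonn/<this-model>",  # placeholder repo id
    torch_dtype="bfloat16",
    device_map="auto",
)
# do_sample=True lets the repo defaults (temperature=0.7, top_p=0.8, top_k=20) apply;
# pass temperature=... / top_p=... in the call to override them.
out = pipe("Summarize AWQ quantization in one sentence.", max_new_tokens=64, do_sample=True)
print(out[0]["generated_text"])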
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:091006aa2f15cb171cb1267ed2682f4604878ce94587d488e5dd3f47a0ff35a5
-size 5001524144
+oid sha256:76d48c3f8f76ac78fcc76e637d34d2232f28f6a9ccc903be56bc1ad032d7a703
+size 5001707008
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44467b69b8db9f9547341b83fa9a34895e701521941c25d78ae72939c55b1cd6
-size 5001803304
+oid sha256:c49c510b521eb6db97a782454aa295a59ea34c17546701e561c0615e201c9167
+size 5001283696
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa1fda8e2408bfec724ebf483b4c4887ce44c6d218bc9b9fd6e30d35f9fb16ed
-size 5002084152
+oid sha256:9d62f239adb4a61cf68b41e9732d0fe20197c06bc7dd68637819286d6a55de53
+size 5001283912
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae2bd088d46d56d0296e35b7ee2815c6d56cf8d2910d85445e4f22272d20bb30
-size 1687667728
+oid sha256:3ec04a666527d82063655688d0c5a2730cb6c993497248b3fd2e76b6c4d1b5b5
+size 3090232736
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -1,9 +1,26 @@
-default_stage:
-  default_modifiers:
+quant_stage:
+  quant_modifiers:
     AWQModifier:
+      config_groups:
+        group_0:
+          targets: [Linear]
+          weights:
+            num_bits: 4
+            type: int
+            symmetric: true
+            group_size: 32
+            strategy: group
+            block_structure: null
+            dynamic: false
+            actorder: null
+            observer: mse
+            observer_kwargs: {}
+            input_activations: null
+            output_activations: null
+            format: null
       targets: [Linear]
-      ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
-      scheme: W4A16
+      ignore: [lm_head, model.embed_tokens, 're:.*input_layernorm$', 're:.*post_attention_layernorm$',
+        model.norm, 're:.*mlp.gate$']
       mappings:
       - smooth_layer: re:.*input_layernorm$
         balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
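The recipe now spells out the weight scheme explicitly (int4, symmetric, group_size 32, mse observer) under config_groups instead of the earlier scheme: W4A16 shorthand, and broadens the ignore list to embeddings and norm layers. A hedged sketch of how a recipe like this is typically applied with llmcompressor's one-shot flow; the import path, calibration dataset, and base model id are assumptions that may differ by version:

from llmcompressor import oneshot  # older releases: from llmcompressor.transformers import oneshot

oneshot(
    model="Qwen/<base-bf16-checkpoint>",  # placeholder for the unquantized source model
    dataset="open_platypus",              # placeholder calibration dataset
    recipe="recipe.yaml",                 # the recipe shown in this diff
    max_seq_length=2048,
    num_calibration_samples=256,
    output_dir="awq-w4a16-g32",
)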
tokenizer_config.json CHANGED
@@ -231,7 +231,7 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 262144,
+  "model_max_length": 1010000,
   "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",