Upload folder using huggingface_hub
Browse files- config.json +3 -1
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +1 -2
- recipe.yaml +3 -1
config.json
CHANGED
@@ -70,7 +70,9 @@
|
|
70 |
},
|
71 |
"format": "int-quantized",
|
72 |
"global_compression_ratio": null,
|
73 |
-
"ignore": [
|
|
|
|
|
74 |
"kv_cache_scheme": null,
|
75 |
"quant_method": "compressed-tensors",
|
76 |
"quantization_status": "compressed"
|
|
|
70 |
},
|
71 |
"format": "int-quantized",
|
72 |
"global_compression_ratio": null,
|
73 |
+
"ignore": [
|
74 |
+
"lm_head"
|
75 |
+
],
|
76 |
"kv_cache_scheme": null,
|
77 |
"quant_method": "compressed-tensors",
|
78 |
"quantization_status": "compressed"
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15b611ab3529b3899356f165b606a9b0abc4f0d7b4861ac58dbfd5fe10b8b93a
|
3 |
+
size 1149745568
|
model.safetensors.index.json
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
7 |
-
"lm_head.weight_scale": "model-00004-of-00004.safetensors",
|
8 |
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
9 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
10 |
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 16146612352
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
|
|
7 |
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
8 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
9 |
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
recipe.yaml
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
default_stage:
|
2 |
default_modifiers:
|
3 |
-
QuantizationModifier:
|
|
|
|
|
|
1 |
default_stage:
|
2 |
default_modifiers:
|
3 |
+
QuantizationModifier:
|
4 |
+
ignore: [lm_head]
|
5 |
+
scheme: INT8
|