default_stage:
  default_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          weights:
            num_bits: 8
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            observer: minmax
            observer_kwargs: {}
          input_activations: null
          output_activations: null
          format: null
      targets: [Linear]
      ignore: [model.embed_tokens, 're:.*input_layernorm$', 're:.*linear_attn.*', 're:.*norm.*',
        're:.*RMSNorm.*', 're:.*rotary.*', 're:.*shared_expert.*', 're:.*shared_expert_gate$',
        're:.*mlp[.]gate$', 're:.*router.*', 're:.*post_attention_layernorm$', 're:.*self_attn.*',
        're:mtp.*', lm_head]
      mappings:
      - smooth_layer: re:.*input_layernorm$
        balance_layers: ['re:.*self_attn[.]q_proj$', 're:.*self_attn[.]k_proj$', 're:.*self_attn[.]v_proj$',
          're:.*linear_attn[.]in_proj_qkvz$', 're:.*linear_attn[.]in_proj_ba$']
      - smooth_layer: re:.*self_attn[.]v_proj$
        balance_layers: ['re:.*self_attn[.]o_proj$']
      - smooth_layer: re:.*post_attention_layernorm$
        balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
      - smooth_layer: re:.*up_proj$
        balance_layers: ['re:.*down_proj$']
      - smooth_layer: re:.*linear_attn[.]norm$
        balance_layers: ['re:.*linear_attn[.]out_proj$']
      duo_scaling: true
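
The recipe above matches the serialization format of an AWQModifier recipe as used by the llm-compressor library. As a minimal sketch only (not taken from this repository, and assuming llmcompressor is installed), such a recipe could be applied to a base model through the oneshot entry point; the model ID, calibration dataset, output directory, and sample counts below are placeholders.

# Minimal sketch: applying the AWQ recipe above with llm-compressor (assumed tooling).
# All names below (model ID, dataset, paths, sample counts) are illustrative placeholders.
from llmcompressor import oneshot

oneshot(
    model="path/or/hf-id-of-base-model",   # placeholder: base model to quantize
    dataset="open_platypus",               # placeholder: calibration dataset
    recipe="recipe.yaml",                  # the YAML recipe shown above, saved to disk
    output_dir="model-awq-int8",           # placeholder: where the compressed model is written
    max_seq_length=2048,
    num_calibration_samples=256,
)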