# Quantization recipe: a single stage holding one QuantizationModifier.
# NOTE(review): nesting was reconstructed from the key names; confirm against
# the tool that produced this recipe before relying on it.
default_stage:
  default_modifiers:
    QuantizationModifier:
      config_groups:
        # One scheme, applied to targets matching the pattern below.
        attention_quant:
          # The `re:` prefix presumably marks a regular expression; the
          # pattern matches names ending in `self_attn`.
          targets: ['re:.*self_attn$']
          # No weight quantization is configured for this group.
          weights: null
          # Input activations: 8-bit float, symmetric, non-dynamic.
          input_activations:
            num_bits: 8
            type: float
            symmetric: true
            group_size: null
            # Written as a plain scalar rather than a
            # `!!python/object/apply:...QuantizationStrategy [tensor]` tag,
            # so the file can be read with a safe YAML loader.
            strategy: tensor
            block_structure: null
            dynamic: false
            actorder: null
            observer: minmax
            observer_kwargs: {}
          # No output-activation quantization is configured for this group.
          output_activations: null
      # Modifier-level settings (separate from the scheme's own `targets`).
      targets: [Linear]
      ignore: [lm_head]