test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings:
        - - - re:.*q_proj
            - re:.*k_proj
            - re:.*v_proj
          - re:.*input_layernorm
        - - - re:.*gate_proj
            - re:.*up_proj
          - re:.*post_attention_layernorm
        - - - re:.*down_proj
          - re:.*up_proj
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - model.layers.1.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.0.mlp.down_proj
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      mask_structure: 0:0
      targets:
        - model.layers.0
        - model.layers.1
        - model.layers.2
        - model.layers.3
        - model.layers.4
        - model.layers.5
        - model.layers.6
        - model.layers.7
        - model.layers.8
        - model.layers.9
        - model.layers.10
        - model.layers.11
        - model.layers.12
        - model.layers.13
        - model.layers.14
        - model.layers.15
        - model.layers.16
        - model.layers.17
        - model.layers.18
        - model.layers.19
        - model.layers.20
        - model.layers.21
        - model.layers.22
        - model.layers.23
        - model.layers.24
        - model.layers.25
        - model.layers.26
        - model.layers.27
        - model.layers.28
        - model.layers.29
        - model.layers.30
        - model.layers.31
        - lm_head
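As a rough illustration, a recipe like this can be applied in a single one-shot pass with SparseML's `oneshot` entry point. The snippet below is a minimal sketch only: the model stub, calibration dataset, sample count, and output directory are assumptions for illustration and are not part of the recipe above.

```python
# Minimal sketch, assuming SparseML's one-shot pathway is available.
# Model, dataset, sample count, and output paths are placeholders.
from sparseml.transformers import oneshot

oneshot(
    model="meta-llama/Llama-2-7b-hf",   # assumed base model
    dataset="open_platypus",            # assumed calibration dataset
    recipe="recipe.yaml",               # the YAML recipe shown above
    num_calibration_samples=512,        # assumed calibration budget
    output_dir="./obcq_deployment",     # assumed output location
)
```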