# Top-level quantization configuration: the algorithm name and its kwargs.
quant_config:
  algo: rtn
  kwargs:
    # Weight (w_) and activation (a_) dtypes.
    # Select from: int2, int3, int4, int8, float16.
    w_dtype: int8
    a_dtype: int8
    # NOTE(review): calibrate_config.calibrate_seqlen carries the same value
    # (2048); confirm whether the two settings must be kept in sync.
    calibrate_seq_length: 2048
    device: cuda
    offload: cpu
    # NOTE(review): -1 presumably disables grouping; confirm with the consumer.
    w_groupsize: -1
    a_groupsize: -1
    a_qtype: per_tensor
    w_qtype: per_channel
    w_has_zero: false
    a_has_zero: false
    # NOTE(review): both tensors are marked unsigned while *_has_zero is
    # false; verify the consumer treats negative values as intended.
    w_unsign: true
    a_unsign: true
    quantization_type: static
    block_sequential: true
    layer_sequential: true
    # Linear layers to skip; each entry should match the module name.
    skip_layers:
      - lm_head
  # Calibration data selection.
  calibrate_config:
    name: cmmlu
    split: val
    calibrate_subject: STEM
    # Per-subject counts (presumably samples drawn per subject; confirm
    # against the loader). The 17 entries below sum to 128.
    calibrate_nums:
      anatomy: 7
      astronomy: 7
      college_actuarial_science: 7
      college_engineering_hydrology: 7
      college_mathematics: 7
      college_medical_statistics: 7
      computer_science: 7
      conceptual_physics: 7
      electrical_engineering: 8
      elementary_mathematics: 8
      genetics: 8
      high_school_biology: 8
      high_school_chemistry: 8
      high_school_mathematics: 8
      high_school_physics: 8
      machine_learning: 8
      virology: 8
    calibrate_seqlen: 2048
    shuffle: false
