  # Quantization settings: the algorithm to run and its keyword arguments.
  quant_config:
    algo: rtn  # presumably round-to-nearest; confirm against the consumer
    kwargs:
      model_type: 'llama'
      # Data types. The w_/a_ prefixes presumably refer to weights and
      # activations respectively -- confirm against the consuming tool.
      w_dtype: int8  # select from ['int2', 'int3', 'int4', 'int8', 'float16']
      a_dtype: int8  # select from ['int2', 'int3', 'int4', 'int8', 'float16']
      # NOTE(review): calibrate_config.calibrate_seqlen carries the same value
      # (2048); keep the two in sync unless they are meant to differ.
      calibrate_seq_length: 2048
      device: 'cuda'
      offload: 'cpu'
      # -1 presumably disables grouping (one group per tensor/channel);
      # verify the sentinel's meaning in the consumer.
      w_groupsize: -1
      a_groupsize: -1
      a_qtype: 'per_tensor'
      w_qtype: 'per_channel'
      # NOTE(review): unsigned ranges are enabled while zero points are
      # disabled for both w_* and a_* -- confirm this combination is intended.
      w_has_zero: false
      a_has_zero: false
      w_unsign: true
      a_unsign: true
      quantization_type: 'static'
      block_sequential: true
      layer_sequential: true
      # Linear layers to skip; each entry should match the module name.
      skip_layers:
        - lm_head
    # Calibration data selection.
    calibrate_config:
      name: 'ceval'
      # NOTE(review): calibration reads from the `test` split; if the same
      # split is also used for evaluation, confirm the overlap is acceptable.
      split: test
      calibrate_subject: STEM
      # Per-subject counts (presumably the number of calibration samples
      # taken from each subject -- confirm). 12 subjects at 6 plus 8 subjects
      # at 7 give 128 in total.
      calibrate_nums:
        computer_network: 6
        operating_system: 6
        computer_architecture: 6
        college_programming: 6
        college_physics: 6
        college_chemistry: 6
        advanced_mathematics: 6
        probability_and_statistics: 6
        discrete_mathematics: 6
        electrical_engineer: 6
        metrology_engineer: 6
        high_school_mathematics: 6
        high_school_physics: 7
        high_school_chemistry: 7
        high_school_biology: 7
        middle_school_mathematics: 7
        middle_school_biology: 7
        middle_school_physics: 7
        middle_school_chemistry: 7
        veterinary_medicine: 7
      # Same value as kwargs.calibrate_seq_length (2048) in this file.
      calibrate_seqlen: 2048
      shuffle: false
  