  # Quantization settings: the algorithm name and its keyword arguments.
  quant_config:
    algo: 'rtn'
    kwargs:
      # Data types; select from 'int2', 'int3', 'int4', 'int8', 'float16'.
      # (The w_ / a_ prefixes presumably denote weights / activations --
      # TODO confirm against the consumer.)
      w_dtype: 'int8'
      a_dtype: 'int8'
      # NOTE(review): calibrate_config.calibrate_seqlen in this file is also
      # 2048; confirm which of the two keys the consumer actually reads.
      calibrate_seq_length: 2048
      device: 'cuda'
      offload: 'cpu'
      w_groupsize: -1
      a_groupsize: -1
      a_qtype: 'per_tensor'
      w_qtype: 'per_channel'
      # NOTE(review): both *_unsign flags are on while both *_has_zero flags
      # are off -- verify this combination is intended.
      w_has_zero: false
      a_has_zero: false
      w_unsign: true
      a_unsign: true
      quantization_type: 'static'
      block_sequential: true
      layer_sequential: true
      # Linear layers to skip; each entry should match the module name.
      skip_layers:
        - 'lm_head'
    # Calibration data selection: dataset name, split, subject group and
    # per-subject counts.
    calibrate_config:
      name: 'ceval'
      split: 'test'
      calibrate_subject: 'Social Science'
      # Count per subject; the ten entries below sum to 128.
      calibrate_nums:
        college_economics: 13
        business_administration: 13
        marxism: 13
        mao_zedong_thought: 12
        education_science: 13
        teacher_qualification: 13
        high_school_politics: 13
        high_school_geography: 13
        middle_school_politics: 13
        middle_school_geography: 12
      calibrate_seqlen: 2048
      shuffle: false
  