  # Quantization settings. `algo` names the quantization algorithm to run
  # (rtn: presumably round-to-nearest; confirm against the consumer).
  quant_config:
    algo: rtn
    # Keyword arguments passed along with the selected algorithm.
    kwargs:
      # Weight (w_) and activation (a_) data types.
      # Select from ['int2', 'int3', 'int4', 'int8', 'float16'].
      w_dtype: int8
      a_dtype: int8
      calibrate_seq_length: 2048
      device: cuda
      offload: cpu
      # NOTE(review): -1 presumably means "no grouping"; confirm with the
      # consumer of this config.
      w_groupsize: -1
      a_groupsize: -1
      a_qtype: per_tensor
      w_qtype: per_channel
      # NOTE(review): both weights and activations are configured as unsigned
      # with no zero point; confirm this combination is intended.
      w_has_zero: false
      a_has_zero: false
      w_unsign: true
      a_unsign: true
      quantization_type: static
      block_sequential: true
      layer_sequential: true
      # Linear layers to skip; each entry should match the module name.
      skip_layers:
        - lm_head
    # Calibration data settings: dataset name, split and subject group.
    calibrate_config:
      name: ceval
      split: test
      calibrate_subject: Humanities
      # Presumably the number of samples drawn per subject (verify against the
      # loader). The values below sum to 128.
      calibrate_nums:
        modern_chinese_history: 11
        ideological_and_moral_cultivation: 12
        logic: 12
        law: 11
        chinese_language_and_literature: 12
        art_studies: 12
        professional_tour_guide: 11
        legal_professional: 11
        high_school_chinese: 12
        high_school_history: 12
        middle_school_history: 12
      # NOTE(review): same value as kwargs.calibrate_seq_length (2048) under a
      # different key name; confirm which one is read and keep them in sync.
      calibrate_seqlen: 2048
      shuffle: false
  