# Quantization settings; `algo` selects GPTQ and `kwargs` holds its options.
quant_config:
  algo: gptq
  kwargs:
    # Select from ['int2', 'int3', 'int4', 'int8', 'float16'].
    w_dtype: int4
    a_dtype: float16
    device: cuda
    offload: cpu
    # NOTE(review): blocksize, percdamp and actorder look like the usual GPTQ
    # options; confirm their exact meaning against the consuming code.
    blocksize: 128
    percdamp: 0.01
    actorder: true
    # NOTE(review): -1 presumably means "no grouping", which would match the
    # per_channel setting below; confirm against the consuming code.
    w_groupsize: -1
    # Select from ['per_channel', 'per_group'].
    w_qtype: per_channel
    block_sequential: true
    layer_sequential: true
    # Linear layers to skip; each entry should match the module name.
    skip_layers:
      - lm_head
  # Calibration data selection.
  calibrate_config:
    name: cmmlu
    split: val
    calibrate_subject: Social Sciences
    # Per-subject counts (the values below sum to 128).
    # NOTE(review): presumably the number of calibration samples drawn from
    # each subject; confirm against the consuming code.
    calibrate_nums:
      business_ethics: 9
      college_education: 9
      economics: 10
      education: 10
      high_school_geography: 10
      journalism: 10
      management: 10
      marketing: 10
      professional_accounting: 10
      professional_psychology: 10
      public_relations: 10
      security_study: 10
      sociology: 10
    calibrate_seqlen: 2048
    shuffle: false