# Training-level settings.
TRAIN:
  # Name of the optimizer to use; the set of accepted names is defined by the
  # consuming code, which is not visible from this file.
  OPTIMIZER_NAME: adamw
# Quantization settings, grouped into the sections P, G, M and SQM plus a
# DEBUG section.
QUANT:
  # Settings for the `P` target.
  # NOTE(review): `P` presumably stands for model parameters — confirm against
  # the code that reads this file.
  P:
    # NOTE(review): presumably tensors whose names end with one of these
    # suffixes are skipped for this target — confirm. The same three suffixes
    # are listed under M and SQM.
    EXCLUDE_SUFFIX:
      - 'classifier.dense.weight'
      - 'classifier.out_proj.weight'
      - 'qa_outputs.weight'
    # Bit width and group size for this target.
    BITS: 8
    GROUP_SIZE: 64
    # NOTE(review): DEFAULT_ONLY presumably means the DEFAULT value is applied
    # everywhere with no per-tensor override — confirm.
    SCALE_TYPE:
      DEFAULT: group
      DEFAULT_ONLY: True
    QUANT_TYPE:
      DEFAULT: linear
      DEFAULT_ONLY: True
    # NOTE(review): `sr` presumably selects stochastic rounding — confirm.
    ROUND_TYPE: sr
  # Settings for the `G` target; only an enable flag is set here, and it is off.
  # NOTE(review): `G` presumably stands for gradients — confirm against the
  # code that reads this file.
  G:
    ENABLE: False
  # Settings for the `M` target.
  # NOTE(review): `M` presumably stands for the optimizer's first-moment
  # (momentum) state — confirm against the code that reads this file.
  M:
    # NOTE(review): presumably tensors whose names end with one of these
    # suffixes are skipped for this target — confirm. The same three suffixes
    # are listed under P and SQM.
    EXCLUDE_SUFFIX:
      - 'classifier.dense.weight'
      - 'classifier.out_proj.weight'
      - 'qa_outputs.weight'
    # Bit width and group size for this target.
    BITS: 4
    GROUP_SIZE: 128
    # NOTE(review): DEFAULT_ONLY presumably means the DEFAULT value is applied
    # everywhere with no per-tensor override — confirm.
    SCALE_TYPE:
      DEFAULT: group
      DEFAULT_ONLY: True
    QUANT_TYPE:
      DEFAULT: nonlinear
      DEFAULT_ONLY: True
    # NOTE(review): `real-nearest` presumably selects round-to-nearest;
    # the meaning of the `real-` prefix is not visible here — confirm.
    ROUND_TYPE: real-nearest
  # Settings for the `SQM` target.
  # NOTE(review): `SQM` presumably stands for the optimizer's second-moment
  # (squared-gradient) state — confirm against the code that reads this file.
  SQM:
    # NOTE(review): presumably tensors whose names end with one of these
    # suffixes are skipped for this target — confirm. The same three suffixes
    # are listed under P and M.
    EXCLUDE_SUFFIX:
      - 'classifier.dense.weight'
      - 'classifier.out_proj.weight'
      - 'qa_outputs.weight'
    # Bit width and group size for this target.
    BITS: 4
    GROUP_SIZE: 128
    # NOTE(review): DEFAULT_ONLY presumably means the DEFAULT value is applied
    # everywhere with no per-tensor override — confirm.
    SCALE_TYPE:
      DEFAULT: group
      DEFAULT_ONLY: True
    QUANT_TYPE:
      DEFAULT: power-1
      DEFAULT_ONLY: True
    # NOTE(review): `real-sr` presumably selects a stochastic-rounding
    # variant; the meaning of the `real-` prefix is not visible here — confirm.
    ROUND_TYPE: real-sr
  # Debug switches nested under QUANT.
  DEBUG:
    # NOTE(review): presumably toggles collection of truncation-rate
    # statistics during training iterations; set to False here — confirm the
    # exact meaning (and accepted value types) against the consuming code.
    TRUNCATED_RATE_STAT_ITER: False