


# Lookup table of alternate config-key spellings. Each key is a name that may
# appear in an incoming quantization config; each value is the name it is
# translated to (either one of the *_FIELD_CODE constants or a literal field
# name). Entry order is intentionally left unchanged.
QUANT_CONFIG_ARG_SYNONYMS = {
    "w_bit": BITS_FIELD_CODE,
    # QQQ compatibility
    "wbits": BITS_FIELD_CODE,
    "q_group_size": GROUP_SIZE_FIELD_CODE,
    # AWQ compatibility
    "version": FORMAT_FIELD_CODE,
    "v2": "gptaq",
    "v2_alpha": "gptaq_alpha",
    "v2_memory_device": "gptaq_memory_device",
    # the checkpoint-format field (checkpoint_format) resolves to the
    # class/code-level field (format)
    FORMAT_FIELD_CHECKPOINT: FORMAT_FIELD_CODE,

    # NOTE BPDQ: BPDQ-specific entries start here
    # QuantizeConfig.to_dict() writes `msb_num` under the "bpdq_k_bits" key;
    # this entry maps that key back to the field name.
    "bpdq_k_bits": "msb_num",
    # Identity mappings.
    # NOTE(review): presumably listed so the synonym lookup recognises these
    # keys explicitly -- confirm against the code that consumes this table.
    "n_iters": "n_iters",
    "alpha": "alpha",
    # NOTE BPDQ: BPDQ-specific entries end here
}



class QuantizeConfig():
    bits: int = field(default=4, metadata={"choices": [2, 3, 4, 8, 10, 12, 14, 16]})

    # allow dynamic bitsize per layer, if None or some layer not set, use bits
    dynamic: Optional[Dict[str, Dict[str, Union[int, bool]]]] = field(default=None)

    # 128 offer good balance between inference speed, vram usage (bpw), and quality
    # use 32 for highest quality with slower inference and higher vram usage
    group_size: int = field(default=128)

    # NOTE BPDQ: BPDQ-specific options start here
    # Export only the float Q weights, without replacing or packing, so the HF Linear/Conv structure is kept.
    bpdq_flag: bool = field(default=False)
    msb_num: int = field(default=4)
    n_iters: int = field(default=5)
    alpha: float = field(default=1e-4)
    # NOTE BPDQ



    def to_dict(self):
        out = {
            "bits": self.bits,
            "dynamic": self.dynamic,
            "group_size": self.group_size,
            "desc_act": self.desc_act,
            "sym": self.sym,
            "lm_head": self.lm_head,
            QUANT_METHOD_FIELD:self.quant_method,
            FORMAT_FIELD_CHECKPOINT: self.format,
            # torch.dtype convert to string
            PACK_DTYPE_FIELD: str(self.pack_dtype).split(".")[-1],
            META_FIELD: self.meta,
            # DO NOT EXPORT Adapter to config/json since adapter can be swapped out/in
            # ADAPTER_FIELD: self.adapter.to_dict() if self.adapter else None,
            # NOTE BPDQ: BPDQ-specific fields start here
            "bpdq_flag": self.bpdq_flag, 
            "bpdq_k_bits": self.msb_num,
            "n_iters": self.n_iters,
            "alpha": self.alpha,
            # NOTE BPDQ: BPDQ-specific fields end here
        }
