    @classmethod
    def from_quantized(
            cls,
            model_id_or_path: Optional[str],
            device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None,
            device: Optional[Union[str, int]] = None,
            backend: Union[str, BACKEND] = BACKEND.AUTO,
            adapter: Optional[Adapter] = None,
            dtype: Union[str, torch.dtype] = "auto",
            trust_remote_code: bool = False,
            **kwargs,
    ):
        # Alternate constructor for loading an already-quantized model.
        # NOTE(review): only the signature and one call site are shown in this
        # excerpt; the rest of the body is elided ("..."), so parameter semantics
        # should be confirmed against the full method.
        #
        # ...
        #
        # Upstream gptqmodel 5.6.0 call, kept commented out as the baseline the
        # BPDQ variant below is compared against:
            # preload_qlinear_kernel = make_quant(
            #     model,
            #     qcfg=qcfg,
            #     quant_result=modules,
            #     backend=backend,
            #     lm_head_name=cls.lm_head,
            #     device=device,
            # )


        # BPDQ variant: identical to the upstream call except for the extra
        # `from_quantized=True` keyword.
        # NOTE(review): how make_quant reacts to this flag is not visible here --
        # confirm against make_quant's definition.
            preload_qlinear_kernel = make_quant(
                model,
                qcfg=qcfg,
                quant_result=modules,
                backend=backend,
                lm_head_name=cls.lm_head,
                device=device,
                from_quantized=True, # BPDQ addition; absent from the upstream 5.6.0 call above
            )

        def build_layerwise_device_map(
                model,
                device,
                layers: List[torch.nn.Module],
                ignore_modules: List[torch.nn.Module],
                num_gpus: Optional[int] = None,
        ) -> Dict[str, str]:
        # ... (the helper's own body is elided in this excerpt)
        #
        # NOTE(review): the statements below read `qcfg` and
        # `preload_qlinear_kernel`, neither of which is a parameter of this
        # helper. Presumably they belong to the enclosing loader method and the
        # `def` above only marks where the excerpt resumes -- confirm against the
        # full file before relying on the nesting shown here.
        #
        # Upstream gptqmodel 5.6.0 version, kept commented out as the baseline
        # for the BPDQ variant below:
            # if qcfg.format == FORMAT.GPTQ:
            #     # validate sym=False v1 loading needs to be protected for models produced with new v2 format codebase
            #     if not qcfg.sym and not qcfg.is_quantized_by_gptaq():
            #         raise ValueError(
            #             f"Format: Loading of a sym=False model with format={FORMAT.GPTQ} is only supported if produced by gptqmodel version >= {MIN_VERSION_WITH_V2}"
            #         )

            #     if preload_qlinear_kernel.REQUIRES_FORMAT_V2:
            #         model = convert_gptq_v1_to_v2_format(
            #             model,
            #             cfg=qcfg,
            #             qlinear_kernel=preload_qlinear_kernel,
            #         )

            #         qcfg.runtime_format = FORMAT.GPTQ_V2

        # BPDQ variant: same validation as upstream, but the v1 -> v2 conversion
        # is skipped when the quantize config carries a truthy `bpdq_flag`.
            if qcfg.format == FORMAT.GPTQ:
                # Reject sym=False checkpoints in this format unless
                # qcfg.is_quantized_by_gptaq() reports true.
                if not qcfg.sym and not qcfg.is_quantized_by_gptaq():
                    raise ValueError(
                        f"Format: Loading of a sym=False model with format={FORMAT.GPTQ} is only supported if produced by gptqmodel version >= {MIN_VERSION_WITH_V2}"
                    )
                # Configs without a `bpdq_flag` attribute are treated as non-BPDQ.
                is_bpdq_model = getattr(qcfg, "bpdq_flag", False)
                if preload_qlinear_kernel.REQUIRES_FORMAT_V2 and not is_bpdq_model:
                # Upstream condition, for reference:
                # if preload_qlinear_kernel.REQUIRES_FORMAT_V2:
                    model = convert_gptq_v1_to_v2_format(
                        model,
                        cfg=qcfg,
                        qlinear_kernel=preload_qlinear_kernel,
                    )
                    # Record the new runtime format only when the conversion ran.
                    qcfg.runtime_format = FORMAT.GPTQ_V2











