    def save_quantized(
            self,
            save_dir: str,
            safetensors_metadata: Optional[Dict[str, str]] = None,
            max_shard_size: Optional[Union[int, str]] = DEFAULT_MAX_SHARD_SIZE,
            meta_quantizer: Optional[str] = None,
            eora_path: Optional[str] = None,
    ):
        """Prepare the quantize config, model and model config for saving.

        This listing is an excerpt: every ``# ...`` line marks code that has
        been elided. In particular ``state_dict``, which the BPDQ pruning step
        mutates, is not defined anywhere in the visible portion.

        Visible steps, in order:

        1. Record ``act_group_aware`` and the BPDQ settings (``bpdq_flag``,
           ``bpdq_k_bits``, ``n_iters``, ``alpha``) on ``self.quantize_config``
           through ``meta_set``.
        2. Deep-copy ``self.quantize_config`` and abort if the model has not
           been quantized.
        3. Choose the model to serialize: either ``self.model`` (optionally
           converted from GPTQ v2 to v1 layout) or a model rebuilt by
           ``self.get_model_with_quantize``. The result is assigned back to
           ``self.model``.
        4. Attach ``quantize_config.to_dict()`` (plus BPDQ keys when BPDQ is
           enabled) to a deep copy of the model config as
           ``quantization_config``.
        5. When BPDQ is enabled, delete every ``state_dict`` entry whose key
           ends in ``.scales``, ``.qzeros``, ``.g_idx`` or ``.qweight``.

        Args:
            save_dir: Not referenced in the visible portion; presumably the
                output directory -- confirm against the elided code.
            safetensors_metadata: Not referenced in the visible portion.
            max_shard_size: Not referenced in the visible portion.
            meta_quantizer: Not referenced in the visible portion.
            eora_path: Not referenced in the visible portion.

        Raises:
            ValueError: If ``self.quantized`` is falsy.
        """
        # ...
        # gptqmodel_5.6.0 (commented out; appears to be the original code that
        # the BPDQ section below replaces -- kept for reference)
        # self.quantize_config.meta_set(
        #     key=META_FIELD_ACT_GROUP_AWARE,
        #     value=self.quantize_config.act_group_aware
        # )

        # # The config, quantize_config and model may be edited in place in save_quantized.
        # config = copy.deepcopy(self.model.config)
        # quantize_config = copy.deepcopy(self.quantize_config)

        # if not self.quantized:
        #     raise ValueError("Save aborted as model is not quantized. Please call `quantize()` first.")


        # BPDQ: active replacement for the commented-out block above.
        self.quantize_config.meta_set(
            key=META_FIELD_ACT_GROUP_AWARE,
            value=self.quantize_config.act_group_aware
        )
        # Read the BPDQ settings with defaults so that a quantize config which
        # lacks these attributes still gets explicit "disabled" meta values.
        bpdq_flag = getattr(self.quantize_config, "bpdq_flag", False)
        # The meta key is "bpdq_k_bits" but the value comes from the "msb_num" attribute.
        bpdq_k_bits = getattr(self.quantize_config, "msb_num", 0)
        n_iters = getattr(self.quantize_config, "n_iters", 0)
        alpha = getattr(self.quantize_config, "alpha", 0)
        self.quantize_config.meta_set("bpdq_flag", bpdq_flag)
        self.quantize_config.meta_set("bpdq_k_bits", bpdq_k_bits)
        self.quantize_config.meta_set("n_iters", n_iters)
        self.quantize_config.meta_set("alpha", alpha)
        # The config, quantize_config and model may be edited in place in save_quantized.
        # The deep copy is taken after the meta_set calls, so the copy carries the meta values too.
        quantize_config = copy.deepcopy(self.quantize_config)
        # NOTE(review): this check runs after the meta_set calls above, so
        # self.quantize_config has already been modified when the error is raised.
        if not self.quantized:
            raise ValueError("Save aborted as model is not quantized. Please call `quantize()` first.")
        


        # ...
        # gptqmodel_5.6.0 (commented out; appears to be the original code that
        # the BPDQ section below replaces -- kept for reference)
        # if quantize_config.format == FORMAT.GPTQ_V2:
        #     log.warn(
        #         f"Using 'format = {FORMAT.GPTQ_V2}': the serialized model is only supported by GPTQModel version >= {MIN_VERSION_WITH_V2}."
        #     )

        # if self.load_quantized_model:
        #     self.model = self.get_model_with_quantize(
        #         qcfg=quantize_config,
        #         model_id_or_path=self.model_local_path,
        #     )

        # # --- start config save block ---
        # # Save quantized config
        # config.quantization_config = quantize_config.to_dict()
        # self.model.config = config


        # BPDQ: active replacement for the commented-out block above.
        # Warn that a GPTQ_V2 checkpoint needs a sufficiently recent GPTQModel to load.
        if quantize_config.format == FORMAT.GPTQ_V2:
            log.warn(
                f"Using 'format = {FORMAT.GPTQ_V2}': the serialized model is only supported by GPTQModel version >= {MIN_VERSION_WITH_V2}."
            )

        if not self.load_quantized_model:
            # Start from the current in-memory model.
            model = self.model

            bpdq_flag = getattr(quantize_config, "bpdq_flag", False)

            # The internal layout is always GPTQ v2, but users may request GPTQ (v1)
            # through the config. Convert only when all of the following hold:
            # the requested format is GPTQ, the quant method is GPTQ, the kernel
            # declares REQUIRES_FORMAT_V2, and BPDQ is not enabled.
            if (
                quantize_config.format == FORMAT.GPTQ
                and getattr(quantize_config, "quant_method", None) == METHOD.GPTQ
                and getattr(self.qlinear_kernel, "REQUIRES_FORMAT_V2", False)
                and not bpdq_flag
            ):
                # Model qzeros may be edited in place for export compatibility.
                model = convert_gptq_v2_to_v1_format(
                    model, quantize_config=quantize_config, qlinear_kernel=self.qlinear_kernel
                )
        else:
            # Rebuild the model from self.model_local_path using the copied quantize config.
            model = self.get_model_with_quantize(
                qcfg=quantize_config,
                model_id_or_path=self.model_local_path,
            )

        # Whichever branch ran, the chosen model replaces self.model.
        self.model = model 
        # Work on a copy of the model config; it is assigned back to the model below.
        config = copy.deepcopy(model.config)
        # Commented-out earlier variant of the reload branch (now handled by the else above):
        # if self.load_quantized_model:
        #     self.model = self.get_model_with_quantize(
        #         qcfg=quantize_config,
        #         model_id_or_path=self.model_local_path,
        #     )


        # --- start config save block ---
        # Save quantized config
        quant_config_dict = quantize_config.to_dict()

        # When BPDQ is enabled, add the BPDQ settings as top-level keys of the
        # serialized quantization config (in addition to the meta entries set earlier).
        bpdq_flag = getattr(quantize_config, "bpdq_flag", False)
        if bpdq_flag:
            quant_config_dict["bpdq_flag"] = True
            quant_config_dict["bpdq_k_bits"] = getattr(quantize_config, "msb_num", 0)
            quant_config_dict["n_iters"] = getattr(quantize_config, "n_iters", 0)
            quant_config_dict["alpha"] = getattr(quantize_config, "alpha", 0)
        config.quantization_config = quant_config_dict
        model.config = config  




        # ...
        # gptqmodel_5.6.0 (commented out; appears to be the original code that
        # the BPDQ section below replaces -- kept for reference)
        # model_base_name = "model"
        # model_save_name = model_base_name + ".safetensors"


        # BPDQ: drop the GPTQ tensors from state_dict before saving.
        # `state_dict` is defined in the elided code above, not in this excerpt.
        # NOTE(review): this guard reads self.quantize_config, whereas the
        # checks above read the local deep copy `quantize_config` -- confirm
        # that the two cannot disagree at this point.
        if getattr(self.quantize_config, "bpdq_flag", False):
            drop_suffixes = (".scales", ".qzeros", ".g_idx", ".qweight")
            removed_keys = []
            # Iterate over a snapshot of the keys because entries are deleted inside the loop.
            for k in list(state_dict.keys()):
                if k.endswith(drop_suffixes):
                    removed_keys.append(k)
                    del state_dict[k]
            if removed_keys:
                log.info(
                    f"BPDQ: pruned {len(removed_keys)} legacy GPTQ tensors "
                    f"(qweight/scales/qzeros/g_idx) from state_dict before saving."
                )

        # Base file name of the safetensors output.
        model_base_name = "model"
        model_save_name = model_base_name + ".safetensors"







