from typing import List, Tuple, Union, Literal, Optional

import numpy as np
import pandas as pd
import torch
import os
import json
import math
import stat
from pathlib import Path

from .label_encoder import LabelEncoderWithNaN
from .scalers import SCALERS, DEFAULT_SCALERS
from ..utils import drop_target


# Format version written into every serialized payload; the loaders in this
# module raise ValueError when a file carries any other value.
_TRANSFORM_FMT_VERSION = 1
# Scaler attribute names that are exported to JSON and length-checked against
# the number of numerical columns when a saved state is applied.
_KNOWN_SCALER_KEYS = ["mean_", "scale_", "var_", "min_", "max_"]

def _is_regular_file(path: str | os.PathLike) -> None:
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"File not found: {p}")
    if p.is_symlink():
        raise ValueError(f"Refusing to use symlink: {p}")
    if not p.is_file():
        raise ValueError(f"Not a regular file: {p}")

def _restrict_owner_readwrite(path: str | os.PathLike) -> None:
    try:
        os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
    except Exception:
        pass

def _json_safe_val(x):
    """
    Convert a single value into something `json.dump` can serialize.

    - Missing scalars (None, NaN, pandas NA/NaT) become None.
    - NumPy scalars are unwrapped to the equivalent built-in Python value,
      because `json` cannot serialize e.g. `numpy.int64`.
    - Everything else is returned unchanged.

    Args:
        x: Any scalar or object.

    Returns:
        None for missing values, otherwise a (possibly unwrapped) value.
    """
    if x is None:
        return None
    if isinstance(x, np.generic):
        # numpy.int64 / numpy.float32 / numpy.str_ ... -> int / float / str
        x = x.item()
    try:
        missing = pd.isna(x)
    except (TypeError, ValueError):
        return x
    # pd.isna returns an array for list-likes; only a scalar verdict counts,
    # containers are passed through untouched.
    if isinstance(missing, (bool, np.bool_)) and missing:
        return None
    return x

def _jsonify_array(arr) -> list:
    """
    Turn a sequence or array-like into a plain list of JSON-safe values.

    Lists and tuples are processed element by element. Any other object that
    exposes `tolist` is first converted through an object-dtype NumPy array.
    Inputs of any other kind produce an empty list.

    Args:
        arr: A list, tuple, or object with a `tolist` method.

    Returns:
        list: Elements of `arr`, each passed through `_json_safe_val`.
    """
    if isinstance(arr, (list, tuple)):
        items = arr
    elif hasattr(arr, "tolist"):
        items = np.array(arr, dtype=object).tolist()
    else:
        return []
    return [_json_safe_val(item) for item in items]


def init_transform(
        df: pd.DataFrame,
        dataset_cfg,
        onehot: bool,
        scaler: Literal['uniform', 'normal', 'standard', 'minmax', 'none'],
        use_target: bool
):
    """
    Build a TabularTransform from a dataset configuration.

    When `use_target` is False, the configuration is first passed through
    `drop_target`, and the resulting configuration is used instead.

    Args:
        df (pd.DataFrame): Reference DataFrame handed to TabularTransform.
        dataset_cfg: Configuration object exposing `categorical_columns`,
            `numerical_columns` and `missing_values`.
        onehot (bool): Whether categorical features are one-hot encoded.
        scaler (str): Name of the scaler applied to numerical features.
        use_target (bool): If False, build the transform from
            `drop_target(dataset_cfg)`.

    Returns:
        TabularTransform: The configured (not yet fitted) transform.
    """
    effective_cfg = dataset_cfg if use_target else drop_target(dataset_cfg)
    column_options = dict(
        categorical_columns=effective_cfg.categorical_columns,
        numerical_columns=effective_cfg.numerical_columns,
        missing_values=effective_cfg.missing_values,
    )
    return TabularTransform(df=df, scaler=scaler, onehot=onehot, **column_options)


class TabularTransformSet:

    def __init__(self, cfg, onehot: bool, scaler: str):
        """
        Build the pair of transforms (with and without the target column).

        The training CSV is read once and used as the reference DataFrame for
        both transforms.

        Args:
            cfg: A configuration object with fields:
                - cfg.dataset.name (str): Name of the dataset.
                - cfg.dataset.train_path (str): Path to the training CSV.
                - cfg.dataset.target_column: Stored as `self.target_column`.
            onehot (bool): Whether to apply one-hot encoding to categorical columns.
            scaler (str): The name of the scaler to use for numerical columns.
        """
        reference_df = pd.read_csv(cfg.dataset.train_path)

        self._dataset_name = cfg.dataset.name
        self.onehot = onehot
        self.scaler = scaler
        self.target_column = cfg.dataset.target_column

        # First the transform that keeps the target, then the one without it.
        self.target, self.no_target = (
            init_transform(reference_df, cfg.dataset, onehot, scaler, keep_target)
            for keep_target in (True, False)
        )

    def fit(self, df: pd.DataFrame):
        self.target.fit(df)
        self.no_target.fit(df)

    @property
    def name(self) -> str:
        """
        Return the dataset name stored on this transform set.

        Returns:
            str: The name of the dataset.
        """
        return self._dataset_name

    def save_secure(self, dir_path: str | os.PathLike) -> str:
        """
        Save the transform set (with-target and without-target transforms)
        into a directory as JSON files.

        Files written:
          - <dir>/meta.json
          - <dir>/target.json
          - <dir>/no_target.json

        Args:
            dir_path: Destination directory; created (with parents) if missing.

        Returns:
            str: The directory path as a string.
        """
        d = Path(dir_path)
        d.mkdir(parents=True, exist_ok=True)

        meta = {
            "version": _TRANSFORM_FMT_VERSION,
            "dataset_name": self._dataset_name,
            "onehot": self.onehot,
            "scaler": self.scaler,
            "target_column": self.target_column,
        }
        # Serialize before touching the file so a non-serializable value does
        # not leave a truncated meta.json behind.
        meta_text = json.dumps(meta, ensure_ascii=False)

        meta_path = d / "meta.json"
        # Create the file with owner-only permissions from the start instead
        # of creating it with the umask default and tightening it afterwards.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        fd = os.open(meta_path, flags, stat.S_IRUSR | stat.S_IWUSR)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(meta_text)
        # The mode given to os.open only applies to newly created files, so
        # tighten again in case the file already existed.
        _restrict_owner_readwrite(meta_path)

        self.target.save_secure(d / "target.json")
        self.no_target.save_secure(d / "no_target.json")
        return str(d)

    @classmethod
    def load_secure(cls, dir_path: str | os.PathLike) -> "TabularTransformSet":
        """
        Load a transform set from a directory written by `save_secure`.

        The instance is rebuilt without calling `__init__`; its state is
        restored from `meta.json`, `target.json` and `no_target.json`.

        Args:
            dir_path: Directory containing the three JSON files.

        Returns:
            TabularTransformSet: The restored transform set.

        Raises:
            ValueError: If the stored version differs from the supported one
                (file checks performed by `_is_regular_file` may raise too).
        """
        base = Path(dir_path)
        meta_path = base / "meta.json"
        target_path = base / "target.json"
        no_target_path = base / "no_target.json"
        for required in (meta_path, target_path, no_target_path):
            _is_regular_file(required)

        with open(meta_path, "r", encoding="utf-8") as handle:
            meta = json.load(handle)

        stored_version = int(meta.get("version", -1))
        if stored_version != _TRANSFORM_FMT_VERSION:
            raise ValueError("Unsupported transform set version.")

        restored = cls.__new__(cls)  # bypass __init__
        restored._dataset_name = meta["dataset_name"]
        restored.onehot = meta["onehot"]
        restored.scaler = meta["scaler"]
        restored.target_column = meta["target_column"]

        # Restore the individual transforms.
        restored.target = TabularTransform.load_secure(target_path)
        restored.no_target = TabularTransform.load_secure(no_target_path)
        return restored

    def change_column_names(self, mapping: dict):
        self.target.change_column_names(mapping)
        self.no_target.change_column_names(mapping)

class TabularTransform:
    """
    A transformer class for tabular data that handles:
      - Categorical encoding (via LabelEncoderWithNaN, optionally one-hot).
      - Numerical scaling using a specified method (standard, minmax, etc.).
      - Handling missing values by converting them to pandas.NA before encoding/scaling.
      - Converting between DataFrame and numeric arrays (including optional Torch tensors).

    Attributes:
        categorical_columns (List[str]): List of categorical column names to encode.
        numerical_columns (List[str]): List of numerical column names to scale.
        missing_values (List[str]): Values in the DataFrame to treat as missing/NA.
        onehot (bool): Whether to apply one-hot encoding for categorical features.
        label_encoders (List[LabelEncoderWithNaN]): Encoders for each categorical column.
        n_categories_per_columns (List[int]): Number of distinct categories in each categorical column.
        scaler: A fitted scaler instance for numerical columns (the default chosen by the user).
        _scaler (dict): A dictionary of all possible scaler instances, keyed by their names.
        dtypes (List[str]): Recorded data types of numerical columns (used for inverse_transform).
    """

    def __init__(
            self,
            df: pd.DataFrame,
            categorical_columns: Union[List[str], Tuple[str]],
            numerical_columns: Union[List[str], Tuple[str]],
            missing_values: Union[List[str], Tuple[str]] = (),
            scaler: Literal['uniform', 'normal', 'standard', 'minmax', 'none'] = 'standard',
            onehot=True
    ):
        """
        Initialize the TabularTransform.

        Label encoders are fitted here on `df`; scalers are only constructed
        and are fitted later by `fit`.

        Args:
            df (pd.DataFrame): Reference DataFrame used to fit the label
                encoders, construct the scalers and infer dtypes.
            categorical_columns (List[str] or Tuple[str]):
                Names of categorical columns. If None, an empty list is used.
            numerical_columns (List[str] or Tuple[str]):
                Names of numerical columns. If None, an empty list is used.
                A name may carry a dtype tag separated by '!!' (e.g.
                'age!!int'): the text after the last '!!' is recorded as the
                intended dtype, the text before it is the column name.
            missing_values (List[str] or Tuple[str]):
                Values to treat as missing (replaced by pd.NA).
            scaler (str):
                Key into SCALERS selecting the primary scaler.
            onehot (bool):
                If True, one-hot encode categorical features after label
                encoding. The value 'anabit' selects binary encoding.
        """
        # Normalize to lists: None becomes empty, and tuples become lists so
        # both collections can be combined regardless of the container passed.
        numerical_columns = list(numerical_columns or [])
        categorical_columns = list(categorical_columns or [])

        self.categorical_columns = categorical_columns

        # Strip the optional '!!<dtype>' tag from numerical column names.
        self.numerical_columns = [
            col[::-1].split('!!')[-1][::-1] for col in numerical_columns
        ]
        # Keep only the columns handled by this transform, in df's order.
        # (Previously a tuple of categorical columns made the list
        # concatenation fail with TypeError.)
        handled = set(self.categorical_columns) | set(self.numerical_columns)
        self.columns = [col for col in df.columns if col in handled]
        self.missing_values = missing_values
        self.onehot = onehot

        # Replace missing value placeholders in the reference df
        # (df.replace returns a new frame; the caller's df is not modified).
        for mv in self.missing_values:
            df = df.replace(mv, pd.NA)

        # Label encoders for categorical columns
        self.label_encoders = [LabelEncoderWithNaN().fit(df[col]) for col in self.categorical_columns]
        self.n_categories_per_columns = [len(enc.classes_) for enc in self.label_encoders]
        # Bits needed per column for the 'anabit' encoding; a column with a
        # single category needs 0 bits.
        self.n_bits_per_columns = [
            int(np.ceil(np.log2(max(n_cls, 1))))
            for n_cls in self.n_categories_per_columns
        ]

        # Primary scaler plus one instance of every default scaler.
        if self.numerical_columns:
            self.scaler = SCALERS[scaler](df)
            self._scaler = {key: item(df) for key, item in DEFAULT_SCALERS.items()}
            self.scaler_key = scaler
        else:
            self.scaler = None
            self._scaler = {key: None for key in DEFAULT_SCALERS}
            self.scaler_key = None

        # Dtypes for numerical columns (needed in inverse_transform)
        self.dtypes = []
        for col in numerical_columns:
            parts = col[::-1].split('!!')
            if len(parts) == 1:
                # No dtype tag in the name: use the dtype found in df.
                self.dtypes.append(df[col].dtype)
            else:
                # The text after the last '!!' is the intended dtype (e.g. 'int').
                self.dtypes.append(parts[0][::-1])
        self._is_fit = False

    def fit(self, df):
        if not self._is_fit:
            self.scaler.fit(df[self.numerical_columns])
            for _, scaler in self._scaler.items():
                scaler.fit(df[self.numerical_columns])
            self._is_fit = True
        return self

    @property
    def categorical_dim(self) -> int:
        """
        Returns the total dimensionality of the categorical features after encoding.
        If one-hot is enabled, it is the sum of all categories across columns.
        Otherwise, it is simply the number of categorical columns.

        Returns:
            int: Dimension of categorical features in the transformed data.
        """
        if self.onehot == 'anabit':
            return sum(self.n_bits_per_columns)
        elif self.onehot:
            return sum(self.n_categories_per_columns)
        return len(self.categorical_columns)

    @property
    def n_categorical_dim_per_columns(self) -> list:
        if self.onehot == 'anabit':
            return self.n_bits_per_columns
        elif self.onehot:
            return self.n_categories_per_columns
        return [1] * self.categorical_dim

    @property
    def numerical_dim(self) -> int:
        """
        Returns the number of numerical columns.

        This is also the column index at which `inverse_transform` splits a
        combined array into its numerical and categorical parts.

        Returns:
            int: Dimension of numerical features in the transformed data.
        """
        return len(self.numerical_columns)

    def transform(
            self,
            df: pd.DataFrame,
            scaler: Optional[str] = None,
            onehot: Optional[bool] = None,
            return_as_tensor: bool = False
    ) -> Union[Tuple[np.ndarray, np.ndarray], torch.Tensor]:
        """
        Transform the input DataFrame into numeric arrays suitable for modeling.
        This includes:
          - Encoding categorical columns (label or one-hot).
          - Scaling numerical columns.
          - Optionally returning the result as a Torch tensor.

        Args:
            df (pd.DataFrame): The DataFrame to transform.
            scaler (str, optional): Name of a specific scaler to use from self._scaler.
                                    If None, uses the default self.scaler.
            onehot (bool, optional): Whether to one-hot encode categorical features.
                                     If None, uses self.onehot.
            return_as_tensor (bool): If True, return a torch.Tensor instead of NumPy arrays.

        Returns:
            Union[torch.Tensor, Tuple[np.ndarray, np.ndarray]]:
                - If return_as_tensor=True, returns a single torch.Tensor with shape (n_rows, numerical_dim + categorical_dim).
                - Otherwise, returns a tuple (num_array, cat_array).
        """
        onehot = self.onehot if onehot is None else onehot

        # Replace missing value placeholders
        for mv in self.missing_values:
            df = df.replace(mv, pd.NA)

        # Transform numerical and categorical
        num = self._transform_numerical(df, scaler=scaler)
        cat = self._transform_categorical(df)

        # Apply one-hot encoding if requested
        if onehot == 'anabit':
            cat = self._anabit_encoding(cat)
        elif onehot:
            cat = self._onehot_encoding(cat)

        # Optionally return as torch tensor
        if return_as_tensor:
            return torch.Tensor(np.concatenate([num, cat], axis=1))

        return num, cat

    def inverse_transform(
            self,
            num_or_cat: Union[np.ndarray, torch.Tensor],
            cat: Optional[Union[np.ndarray, torch.Tensor]] = None,
            *,
            scaler: Optional[str] = None,
            onehot: Optional[bool] = None
    ) -> pd.DataFrame:
        """
        Inverse transform numeric/categorical arrays back into a DataFrame with original columns.
        If only one array is provided (`num_or_cat`) and `cat` is None, the method splits
        the array into numeric and categorical segments based on known dimensions.

        Args:
            num_or_cat (np.ndarray): Either numeric data or a combined numeric+categorical array.
            cat (np.ndarray, optional): Categorical data array. If not provided, `num_or_cat` is split.
            scaler (str, optional): Which scaler to use for inverse transform. Defaults to the main one.
            onehot (bool, optional): Whether the original categorical data was one-hot encoded. Defaults to self.onehot.

        Returns:
            pd.DataFrame: The reconstructed DataFrame containing the original numeric and categorical columns.
        """
        onehot = self.onehot if onehot is None else onehot
        if isinstance(num_or_cat, torch.Tensor):
            num_or_cat = num_or_cat.detach().cpu().numpy()
        if isinstance(cat, torch.Tensor):
            cat = cat.detach().cpu().numpy()

        # Split if cat is None
        if cat is None:
            num = num_or_cat[:, :self.numerical_dim]
            cat_array = num_or_cat[:, self.numerical_dim:]
        else:
            num = num_or_cat
            cat_array = cat

        # Convert numeric data back to a DataFrame
        num_df = pd.DataFrame(
            self._inverse_transform_numerical(num, scaler=scaler),
            columns=self.numerical_columns
        )

        # If one-hot encoded, convert categorical array back to integer-coded categories
        if onehot == 'anabit':
            cat_array = self._inverse_transform_categorical_anabit(cat_array)
        elif onehot:
            cat_array = self._inverse_transform_categorical_onehot(cat_array)

        # Convert integer-coded categories back to original labels
        cat_df = pd.DataFrame(
            self._inverse_transform_categorical(cat_array),
            columns=self.categorical_columns
        )

        # Combine numeric and categorical columns in the original order
        df = pd.concat([num_df, cat_df], axis=1)
        df = df[self.columns]  # Keep only relevant columns in the correct order
        return df

    def _transform_categorical(self, df: pd.DataFrame) -> np.ndarray:
        """
        Label-encode the categorical columns.

        Args:
            df (pd.DataFrame): Input DataFrame.

        Returns:
            np.ndarray: Encoded categorical array of shape (n_rows, n_categorical_columns).
                        If there are no categorical columns, returns an empty array.
        """
        if not self.categorical_columns:
            return np.zeros((len(df), 0), dtype=np.int64)

        encoded = []
        for col, encoder in zip(self.categorical_columns, self.label_encoders):
            if col not in df:
                df[col] = pd.NA
            encoded.append(encoder.transform(df[col]))
        return np.stack(encoded, axis=1)

    def _onehot_encoding(self, cat: np.ndarray) -> np.ndarray:
        """
        Convert label-encoded categorical columns into one-hot vectors.

        Args:
            cat (np.ndarray): Integer-encoded categorical array of shape (n_rows, n_categorical_columns).

        Returns:
            np.ndarray: One-hot-encoded array of shape (n_rows, sum(n_categories_per_columns)).
        """
        if cat.shape[1] == 0:
            return cat  # No categorical columns

        onehot_arrays = []
        for col_data, n_cls in zip(cat.T, self.n_categories_per_columns):
            # We create an (n_cls+1) x n_cls matrix with an extra row for missing (NaN)
            mapping = np.concatenate([np.eye(n_cls), np.full((1, n_cls), np.nan)])
            # Convert NaN to -1, so we map it to the last row
            onehot_arrays.append(mapping[np.nan_to_num(col_data, nan=-1).astype(int)])
        onehot_arrays = np.concatenate(onehot_arrays, axis=1)
        return onehot_arrays

    def _anabit_encoding(self, cat: np.ndarray) -> np.ndarray:
        """
        Convert label-encoded categorical columns into fixed-length binary
        (“analog bit”) vectors, using the minimal number of bits required
        to represent the given number of categories in each column.

        Missing values (NaN) are propagated as NaN in every bit position.

        Args
        ----
        cat : np.ndarray
            Integer-encoded array of shape (n_rows, n_categorical_columns).

        Returns
        -------
        np.ndarray
            Binary-encoded array of shape (n_rows, sum(n_bits_per_columns)).
        """
        if cat.shape[1] == 0:
            return cat

        bin_arrays = []

        for col_data, n_cls, n_bits in zip(cat.T, self.n_categories_per_columns, self.n_bits_per_columns):
            col_int = np.nan_to_num(col_data, nan=-1).astype(int)  # NaN → −1

            powers = 2 ** np.arange(n_bits - 1, -1, -1, dtype=np.int64)
            bits   = ((col_int[:, None] & powers) > 0).astype(np.float32)

            nan_rows = col_int < 0
            if nan_rows.any():
                bits[nan_rows, :] = np.nan

            bin_arrays.append(bits)

        return np.concatenate(bin_arrays, axis=1)

    def _transform_numerical(self, df: pd.DataFrame, scaler: Optional[str]) -> np.ndarray:
        """
        Scale the numerical columns, optionally using a specific scaler from self._scaler.

        Args:
            df (pd.DataFrame): Input DataFrame containing numeric columns.
            scaler (str, optional): Key to a specific scaler in self._scaler. If None, use self.scaler.

        Returns:
            np.ndarray: Scaled numeric data of shape (n_rows, n_numerical_columns).
        """
        assert self._is_fit or scaler == 'none'
        if not self.numerical_columns:
            return np.zeros((len(df), 0), dtype=np.float64)

        # Mark missing entries as NaN
        current_columns = df.columns
        missing_columns = [e for e in self.numerical_columns if e not in current_columns]
        df[missing_columns] = pd.NA
        nan_mask = df[self.numerical_columns].isna().to_numpy()
        df[self.numerical_columns] = df[self.numerical_columns].fillna(0)

        active_scaler = self._scaler[scaler] if scaler else self.scaler
        res = active_scaler.transform(df[self.numerical_columns])
        if isinstance(res, pd.DataFrame):
            res = res.to_numpy()

        res = res.astype('float')
        res[nan_mask] = np.nan
        return res

    def _inverse_transform_categorical(self, arr: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
        """
        Convert integer-coded categorical data back to original strings or categories.

        Args:
            arr (Union[np.ndarray, torch.Tensor]): Encoded data of shape (n_rows, n_categorical_columns).

        Returns:
            np.ndarray: Decoded data of shape (n_rows, n_categorical_columns).
        """
        if not self.categorical_columns:
            return np.zeros((len(arr), 0), dtype=np.int64)

        if isinstance(arr, torch.Tensor):
            arr = arr.detach().cpu().numpy()

        decoded = []
        for col_array, encoder in zip(arr.T, self.label_encoders):
            decoded.append(encoder.inverse_transform(col_array))
        return np.stack(decoded, axis=1)

    def _inverse_transform_categorical_onehot(self, cat: np.ndarray) -> np.ndarray:
        """
        Convert one-hot-encoded data back to integer-coded categories.

        Args:
            cat (np.ndarray): One-hot array of shape (n_rows, sum(n_categories_per_columns)).

        Returns:
            np.ndarray: Integer-coded array of shape (n_rows, n_categorical_columns).
        """
        if self.categorical_dim == 0:
            return cat  # No categorical columns

        # Split the one-hot array into columns based on n_categories_per_columns
        splits = np.split(cat, np.cumsum(self.n_categories_per_columns), axis=1)
        # Filter out any empty splits that might appear
        splits = [s for s in splits if s.shape[1] > 0]

        # Argmax each split along axis=1 to get category indices
        cat_indices = [np.argmax(s, axis=1) for s in splits]
        return np.stack(cat_indices, axis=1)

    def _inverse_transform_categorical_anabit(self, cat: np.ndarray) -> np.ndarray:
        """
        Convert binary-encoded data back to integer-coded categories.

        Args
        ----
        cat : np.ndarray
            Binary array of shape (n_rows, sum(n_bits_per_columns)).

        Returns
        -------
        np.ndarray
            Integer-coded array of shape (n_rows, n_categorical_columns),
            with NaN for originally missing entries.
        """
        if self.categorical_dim == 0:
            return cat

        # 컬럼별 비트 길이로 분할
        splits = np.split(cat,
                          np.cumsum(self.n_bits_per_columns),
                          axis=1)
        splits = [s for s in splits if s.shape[1] > 0]

        cat_indices = []
        for bits, n_bits in zip(splits, self.n_bits_per_columns):
            powers = 2 ** np.arange(n_bits - 1, -1, -1, dtype=np.int64)

            nan_rows = np.isnan(bits).any(axis=1)
            idx      = (bits >= 0.5).astype(int) @ powers
            idx      = idx.astype(float)
            if nan_rows.any():
                idx[nan_rows] = np.nan

            cat_indices.append(idx)

        return np.stack(cat_indices, axis=1)

    def _inverse_transform_numerical(
            self,
            arr: Union[np.ndarray, torch.Tensor],
            scaler: Optional[str]
    ) -> pd.DataFrame:
        """
        Convert scaled numeric data back to original scale and dtypes.

        Args:
            arr (Union[np.ndarray, torch.Tensor]): Scaled data of shape (n_rows, n_numerical_columns).
            scaler (str, optional): If not None, use a specific scaler from self._scaler.

        Returns:
            pd.DataFrame: A DataFrame containing the inverse-scaled numeric columns.
        """
        if not self.numerical_columns:
            return pd.DataFrame(
                np.zeros((len(arr), 0), dtype=np.float64),
                columns=[]
            )

        if isinstance(arr, torch.Tensor):
            arr = arr.detach().cpu().numpy()

        active_scaler = self._scaler[scaler] if scaler else self.scaler
        # Convert array back to a DataFrame so we can manipulate columns and dtypes
        res = active_scaler.inverse_transform(arr)
        if isinstance(res, np.ndarray):
            res = pd.DataFrame(res, columns=self.numerical_columns)

        # Convert to the intended dtype (e.g., rounding for int)
        for col, dtype in zip(self.numerical_columns, self.dtypes):
            if dtype == 'int':
                # If intended type is integer, round values and cast to integer
                res[col] = np.round(res[col]).astype('Int64')
            elif dtype == 'float':
                res[col] = res[col].astype('Float32')
            else:
                # If a pandas/numpy dtype object
                res[col] = res[col].astype(dtype)

        return res

    def _export_scaler_state(self, scaler_obj) -> dict:
        """
        Extract the JSON-serializable part of a scaler's state.

        Only the attributes listed in `_KNOWN_SCALER_KEYS` are exported, plus
        `feature_names_in_` (as strings) when present and not longer than
        10000 entries.

        Args:
            scaler_obj: A scaler instance, or None.

        Returns:
            dict: `{"_present": False}` for None, otherwise a dict with
            `_present`, `_class_key` (always None) and `params`.
        """
        if scaler_obj is None:
            return {"_present": False}

        # Known parameter attributes first, in their declared order.
        params = {
            key: _jsonify_array(getattr(scaler_obj, key))
            for key in _KNOWN_SCALER_KEYS
            if hasattr(scaler_obj, key)
        }

        # Feature/column names are included only while reasonably small.
        if hasattr(scaler_obj, "feature_names_in_"):
            feature_names = list(getattr(scaler_obj, "feature_names_in_"))
            if len(feature_names) <= 10000:
                params["feature_names_in_"] = [str(name) for name in feature_names]

        return {"_present": True, "_class_key": None, "params": params}

    def _apply_scaler_state(self, scaler_obj, state: dict, n_num_cols: int) -> None:
        """
        Inject a saved state into a scaler object, with validation.

        Only the attributes this module exports are restored: the keys in
        `_KNOWN_SCALER_KEYS` and `feature_names_in_`. Any other key found in
        the file is ignored, so a crafted payload cannot overwrite arbitrary
        attributes (for example methods) of the scaler.

        Args:
            scaler_obj: Scaler to update; nothing happens when it is None.
            state (dict): State as produced by `_export_scaler_state`.
            n_num_cols (int): Expected length of every known parameter array.

        Raises:
            ValueError: If a known parameter is not a list, has the wrong
                length, or contains non-numeric values.
        """
        if scaler_obj is None or not state.get("_present", False):
            return

        params = state.get("params", {})
        for k, v in params.items():
            if k in _KNOWN_SCALER_KEYS:
                if not isinstance(v, list):
                    raise ValueError(f"Scaler param {k} must be a list.")
                if len(v) != 0 and len(v) != n_num_cols:
                    raise ValueError(f"Scaler param length mismatch: {k} has {len(v)} != {n_num_cols}")
                try:
                    # NaN was stored as null; restore it so the array stays
                    # numeric instead of becoming an object array.
                    restored = np.array(
                        [np.nan if item is None else item for item in v],
                        dtype=float,
                    )
                except (TypeError, ValueError) as exc:
                    raise ValueError(f"Scaler param {k} is not numeric.") from exc
            elif k == "feature_names_in_":
                restored = np.array(v) if v is not None else None
            else:
                # Unknown key: never set attributes this module did not export.
                continue
            setattr(scaler_obj, k, restored)

    def to_secure_payload(self) -> dict:
        """
        Build a payload that can be serialized without pickle.

        The payload contains the column configuration, the label-encoder
        classes (NaN stored as None), the exported state of the primary scaler
        and of every scaler in `self._scaler`, and the fit flag.

        Returns:
            dict: The payload, tagged with the current format version.

        Raises:
            ValueError: If a categorical column has more than 1,000,000 classes.
        """
        # Category classes (strings/numbers) are stored as JSON lists.
        classes_list = []
        for enc in self.label_encoders:
            cls = getattr(enc, "classes_", None)
            if cls is None:
                cls = []
            cls = _jsonify_array(cls)
            # Guard against excessive size.
            if len(cls) > 1_000_000:
                raise ValueError("Too many classes in a categorical column.")
            classes_list.append(cls)

        # Explicit None checks instead of `x and y`, which could leak None or
        # a falsy scaler object into fields meant to hold a name / a bool.
        scaler = getattr(self, "scaler", None)
        has_scaler = scaler is not None

        payload = {
            "version": _TRANSFORM_FMT_VERSION,
            "categorical_columns": list(self.categorical_columns),
            "numerical_columns": list(self.numerical_columns),
            "columns": list(self.columns),
            "missing_values": _jsonify_array(self.missing_values),
            "onehot": self.onehot,
            "dtypes": [str(dt) for dt in self.dtypes],
            "n_categories_per_columns": list(self.n_categories_per_columns),
            "n_bits_per_columns": list(self.n_bits_per_columns),
            "label_classes": classes_list,
            "scaler_name": type(scaler).__name__ if has_scaler else None,
            "scaler_key": getattr(self, "scaler_key", None),
            "scaler_key_active": has_scaler,
            "scaler_state": self._export_scaler_state(scaler),
            # State of every candidate scaler held in self._scaler.
            "_scaler_states": {
                key: self._export_scaler_state(obj)
                for key, obj in getattr(self, "_scaler", {}).items()
            },
            "_is_fit": bool(getattr(self, "_is_fit", False)),
        }
        return payload

    @classmethod
    def from_secure_payload(cls, payload: dict) -> "TabularTransform":
        """
        Rebuild a transform from a JSON payload (no pickle involved).

        The instance is created without calling `__init__`. Label encoders are
        re-fitted on the stored class lists, scalers are constructed on an
        empty DataFrame and then receive their saved state.

        Args:
            payload (dict): Payload as produced by `to_secure_payload`.

        Returns:
            TabularTransform: The restored transform.

        Raises:
            ValueError: If the payload version is not supported.
        """
        stored_version = int(payload.get("version", -1))
        if stored_version != _TRANSFORM_FMT_VERSION:
            raise ValueError("Unsupported transform payload version.")

        # New instance, bypassing __init__.
        restored = cls.__new__(cls)  # type: ignore

        # Plain fields.
        restored.categorical_columns = list(payload["categorical_columns"])
        restored.numerical_columns = list(payload["numerical_columns"])
        restored.columns = list(payload["columns"])
        restored.missing_values = list(payload.get("missing_values", []))
        restored.onehot = payload["onehot"]
        restored.dtypes = list(payload["dtypes"])
        restored.n_bits_per_columns = list(payload["n_bits_per_columns"])

        # Label encoders: fit each one on its stored class list, with None
        # turned back into NA.
        restored.label_encoders = []
        restored.n_categories_per_columns = []
        for _col, classes in zip(restored.categorical_columns, payload["label_classes"]):
            class_values = pd.Series(
                [pd.NA if c is None else c for c in classes],
                dtype="object",
            )
            encoder = LabelEncoderWithNaN().fit(class_values)
            restored.label_encoders.append(encoder)
            restored.n_categories_per_columns.append(len(encoder.classes_))

        # Scalers: construct on an empty (0-row) frame; state is injected below.
        empty_numeric = pd.DataFrame(columns=restored.numerical_columns)
        scaler_key = payload.get("scaler_key")
        if scaler_key in SCALERS and restored.numerical_columns:
            restored.scaler = SCALERS[scaler_key](empty_numeric)
            restored.scaler_key = scaler_key
        else:
            restored.scaler = None
            restored.scaler_key = None

        restored._scaler = {
            key: (factory(empty_numeric) if restored.numerical_columns else None)
            for key, factory in DEFAULT_SCALERS.items()
        }

        # Inject the saved state (includes length validation).
        n_numeric = len(restored.numerical_columns)
        restored._apply_scaler_state(
            restored.scaler, payload.get("scaler_state", {}), n_num_cols=n_numeric
        )
        for key, state in payload.get("_scaler_states", {}).items():
            restored._apply_scaler_state(
                restored._scaler.get(key), state, n_num_cols=n_numeric
            )

        restored._is_fit = bool(payload.get("_is_fit", True))
        return restored

    def save_secure(self, path: str | os.PathLike) -> str:
        """
        Save this single transform as JSON (no pickle involved).

        A `.json` suffix is appended when `path` has no suffix, and parent
        directories are created as needed.

        Args:
            path: Destination file path.

        Returns:
            str: The path that was actually written.
        """
        path = Path(path)
        if path.suffix == "":
            path = path.with_suffix(".json")
        path.parent.mkdir(parents=True, exist_ok=True)

        # Serialize before touching the file so a non-serializable value does
        # not leave a truncated file behind.
        text = json.dumps(self.to_secure_payload(), ensure_ascii=False)

        # Create the file with owner-only permissions from the start instead
        # of creating it with the umask default and tightening it afterwards.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        fd = os.open(path, flags, stat.S_IRUSR | stat.S_IWUSR)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(text)
        # The mode given to os.open only applies to newly created files, so
        # tighten again in case the file already existed.
        _restrict_owner_readwrite(path)
        return str(path)

    @classmethod
    def load_secure(cls, path: str | os.PathLike) -> "TabularTransform":
        """
        Load a transform from a JSON file written by `save_secure`.

        The file must be a regular, non-symlink file. Basic structure and
        length checks are applied before the payload is handed to
        `from_secure_payload`.

        Args:
            path: JSON file to read.

        Returns:
            TabularTransform: The restored transform.

        Raises:
            ValueError: If the payload is not a JSON object, the column lists
                are not lists, or a per-column list has the wrong length.
        """
        _is_regular_file(path)
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)

        # Structure checks come first so malformed input yields ValueError
        # rather than AttributeError/TypeError from .get() or len().
        if not isinstance(payload, dict):
            raise ValueError("Invalid payload: expected a JSON object.")

        cat_cols = payload.get("categorical_columns", [])
        num_cols = payload.get("numerical_columns", [])
        if not isinstance(cat_cols, list) or not isinstance(num_cols, list):
            raise ValueError("Invalid columns in payload.")

        # Every per-column list must match the column list it describes.
        expected_lengths = {
            "label_classes": len(cat_cols),
            "n_categories_per_columns": len(cat_cols),
            "n_bits_per_columns": len(cat_cols),
            "dtypes": len(num_cols),
        }
        for key, expected in expected_lengths.items():
            if key not in payload:
                continue
            value = payload[key]
            if not isinstance(value, list) or len(value) != expected:
                raise ValueError(f"{key} length mismatch.")

        return cls.from_secure_payload(payload)

    def change_column_names(self, mapping: dict):
        self.columns = [mapping[e] for e in self.columns]
        self.numerical_columns = [mapping[e] for e in self.numerical_columns]
        self.categorical_columns = [mapping[e] for e in self.categorical_columns]
        self.scaler.feature_names_in_ = np.array([mapping[e] for e in self.scaler.feature_names_in_])
        for key in self._scaler:
            self._scaler[key].feature_names_in_ = np.array([mapping[e] for e in self._scaler[key].feature_names_in_])
