


import pandas as pd
import numpy as np
import time
import os
from pathlib import Path
from Model.DVM_AD import DVM_AD
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
    StandardScaler, QuantileTransformer, RobustScaler,
    MinMaxScaler, MaxAbsScaler, Normalizer
)




# Base names (without the ``.npz`` extension) of the datasets to process.
#
# NOTE(review): indices 1, 6, 11, 17, 21, 25, 30, 35, 40 and 46 are absent from
# the numbering. The previous text had a dangling comma exactly where each of
# them would sit (which made this literal a SyntaxError), so those entries were
# presumably lost -- confirm and restore them if they are meant to be processed.
dataset_names = [
    '2_annthyroid', '3_backdoor', '4_breastw', '5_campaign',
    '7_Cardiotocography', '8_celeba', '9_census', '10_cover',
    '12_fault', '13_fraud', '14_glass', '15_Hepatitis', '16_http',
    '18_Ionosphere', '19_landsat', '20_letter',
    '22_magic.gamma', '23_mammography', '24_mnist',
    '26_optdigits', '27_PageBlocks', '28_pendigits', '29_Pima',
    '31_satimage-2', '32_shuttle', '33_skin', '34_smtp',
    '36_speech', '37_Stamps', '38_thyroid', '39_vertebral',
    '41_Waveform', '42_WBC', '43_WDBC', '44_Wilt', '45_wine',
    '47_yeast',
]

# Directory containing this script, or the current working directory when
# ``__file__`` is not defined (e.g. the code is run from an interactive session).
BASE_DIR = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()

# Project root: the nearest directory, starting at BASE_DIR and walking upwards,
# that contains a ``process_tabular`` sub-directory. When no ancestor has that
# marker, stay at BASE_DIR rather than silently ending up at the filesystem
# root, where the data lookup and the results directory would be misplaced.
ROOT_DIR = next(
    (
        candidate
        for candidate in (BASE_DIR, *BASE_DIR.parents)
        if (candidate / "process_tabular").is_dir()
    ),
    BASE_DIR,
)

# Maps a dataset domain name to the directory that holds that domain's ``.npz``
# files. The key must match the domain string passed to ``load_dataset``.
dataset_links = {
    'Classical': str(ROOT_DIR / 'Data' / 'Classical'),
}




# Output location for the eigenvalue dump: ROOT_DIR/final_results/results.
# The directory, including any missing parents, is created as soon as this
# module is loaded.
result_dir = ROOT_DIR.joinpath("final_results", "results")
result_dir.mkdir(exist_ok=True, parents=True)
eig_file = result_dir.joinpath("Eigen_all.csv")




# Scaler names understood by ``get_scaler``, listed in the same order as its
# branches. "None" stands for "apply no scaling".
__SCALERS = [
    "StandardScaler", "MinMaxScaler", "RobustScaler", "Normalizer",
    "QuantileTransformer", "MaxAbsScaler", "None",
]

def get_scaler(name):
    """Build the scikit-learn preprocessing object registered under *name*.

    Args:
        name: One of "StandardScaler", "MinMaxScaler", "RobustScaler",
            "Normalizer", "QuantileTransformer", "MaxAbsScaler" or "None".

    Returns:
        A newly constructed scaler instance for a recognised name. ``None`` is
        returned for the name "None" and for any name that is not recognised.
    """
    # Constructors are wrapped in lambdas so that a class is only looked up and
    # instantiated once its name has actually been matched.
    builders = (
        ("StandardScaler", lambda: StandardScaler()),
        ("MinMaxScaler", lambda: MinMaxScaler()),
        ("RobustScaler", lambda: RobustScaler()),
        ("Normalizer", lambda: Normalizer()),
        (
            "QuantileTransformer",
            lambda: QuantileTransformer(output_distribution="normal", random_state=42),
        ),
        ("MaxAbsScaler", lambda: MaxAbsScaler()),
    )
    for label, build in builders:
        if name == label:
            return build()
    # Both "None" and unrecognised names yield no scaler.
    return None




def preprocess_data_OC(train_data, test_data):
    """Turn labelled train/test frames into arrays for one-class training.

    Both frames are read positionally: every column except the last is treated
    as a feature and the last column as the label. Training rows whose label is
    not 0 are dropped (label 0 is presumably the normal class -- confirm with
    the data source); the test frame is returned unfiltered.

    Args:
        train_data: DataFrame with feature columns followed by a label column.
        test_data: DataFrame with the same column layout.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays, where the
        training arrays contain only the rows labelled 0.
    """
    def split_features_labels(frame):
        # Last column holds the label, everything before it the features.
        return frame.iloc[:, :-1].to_numpy(), frame.iloc[:, -1].to_numpy()

    train_features, train_labels = split_features_labels(train_data)
    keep = train_labels == 0
    test_features, test_labels = split_features_labels(test_data)
    return train_features[keep], train_labels[keep], test_features, test_labels

def load_dataset(name, domain):
    """Load ``<name>.npz`` from the directory registered for *domain*.

    Args:
        name: Dataset base name, without the ``.npz`` extension.
        domain: Key into ``dataset_links`` selecting the dataset directory.

    Returns:
        Whatever ``np.load`` returns for the archive, or ``None`` when anything
        goes wrong (unknown domain, missing or unreadable file, ...). Failures
        are reported on stdout instead of being raised.
    """
    try:
        folder = dataset_links[domain]
        # allow_pickle=True lets NumPy unpickle object arrays stored in the
        # archive, so only archives from a trusted source should be loaded.
        archive = np.load(os.path.join(folder, f"{name}.npz"), allow_pickle=True)
    except Exception as err:
        print(f"❌ Failed to load {domain}/{name}: {err}")
        return None
    return archive




# Script entry point: for every configured dataset, fit DVM_AD on the label-0
# training rows and append the model's eigenvalues as one CSV line to eig_file.
if __name__ == "__main__":
    for dataset_array, domain in zip([dataset_names], ['Classical']):
        for name in dataset_array:

            data = load_dataset(name, domain)
            if data is None:
                continue

            try:
                # np.load hands back a lazily-read NpzFile for ``.npz`` archives.
                # Pull both arrays into memory and close the archive right away
                # so that one file handle is not left open per dataset.
                with data:
                    X, y = data['X'], data['y']
                print(f"\n>>> Dataset: {name} | Size: {len(y)}")

                # Cap large datasets at a fixed random subsample of 10000 rows:
                # an integer test_size is an absolute row count, and only the
                # "test" part of the split is kept.
                if len(y) > 10000:
                    _, X, _, y = train_test_split(X, y, test_size=10000, random_state=42)

                # 70/30 train/test split with a fixed seed for reproducibility.
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.3, random_state=42
                )

                # Standardise with statistics estimated on the training split
                # only, then apply the same transform to the test split.
                scaler = StandardScaler()
                scaler.fit(X_train)
                X_train_scaled = scaler.transform(X_train)
                X_test_scaled = scaler.transform(X_test)

                # preprocess_data_OC expects frames whose last column is the
                # label, so append the labels as an extra column.
                Train_data = pd.DataFrame(np.hstack([X_train_scaled, y_train.reshape(-1, 1)]))
                Test_data = pd.DataFrame(np.hstack([X_test_scaled, y_test.reshape(-1, 1)]))

                # Keep only label-0 rows for training; the test arrays are
                # returned as well but are not used further in this script.
                X_train_proc, y_train_proc, X_test_proc, y_test_proc = preprocess_data_OC(
                    Train_data, Test_data
                )

                # The third argument of fit is set to the number of feature columns.
                k = X_train_proc.shape[1]
                dvm_ad = DVM_AD()
                dvm_ad.fit(X_train_proc, y_train_proc, k)

                # One comma-separated line of eigenvalues per processed dataset.
                # NOTE(review): lines carry no dataset identifier and failed
                # datasets are skipped, so rows cannot be mapped back to names
                # from the file alone -- confirm that consumers do not need this.
                with open(eig_file, "a") as f:
                    f.write(",".join(map(str, dvm_ad.eigvals_dcri)) + "\n")

            except Exception as e:
                # Best-effort batch run: report the failure and move on.
                print(f"❌ Error processing dataset {name}: {e}")
