


import os
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")

import numpy as np
import os
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.preprocessing import MaxAbsScaler
from sklearn.model_selection import train_test_split
from Model.DVM_AD import DVM_AD




# Base names (without the ".npz" extension) of the datasets to visualize.
# NOTE(review): the numeric prefixes skip 1, 6, 11, 15, 19, 24, 30, 36 and 42;
# those entries are not present in this list — confirm the omission is intended.
DATASET_LIST = [
    '2_annthyroid', '3_backdoor', '4_breastw', '5_campaign',
    '7_Cardiotocography', '8_celeba', '9_census', '10_cover',
    '12_fault', '13_fraud', '14_glass',
    '16_http', '17_InternetAds', '18_Ionosphere',
    '20_letter', '21_Lymphography', '22_magic.gamma', '23_mammography',
    '25_musk', '26_optdigits', '27_PageBlocks', '28_pendigits', '29_Pima',
    '31_satimage-2', '32_shuttle', '33_skin', '34_smtp', '35_SpamBase',
    '37_Stamps', '38_thyroid', '39_vertebral', '40_vowels', '41_Waveform',
    '43_WDBC', '44_Wilt', '45_wine', '46_WPBC', '47_yeast',
]

# Directory holding this script, or the current working directory when
# `__file__` is not defined in the module globals.
if "__file__" in globals():
    BASE_DIR = Path(__file__).resolve().parent
else:
    BASE_DIR = Path.cwd()

# Walk upwards from BASE_DIR and stop at the first directory that contains a
# "process_tabular" sub-directory, or at the filesystem root if none does.
ROOT_DIR = BASE_DIR
while True:
    if ROOT_DIR.parent == ROOT_DIR or (ROOT_DIR / "process_tabular").is_dir():
        break
    ROOT_DIR = ROOT_DIR.parent

# Folder the "<dataset>.npz" files are read from (kept as a plain string).
DATA_DIR = str(ROOT_DIR.joinpath("Data", "Classical"))

# Point budget passed to tsne_with_limit, and the seed shared by every
# randomized step of the script.
MAX_TSNE_POINTS = 2000
RANDOM_STATE = 42




def tsne_with_limit(X, y, max_points=10000, random_state=42):
    """Embed samples in 2-D with t-SNE, subsampling large inputs first.

    When ``X`` has more than ``max_points`` rows, a random subset is drawn
    that keeps the ratio of normal (``y == 0``) to abnormal (``y == 1``)
    rows. Inputs with at most two feature columns skip t-SNE and are
    returned as they are (after any subsampling).

    Args:
        X: 2-D array, one row per sample.
        y: Label array indexed like the rows of ``X``; 0 marks normal and
            1 marks abnormal samples. Rows with any other label are never
            selected when subsampling.
        max_points: Largest number of rows handed to t-SNE.
        random_state: Seed used for both the subsampling and t-SNE.

    Returns:
        A tuple ``(X_2d, y_vis)``: the embedded (or passed-through) rows
        and their labels. When subsampling, normal rows come first,
        followed by abnormal rows. Fewer than ``max_points`` rows are
        returned if there are not enough ``y == 1`` rows to fill the
        abnormal share.
    """
    n_samples = X.shape[0]

    if n_samples > max_points:
        print(f"⚠️  {n_samples} points > {max_points}, subsampling...")
        rng = np.random.RandomState(random_state)

        normal_idx = np.where(y == 0)[0]
        abnormal_idx = np.where(y == 1)[0]

        n_normal = int(max_points * len(normal_idx) / n_samples)
        # Cap at the available abnormal rows: when labels other than 0/1 are
        # present, the remainder can exceed len(abnormal_idx) and sampling
        # without replacement would raise.
        n_abnormal = min(max_points - n_normal, len(abnormal_idx))

        keep = np.concatenate([
            rng.choice(normal_idx, n_normal, replace=False),
            rng.choice(abnormal_idx, n_abnormal, replace=False),
        ])
    else:
        keep = np.arange(n_samples)

    X_vis = X[keep]
    y_vis = y[keep]

    # Nothing to reduce when the data already has two (or fewer) columns.
    if X_vis.shape[1] <= 2:
        return X_vis, y_vis

    # scikit-learn requires perplexity < n_samples; keep the usual 30 and
    # only shrink it for tiny inputs so they do not raise.
    perplexity = min(30, max(1, X_vis.shape[0] - 1))
    tsne = TSNE(
        n_components=2,
        perplexity=perplexity,
        learning_rate='auto',
        init='pca',
        random_state=random_state,
    )
    return tsne.fit_transform(X_vis), y_vis





def _scatter_by_label(ax, points_2d, labels, title):
    """Draw the label-0 (normal) and label-1 (abnormal) points on ``ax``."""
    for label_value, color, legend_name in (
        (0, 'steelblue', 'Normal'),
        (1, 'tomato', 'Abnormal'),
    ):
        selected = labels == label_value
        ax.scatter(
            points_2d[selected, 0],
            points_2d[selected, 1],
            s=8, alpha=0.6, color=color, label=legend_name
        )
    ax.set_title(title)
    ax.legend()


# For every dataset: load it, scale it, fit DVM_AD on the normal training
# rows, and show t-SNE plots of the data before and after DVM_AD.transform.
for dataset_name in DATASET_LIST:

    print("\n========================================")
    print(f"📊 Processing dataset: {dataset_name}")
    print("========================================")

    dataset_path = os.path.join(DATA_DIR, f"{dataset_name}.npz")
    if not os.path.exists(dataset_path):
        print(f"❌ Dataset not found: {dataset_path}")
        continue

    # The context manager closes the archive's file handle once the arrays
    # have been read, so handles do not pile up over the loop.
    # NOTE(review): allow_pickle=True can execute arbitrary code from a
    # crafted file — only load archives from a trusted source.
    with np.load(dataset_path, allow_pickle=True) as data:
        X, y = data['X'], data['y']

    # Keep a random 2000-row subset of large datasets (the "test" part of
    # the split) to bound the cost of the steps below.
    if X.shape[0] > 2000:
        _, X, _, y = train_test_split(
            X, y, test_size=2000, random_state=RANDOM_STATE
        )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=RANDOM_STATE
    )

    # The scaler is fitted on the training rows only and then applied to
    # the test rows.
    scaler = MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # DVM_AD is fitted on normal (label 0) training rows only.
    train_mask = y_train == 0
    X_train_proc = X_train_scaled[train_mask]
    y_train_proc = y_train[train_mask]

    # Everything (train + test, all labels) is visualized.
    X_all_before = np.concatenate([X_train_scaled, X_test_scaled], axis=0)
    y_all = np.concatenate([y_train, y_test], axis=0)

    dvm_ad = DVM_AD()
    dvm_ad.fit(X_train_proc, y_train_proc)

    X_all_after = dvm_ad.transform(X_all_before)

    # Both calls share the seed and the labels, so any subsampling selects
    # the same rows and y_vis from the first call is valid for both.
    X_before_2d, y_vis = tsne_with_limit(
        X_all_before, y_all,
        max_points=MAX_TSNE_POINTS,
        random_state=RANDOM_STATE
    )

    X_after_2d, _ = tsne_with_limit(
        X_all_after, y_all,
        max_points=MAX_TSNE_POINTS,
        random_state=RANDOM_STATE
    )

    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    _scatter_by_label(axs[0], X_before_2d, y_vis, "Before Projection")
    _scatter_by_label(axs[1], X_after_2d, y_vis, "After Projection")

    plt.suptitle(f"{dataset_name}: Normal vs Abnormal (t-SNE)")
    plt.tight_layout()
    plt.show()
    # Release the figure so figures do not accumulate across datasets when
    # show() returns without destroying it (e.g. non-interactive backends).
    plt.close(fig)

print("\n🎉 DONE: All datasets visualized.")
