import os, sys
current_file_path = os.path.abspath(__file__)
sys.path.append(os.path.dirname((current_file_path))+'/../')

import pandas as pd
from pipeline import RunPipeline_unsup, RunPipeline
'''
Params:
suffix: file name suffix;

parallel: running either 'unsupervise', 'semi-supervise', or 'supervise' (AD) algorithms,
corresponding to the Angle I: Availability of Ground Truth Labels (Supervision);

realistic_synthetic_mode: testing on 'local', 'global', 'dependency', and 'cluster' anomalies, 
corresponding to the Angle II: Types of Anomalies;

noise_type: evaluating algorithms on 'duplicated_anomalies', 'irrelevant_features' and 'label_contamination',
corresponding to the Angle III: Model Robustness with Noisy and Corrupted Data.
'''

# returns the results including [params, model_name, metrics, time_fit, time_inference]
# besides, results will be automatically saved in a dataframe and output as a csv file in the adbench/result folder

from pyod.models.gmm import GMM as GMMDetector

class GMM_wrapper:
    """Adapter that exposes a GMM detector through ``fit`` / ``predict_score``.

    Attributes:
        model: the underlying ``GMMDetector`` instance.
        model_name: label given by the caller; stored as-is.
    """

    def __init__(self, seed, model_name) -> None:
        """Create the detector.

        Args:
            seed: forwarded to the detector as ``random_state``.
            model_name: label stored on the wrapper.
        """
        detector_options = {'random_state': seed, 'covariance_type': 'tied'}
        self.model = GMMDetector(**detector_options)
        self.model_name = model_name

    def fit(self, X_train, y_train=None):
        """Fit the detector on ``X_train`` (``y_train`` is passed through as ``y``).

        Returns:
            This wrapper, so calls can be chained.
        """
        detector = self.model
        detector.fit(X=X_train, y=y_train)
        return self

    def predict_score(self, X):
        """Return the detector's ``decision_function`` output for ``X``."""
        return self.model.decision_function(X=X)
    
from kmeansmm import KMeansMM as KMeansMMDetector
import torch
class kmeansmm_wrapper():
    """Adapter that exposes a KMeansMM detector through ``fit`` / ``predict_score``.

    Inputs are NumPy arrays; they are converted to torch tensors before being
    handed to the detector, and tensor scores are converted back to NumPy so
    the return type matches the other wrappers used with the pipeline.

    Attributes:
        _model: the underlying ``KMeansMMDetector`` instance (CPU device).
        model_name: label given by the caller; stored as-is.
    """

    def __init__(self, seed, model_name) -> None:
        """Create the detector.

        Args:
            seed: accepted so the constructor signature matches the other
                wrappers. NOTE(review): it is not forwarded to the detector —
                confirm whether KMeansMM can be seeded.
            model_name: label stored on the wrapper.
        """
        self._model = KMeansMMDetector(device='cpu')
        self.model_name = model_name

    def fit(self, X_train, y_train=None):
        """Fit the detector on ``X_train``.

        Args:
            X_train: NumPy array of training samples.
            y_train: ignored; present only for interface compatibility.

        Returns:
            This wrapper, so calls can be chained.
        """
        self._model.fit(X=torch.from_numpy(X_train))
        return self

    def predict_score(self, X):
        """Return the detector's anomaly scores for ``X``.

        Args:
            X: NumPy array of samples to score.

        Returns:
            The result of ``anomaly_score``; if that result is a torch tensor
            it is detached, moved to CPU and converted to a NumPy array.
        """
        score = self._model.anomaly_score(torch.from_numpy(X))
        # Downstream code (and the sibling wrapper) works with NumPy arrays;
        # anything that is not a tensor is passed through untouched.
        if isinstance(score, torch.Tensor):
            score = score.detach().cpu().numpy()
        return score


import numpy as np
# Set to True to run a quick smoke test on random data instead of the
# full benchmark.
DEBUG = False
if DEBUG:
    X_train = np.random.randn(1000, 20)
    y_train = np.random.choice([0, 1], 1000)
    X_test = np.random.randn(100, 20)

    model = kmeansmm_wrapper(seed=42, model_name='kmeansmm')  # initialization
    model.fit(X_train, None)  # fit
    score = model.predict_score(X_test)  # predict

    # customized model on customized dataset
    dataset = {}
    dataset['X'] = np.random.randn(1000, 20)
    dataset['y'] = np.random.choice([0, 1], 1000)

    pipeline = RunPipeline(suffix='ADBench', parallel='unsupervise', realistic_synthetic_mode=None, noise_type=None)
    results = pipeline.run(clf=kmeansmm_wrapper, dataset=dataset)
    print(results)
    # sys.exit is the supported way to stop a script; the bare exit() helper
    # is only injected by the site module for interactive use.
    sys.exit()
else:
    output_path = 'results/cluster-based-v2.csv'
    # Create the output directory before the (potentially long) benchmark
    # runs, so a missing folder cannot cost us the results at the very end.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    results_all = []

    pipeline = RunPipeline_unsup(suffix='ADBench', parallel='unsupervise', realistic_synthetic_mode=None, noise_type=None)

    # Run every wrapper through the same pipeline and pool the rows.
    results = pipeline.run_tabular(clf=GMM_wrapper, model_name='GMM')
    results_all.extend(results)

    results = pipeline.run_tabular(clf=kmeansmm_wrapper, model_name='kmeansmm')
    results_all.extend(results)

    df = pd.DataFrame(results_all)
    df.columns = ['data', 'model', 'metric', 'fit_time', 'infer_time']
    df.to_csv(output_path, index=False)

