import os, sys
current_file_path = os.path.abspath(__file__)
sys.path.append(os.path.dirname((current_file_path))+'/../')

import pandas as pd
from pipeline import RunPipeline_unsup, RunPipeline

from FairOutlierDetection.code.train import *



class DCFOD_wrapper():
    """Adapter exposing the DCFOD model through ``fit`` / ``predict_score``.

    The class is handed (uninstantiated) to the benchmark pipelines as their
    ``clf`` argument. ``DCFOD``, ``Train``, ``get_score`` and ``torch`` are
    expected to come from the star import of the project's training module.
    """

    def __init__(self, seed, model_name) -> None:
        """Remember the constructor arguments.

        Args:
            seed: Seed supplied by the caller. It is only stored; no random
                number generator is seeded here.
            model_name: Label for this model, stored as ``self.model_name``.
        """
        self.seed = seed
        self.model_name = model_name

    @staticmethod
    def _training_configuration(num_samples):
        """Return the pair of settings forwarded positionally to ``Train``.

        Presumably ``(epochs, batch_size)`` -- confirm against ``Train``'s
        signature. The parentheses are essential: the unparenthesised form
        ``90, 64 if cond else 40, 256`` parses as the 3-tuple
        ``(90, 64 if cond else 40, 256)``, so large datasets would be given
        ``(90, 40)`` instead of ``(40, 256)``.

        Args:
            num_samples: Number of rows in the training data.

        Returns:
            ``(90, 64)`` for fewer than 10000 samples, otherwise ``(40, 256)``.
        """
        return (90, 64) if num_samples < 10000 else (40, 256)

    @staticmethod
    def _normalize_by_cluster(dist):
        """Turn a distance matrix into per-cluster normalised scores.

        Each row's score is its smallest distance along ``dim=1``; every score
        is then divided by the largest score among the rows that share the
        same arg-min column.

        Args:
            dist: 2-D tensor; presumably one row per sample and one column
                per centroid -- confirm against ``get_score``.

        Returns:
            1-D tensor of scores on the same device as ``dist``.
        """
        outlier_score, position = torch.min(dist, dim=1)
        for column in range(dist.shape[1]):
            members = position == column
            # Columns that are nobody's nearest have nothing to rescale.
            if members.any():
                # The quotient already lives on the tensor's own device, so
                # no explicit device transfer is needed.
                outlier_score[members] = (
                    outlier_score[members] / outlier_score[members].max()
                )
        return outlier_score

    def fit(self, X_train, y_train=None):
        """Build a DCFOD model sized for ``X_train`` and train it.

        Args:
            X_train: 2-D array-like exposing ``.shape``; ``shape[0]`` selects
                the training configuration and ``shape[1]`` is passed to
                ``DCFOD`` as the feature dimension.
            y_train: Forwarded unchanged to ``Train``; may be ``None``.

        Returns:
            ``self``, with the trained model stored in ``self.model``.
        """
        sensitive_attribute_group = []
        db = "data"
        first_setting, second_setting = self._training_configuration(
            X_train.shape[0]
        )

        num_subgroups = 0
        num_centroid = 10
        feature_dimension = X_train.shape[1]
        embedded_dimension = 64

        # Use the first GPU when present; otherwise fall back to the CPU.
        # NOTE(review): confirm DCFOD/Train accept a CPU device.
        device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu'
        )

        self.model = DCFOD(
            feature_dimension,
            num_centroid,
            embedded_dimension,
            num_subgroups,
            device,
        )

        Train(self.model, db, X_train, y_train, sensitive_attribute_group,
              first_setting, second_setting, with_weight=True, kf=0)
        return self

    def predict_score(self, X):
        """Score ``X`` with the fitted model.

        Args:
            X: Data forwarded unchanged to ``get_score``.

        Returns:
            NumPy array holding one normalised score per row of the matrix
            returned by ``get_score``.
        """
        dist = get_score(self.model, X)
        return self._normalize_by_cluster(dist).cpu().detach().numpy()


import numpy as np
# Flip to True to run a quick smoke test on random data instead of the full
# benchmark.
DEBUG = False
if DEBUG:
    X_train = np.random.randn(1000, 20)
    y_train = np.random.choice([0, 1], 1000)
    X_test = np.random.randn(100, 20)

    model = DCFOD_wrapper(seed=42, model_name='kmeansmm')  # initialization
    model.fit(X_train, None)  # fit
    score = model.predict_score(X_test)  # predict
    print(score.shape)

    print('\n\n\n\n')
    # customized model on customized dataset
    dataset = {}
    dataset['X'] = np.random.randn(1000, 20)
    dataset['y'] = np.random.choice([0, 1], 1000)

    pipeline = RunPipeline(suffix='ADBench', parallel='unsupervise', realistic_synthetic_mode=None, noise_type=None)
    results = pipeline.run(clf=DCFOD_wrapper, dataset=dataset)
    print(results)
    # sys.exit is always available; the bare exit() helper is injected by
    # the site module and is missing when that module is not loaded.
    sys.exit()
else:
    # Create the output directory up front so the benchmark results are not
    # lost to a missing 'results/' folder when the CSV is written at the end.
    os.makedirs('results', exist_ok=True)

    results_all = []

    pipeline = RunPipeline_unsup(suffix='ADBench', parallel='unsupervise', realistic_synthetic_mode=None, noise_type=None)

    results = pipeline.run_tabular(clf=DCFOD_wrapper, model_name='DCFOD')
    results_all.extend(results)

    # One row per result entry; the pipeline is expected to yield 5-field
    # records matching the column names below.
    df = pd.DataFrame(results_all)
    df.columns = ['data', 'model', 'metric', 'fit_time', 'infer_time']
    df.to_csv('results/dcfod.csv', index=False)

