################################################################################################
################################################################################################
# based on https://github.com/vicliv/DTE, adapted by first author
################################################################################################
################################################################################################

import argparse
import os
import pickle

from adbench.myutils import Utils

from data_generator import DataGenerator



def main(args):
    """Generate datasets for a range of seeds and pickle each one to disk.

    For every seed in ``range(args.start_seed, args.end_seed)`` and every
    dataset name in the generator's classical, CV and NLP dataset lists, the
    dataset is generated and written to
    ``<save_path>/<dataset>/seed_<seed>.pkl``.

    Args:
        args: Parsed command-line namespace providing ``start_seed``,
            ``end_seed`` (exclusive upper bound), ``OneClass`` (the string
            ``'yes'`` selects the one-class configuration; any other value
            selects the other configuration) and ``save_path``.
    """
    path = args.save_path
    # The mode does not change between seeds, so evaluate the flag once.
    one_class = args.OneClass == 'yes'

    for seed in range(args.start_seed, args.end_seed):
        if one_class:
            datagenerator = DataGenerator(seed=seed, test_size=0.5, normal=True)
        else:
            datagenerator = DataGenerator(seed=seed, test_size=0, normal=False)

        utils = Utils()  # utils function
        utils.set_seed(seed)

        # Get the datasets from ADBench
        for dataset_list in (
            datagenerator.dataset_list_classical,
            datagenerator.dataset_list_cv,
            datagenerator.dataset_list_nlp,
        ):
            for dataset in dataset_list:
                print(dataset)

                # Generator options, per the upstream note:
                #   la: ratio of labeled anomalies, from 0.0 to 1.0
                #   realistic_synthetic_mode: types of synthetic anomalies,
                #       can be local, global, dependency or cluster
                #   noise_type: inject data noises for testing model
                #       robustness, can be duplicated_anomalies,
                #       irrelevant_features or label_contamination
                # Only `la` and `max_size` are passed here.
                datagenerator.dataset = dataset  # specify the dataset name
                # maximum of 50,000 data points are available
                data = datagenerator.generator(la=0, max_size=50000)

                data_dir = f"{path}/{dataset}"
                # exist_ok avoids the check-then-create race of a separate
                # os.path.exists() test when several runs share save_path.
                os.makedirs(data_dir, exist_ok=True)

                with open(f"{data_dir}/seed_{seed}.pkl", "wb") as file:
                    pickle.dump(data, file)

                
            
    
if __name__ == "__main__":
    # Script entry point: declare the command-line options, parse them and
    # hand the resulting namespace to main().
    parser = argparse.ArgumentParser(description='Settings')

    # One row per option: (flag, value type, default, help text).
    option_table = (
        ('--start_seed', int, 0, 'first random seed'),
        ('--end_seed', int, 5, 'last random seed -1'),
        ('--OneClass', str, 'yes', 'One-Class or unsup'),
        ('--save_path', str, './data', 'folder to saved data files'),
    )
    for flag, value_type, default_value, help_text in option_table:
        parser.add_argument(
            flag, type=value_type, default=default_value, help=help_text
        )

    args = parser.parse_args()
    main(args)
