################################################################################################
################################################################################################
# based on https://github.com/vicliv/DTE, adapted by first author
################################################################################################
################################################################################################

import argparse
import os
import pickle

from adbench.myutils import Utils

from data_generator_add_val import DataGenerator



def main(args):
    """Generate every additional dataset for each seed and pickle it to disk.

    For each seed in ``range(args.start_seed, args.end_seed)`` and each
    dataset name listed below, the data returned by
    ``DataGenerator.generator`` is written to
    ``<save_path>/<dataset>/seed_<seed>.pkl``.

    Args:
        args: Parsed command-line namespace providing ``start_seed`` (first
            seed, inclusive), ``end_seed`` (end of the seed range, exclusive)
            and ``save_path`` (root output folder).
    """
    save_root = args.save_path

    # Names of the datasets from the "Additional" collection; built once
    # instead of on every seed iteration.
    dataset_names = (
        'Parkinson', 'abalone', 'arrhythmia', 'ecoli',
        'hrss_anomalous_optimized', 'hrss_anomalous_standard',
        'mif', 'miv', 'mulcross', 'nasa', 'pen-global', 'pen-local',
        'seismic-bumps', 'wbc2', 'yeast6',
    )

    for seed in range(args.start_seed, args.end_seed):
        datagenerator = DataGenerator(seed=seed, test_size=0.5, normal=True)  # data generator

        utils = Utils()  # utils function
        utils.set_seed(seed)

        for dataset in dataset_names:
            print(dataset)

            # import the dataset
            datagenerator.dataset = dataset  # specify the dataset name
            print(datagenerator.dataset)

            # la: ratio of labeled anomalies, from 0.0 to 1.0 (0 here).
            # A maximum of 50,000 data points are requested via max_size.
            data = datagenerator.generator(la=0, max_size=50000)

            data_dir = os.path.join(save_root, dataset)
            # exist_ok avoids the check-then-create race of
            # os.path.exists() followed by os.makedirs().
            os.makedirs(data_dir, exist_ok=True)

            with open(os.path.join(data_dir, f"seed_{seed}.pkl"), "wb") as file:
                pickle.dump(data, file)

                
            
    
if __name__ == "__main__":
    # Command-line entry point: parse the seed range and the output folder,
    # then hand the parsed namespace to main().
    parser = argparse.ArgumentParser(description='Settings')
    parser.add_argument('--start_seed', type=int,
        default=0, help='first random seed (inclusive)')
    # main() iterates range(start_seed, end_seed), so this bound is exclusive.
    parser.add_argument('--end_seed', type=int,
        default=5, help='end of the seed range (exclusive); the last seed used is end_seed - 1')
    parser.add_argument('--save_path', type=str,
        default='./data_add', help='folder in which to save the data files')

    args = parser.parse_args()
    main(args)
