import pandas as pd
from config_pool import configs
import argparse
from feature_generation_and_selection import *
import warnings
from copy import deepcopy
warnings.filterwarnings("ignore")

def _parse_args():
    """Parse the command-line options of this benchmark script.

    Returns:
        argparse.Namespace with ``seed``, ``cv``, ``n_jobs`` and ``file_name``.
        ``file_name`` is ``None`` when the option is omitted; when given it
        must be a key of ``configs``, otherwise the parser exits with a usage
        error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=1, help='random seed')
    parser.add_argument("--cv", type=int, default=5)
    parser.add_argument("--n_jobs", type=int, default=20)
    parser.add_argument("--file_name", type=str, help='task name in config_pool')
    args = parser.parse_args()
    if args.file_name is not None and args.file_name not in configs:
        parser.error("unknown task name %r: not a key of config_pool.configs"
                     % args.file_name)
    return args


def _evaluate_dataset(key, args):
    """Cross-validate ``feature_generation`` on one configured dataset.

    Args:
        key: Key of the dataset entry in ``configs``.
        args: Parsed command-line options; ``cv``, ``seed`` and ``n_jobs``
            are read here.

    Returns:
        List with one score per cross-validation fold, as returned by
        ``feature_generation``.
    """
    data_configs = configs[key]
    d_columns = data_configs['d_columns']
    target = data_configs['target']
    dataset_path = data_configs["dataset_path"]

    mode = data_configs['mode']
    if mode == 'classify':
        mode = 'classification'

    df = pd.read_csv(dataset_path)
    # Explicit copy so the encoded label is an independent frame rather than
    # an assignment into a selection of ``df``.
    label = df[[target]].copy()
    # NOTE(review): the target is category-coded for every mode, including
    # regression, where this replaces the numeric target by the rank of its
    # value. Kept as-is to preserve existing scores -- confirm this is intended.
    label[target] = label[target].astype('category').cat.codes.astype(int)
    df = df.drop(columns=[target])

    # Fold count and shuffle seed come from --cv / --seed; their defaults
    # (5 and 1) match the values that used to be hard-coded here.
    if mode == "classification":
        metric_function = f1_metric
        model = rf_classify(n_jobs=args.n_jobs)
        my_cv = StratifiedKFold(n_splits=args.cv, shuffle=True, random_state=args.seed)
    else:
        metric_function = rae
        model = rf_regression(args.n_jobs)
        my_cv = KFold(n_splits=args.cv, shuffle=True, random_state=args.seed)

    score_list = []
    for index_train, index_test in my_cv.split(df, label.values):
        df_train, label_train = df.iloc[index_train], label.iloc[index_train]
        df_test, label_test = df.iloc[index_test], label.iloc[index_test]
        # d_columns is deep-copied per fold so the callee cannot alter the
        # list stored in the shared config between folds.
        score = feature_generation(df_train, label_train, df_test, label_test,
                                   deepcopy(d_columns), mode, model, metric_function, args.n_jobs)
        score_list.append(score)
    return score_list


def main():
    """Run the benchmark and write per-dataset results to ``results.csv``.

    Every dataset in ``configs`` is evaluated unless ``--file_name`` selects a
    single one. A failure in one dataset is printed with its traceback and the
    run continues with the next dataset; ``results.csv`` is rewritten after
    each successful dataset so partial results survive an aborted run.
    """
    import traceback

    args = _parse_args()
    res = pd.DataFrame(columns=['dataset', 'score', 'std'])
    keys = list(configs) if args.file_name is None else [args.file_name]
    for key in keys:
        try:
            print("Running dataset", key)
            score_list = _evaluate_dataset(key, args)
            # ``np`` is not imported explicitly in this file; presumably it is
            # re-exported by the star import above.
            mean_score = np.mean(score_list)
            print(f"The score for dataset {key} is {mean_score}")
            res.loc[len(res)] = [key, mean_score, np.std(score_list)]
            res.to_csv('results.csv', index=False)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the whole run.
            print(key)
            print(traceback.format_exc())


if __name__ == '__main__':
    main()

