
import os
import json
import warnings
import argparse
import pandas as pd
from scipy import stats
import scikit_posthocs as sp
import matplotlib.pyplot as plt
import numpy as np


# Command-line interface. Every option is optional and falls back to the
# default given here when it is not supplied.
parser = argparse.ArgumentParser(description="Script description")
parser.add_argument(
    "--dataset_path",
    default="../../Records/Hyperband/ImageNet",
    type=str,
    help="Path of benchmark data",
)
parser.add_argument(
    "--ult_objs",
    nargs="+",
    default=["test_accuracy", "test_losses"],
    type=str,
    help='List of strings (default: ["test_accuracy", "test_losses"])',
)
parser.add_argument(
    "--max_iters",
    nargs="+",
    default=[20, 50, 81, 120, 150],
    type=int,
    help="List of integers (default: [20, 50, 81, 120, 150])",
)
parser.add_argument(
    "--eta",
    default=3,
    type=int,
    help="Fraction of saving in hyperband",
)
# Parsed once at import time; the rest of the script reads its settings from `args`.
args = parser.parse_args()


# Copy the parsed command-line options into module-level names.
dataset_path = args.dataset_path
ult_objs = args.ult_objs
max_iters = args.max_iters
eta = args.eta
# Base names of the two CSV tables that are compared against each other.
file_name1, file_name2 = "acc_loss", "acc_loss_seed"

# For every requested max_iter setting, load the two result tables and run a
# Wilcoxon signed-rank test on each pair of (df column, df_seed column),
# printing the p-value and an effect size for every pair.
for max_iter in max_iters:
    print()
    print(f"******** max_iter = {max_iter} ********")
    # Only the first objective is analysed here; the remaining entries of
    # ult_objs are not visited by this loop.
    for obj in [ult_objs[0]]:
        # Load data
        result_dir = os.path.join(dataset_path, f"Max_iter_{max_iter}_eta_{eta}", "cta", f"obj_{obj}")
        if not os.path.exists(result_dir):
            warnings.warn(f"Directory {result_dir} doesn't exist.")
            continue

        file1 = os.path.join(result_dir, f"{file_name1}.csv")
        file_seed = os.path.join(result_dir, f"{file_name2}.csv")
        # The directory can exist without both CSVs; skip it the same way a
        # missing directory is skipped instead of crashing inside read_csv.
        missing = [path for path in (file1, file_seed) if not os.path.isfile(path)]
        if missing:
            warnings.warn(f"Missing file(s): {', '.join(missing)}")
            continue

        df = pd.read_csv(file1)
        df_seed = pd.read_csv(file_seed)

        # The first column of each table is skipped; every remaining column of
        # df is compared with every remaining column of df_seed.
        criterias1 = df.columns[1:]
        criterias2 = df_seed.columns[1:]
        for cta1 in criterias1:
            for cta2 in criterias2:
                # Paired differences; pandas aligns the two columns on the row index.
                d = df[cta1] - df_seed[cta2]

                try:
                    res = stats.wilcoxon(d)
                except ValueError as err:
                    # Some SciPy versions raise when every difference is zero;
                    # report the pair and carry on with the remaining ones.
                    warnings.warn(f"Wilcoxon test skipped for cta1 = {cta1}, cta2 = {cta2}: {err}")
                    continue

                # Effect size: mean difference divided by np.std(d), which uses
                # NumPy's default ddof=0 (population standard deviation).
                print(f"cta1 = {cta1}, cta2 = {cta2}, res.pvalue = {res.pvalue}, Cohen's d = {d.mean()/np.std(d)}")
