from tensorboard.backend.event_processing import event_accumulator
from argparse import ArgumentParser
import numpy as np
import os
import glob
from scipy.stats import iqr, ttest_ind

# Maps each supported task name to the scalar tag that is looked up in the
# loaded event data when scoring a run of that task.
METRICS = dict(
    cola='eval/matthews_correlation',
    mnli='eval/accuracy',
    sst2='eval/accuracy',
    stsb='eval/pearson',
    qnli='eval/accuracy',
    qqp='eval/accuracy',
    rte='eval/accuracy',
    mrpc='eval/accuracy',
)

def parse_args(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with two attributes:
            task: str, always present (the option is required).
            method: str, or ``None`` when ``--method`` is not given.
    """
    parser = ArgumentParser()
    # A required option can never fall back to a default, so none is set.
    parser.add_argument(
        '--task', type=str, required=True,
        help='Task name; also used as the root directory of the results',
    )
    # The default must be None: main() tests ``args.method is None`` to decide
    # whether to evaluate every method found under the task directory.
    parser.add_argument(
        '--method', type=str, default=None,
        help='Method to evaluate',
    )
    return parser.parse_args(argv)


def _load_scalars(path):
    """Load every scalar series available at a TensorBoard event path.

    Args:
        path: Path passed unchanged to ``EventAccumulator``.

    Returns:
        dict mapping each scalar tag to a ``(steps, values)`` tuple of NumPy
        arrays, in event order. Tags that hold no events are omitted.
    """
    event_acc = event_accumulator.EventAccumulator(path)
    event_acc.Reload()
    data = {}

    for tag in sorted(event_acc.Tags()['scalars']):
        events = list(event_acc.Scalars(tag))
        if not events:
            # Keep the mapping free of empty series.
            continue
        # Build both arrays once per tag rather than once per event.
        data[tag] = (
            np.asarray([event.step for event in events]),
            np.asarray([event.value for event in events]),
        )
    return data


def _list_subdirs(path):
    """Return the sorted names of the directories directly under *path*."""
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )


def _find_event_files(setting_dir):
    """Return the event files found below two directory levels of *setting_dir*."""
    pattern = os.path.join(setting_dir, '*', '*', '**')
    # Only regular files whose own name contains 'event' qualify; matching on
    # the whole path would also accept directories and unrelated files that
    # merely live under a path containing that word.
    return sorted(
        path for path in glob.glob(pattern, recursive=True)
        if os.path.isfile(path) and 'event' in os.path.basename(path)
    )


def _collect_best_scores(event_files, metric_tag):
    """Return the maximum value of *metric_tag* for every usable event file."""
    best_scores = []
    for event_file in event_files:
        series = _load_scalars(event_file).get(metric_tag)
        if series is None:
            # This file never logged the metric; skip it.
            continue
        values = series[1]
        if len(values) == 1:
            # Files holding a single value of the metric are deleted and
            # ignored (presumably incomplete runs -- TODO confirm).
            # os.remove avoids passing the path through a shell.
            try:
                os.remove(event_file)
            except OSError as err:
                print(f"Could not remove {event_file}: {err}")
            continue
        best_scores.append(np.max(values))
    return best_scores


def main():
    """Print the median and IQR of the best metric value per method and setting.

    Results are read from ``<task>/<method>/<setting>/*/*/**``. When no method
    is given on the command line, every sub-directory of the task directory is
    evaluated.

    Raises:
        SystemExit: If the task has no entry in ``METRICS``.
    """
    args = parse_args()

    try:
        metric_tag = METRICS[args.task]
    except KeyError:
        known = ', '.join(sorted(METRICS))
        raise SystemExit(
            f"Unknown task '{args.task}'; expected one of: {known}"
        ) from None

    if args.method is None:
        methods = _list_subdirs(args.task)
    else:
        methods = [args.method]

    for method in methods:
        print(f"Method: {method}")
        method_dir = os.path.join(args.task, method)
        for setting in _list_subdirs(method_dir):
            print(f"Setting: {setting}")
            event_files = _find_event_files(os.path.join(method_dir, setting))
            best_scores = _collect_best_scores(event_files, metric_tag)
            if not best_scores:
                # Nothing to aggregate; avoid NaN output and NumPy warnings.
                print("No usable runs found\n")
                continue
            median = np.median(best_scores)
            spread = iqr(best_scores)
            # NOTE: the label says "Mean" but the reported values are the
            # median and the inter-quartile range; the text is kept as-is so
            # existing log consumers keep working.
            print(f"Mean of Max Performance: {median}+-{spread}\n")



# Run the evaluation only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
