from scipy.stats import wilcoxon
import pandas as pd
import copy
import json
# HomeDirectory="QueryLevelNDCG/"
# HomeDirectory2="QueryLevelNDCG_V1/"

# HomeDirectory2="QueryLevelNDCG_V2/new_res/"

# Dataset names; get_vectors uses each one as a sub-directory of its `path`
# argument. The spelling is kept exactly as-is because it is part of the
# file paths that get built.
datasets = [
    'flight_delay',
    'online_delivary',
    'student_perf',
]

# Feature names; get_vectors uses each one as the prefix of a
# "<feature>-cosine-<date>.csv" file name.
features = ["attribute", "filter", "filter_operation", "aggregator"]

# NOTE(review): the four names below are not referenced anywhere else in the
# part of the file reviewed; `confidence` is presumably a significance
# threshold -- confirm before relying on it.
confidence = 0.05
diff = 0
nDCG = {}
nDCG_V3 = {}

def get_vectors(path, date, methods, model_index_map, vector_list, vector_feature, vector_dataset):
    """Append per-model values from the cosine CSV files to the accumulators.

    For every name in the module-level ``datasets`` and every name in the
    module-level ``features``, the file
    ``{path}/{dataset}/{feature}-cosine-{date}.csv`` is read. For each model
    in ``methods``, the CSV data row at position ``model_index_map[model]``
    is taken with its first value skipped, and the remaining values are
    appended to three accumulators.

    Args:
        path: Root directory that contains one sub-directory per dataset.
        date: Date string embedded in the CSV file names.
        methods: Iterable of model names to collect.
        model_index_map: Maps each model name to the index of its data row
            in the CSV files.
        vector_list: ``{model: list}``; extended in place with all values.
        vector_feature: ``{feature: {model: list}}``; extended in place.
        vector_dataset: ``{dataset: {model: list}}``; extended in place.

    Returns:
        The same ``(vector_list, vector_feature, vector_dataset)`` objects
        that were passed in, after being extended.
    """
    for dataset_name in datasets:
        dataset_dir = f"{path}/{dataset_name}"
        for feature_name in features:
            csv_path = f"{dataset_dir}/{feature_name}-cosine-{date}.csv"
            # After transposing, each column label is an original row index,
            # so indexing by the model's number selects that model's row.
            transposed = pd.read_csv(csv_path, index_col=False).T
            for model in methods:
                # Skip the first entry of the row (the value from the CSV's
                # first column; presumably a label -- TODO confirm).
                scores = transposed[model_index_map[model]][1:]
                vector_list[model].extend(scores)
                vector_feature[feature_name][model].extend(scores)
                vector_dataset[dataset_name][model].extend(scores)

    return vector_list, vector_feature, vector_dataset

def get_heuristic_methods():
    """Build the method names, index map and empty accumulators for the heuristic models.

    Returns:
        A 5-tuple ``(methods, model_index_map, vector_list, vector_feature,
        vector_dataset)`` where

        * ``methods`` is the list of model names,
        * ``model_index_map`` maps each model name to an integer index,
        * ``vector_list`` maps each model name to a fresh empty list,
        * ``vector_feature`` maps each feature name to its own
          ``{model: []}`` dictionary,
        * ``vector_dataset`` maps each dataset name to its own
          ``{model: []}`` dictionary.

        No list is shared between any of the returned containers.
    """
    model_index_map = {
        'xlnet': 1,
        'bert': 2,
        'roberta': 3,
        'albert': 4,
        'vanilla': 0,
    }
    # Dict insertion order gives the same ordering as the explicit name list.
    methods = list(model_index_map)

    feature_names = ("attribute", "filter", "filter_operation", "aggregator")
    dataset_names = ('flight_delay', 'online_delivary', 'student_perf')

    def empty_vectors():
        # A new dict with new lists on every call, so nothing is aliased.
        return {name: [] for name in methods}

    vector_list = empty_vectors()
    vector_feature = {feature: empty_vectors() for feature in feature_names}
    vector_dataset = {dataset: empty_vectors() for dataset in dataset_names}

    return methods, model_index_map, vector_list, vector_feature, vector_dataset
    
def get_t5_methods():
    """Build the method names, index map and empty accumulators for the ``-t`` models.

    Returns:
        A 5-tuple ``(methods, model_index_map, vector_list, vector_feature,
        vector_dataset)`` where

        * ``methods`` is the list of model names,
        * ``model_index_map`` maps each model name to an integer index,
        * ``vector_list`` maps each model name to a fresh empty list,
        * ``vector_feature`` maps each feature name to its own
          ``{model: []}`` dictionary,
        * ``vector_dataset`` maps each dataset name to its own
          ``{model: []}`` dictionary.

        No list is shared between any of the returned containers.
    """
    methods = ['roberta-ee-t', 'roberta-qa-t', 'xlnet-t', 'bert-t']
    # In this variant the indices simply follow the order of `methods`.
    model_index_map = {name: position for position, name in enumerate(methods)}

    feature_names = ("attribute", "filter", "filter_operation", "aggregator")
    dataset_names = ('flight_delay', 'online_delivary', 'student_perf')

    def empty_vectors():
        # A new dict with new lists on every call, so nothing is aliased.
        return {name: [] for name in methods}

    vector_list = empty_vectors()
    vector_feature = {feature: empty_vectors() for feature in feature_names}
    vector_dataset = {dataset: empty_vectors() for dataset in dataset_names}

    return methods, model_index_map, vector_list, vector_feature, vector_dataset



# Load the vectors of the heuristic models (files dated 06-09-2022).
path = "src/data/output/emnlp"
(
    methods,
    model_index_map,
    vector_list,
    vector_feature,
    vector_dataset,
) = get_heuristic_methods()
vector_list, vector_feature, vector_dataset = get_vectors(
    path,
    "06-09-2022",
    methods,
    model_index_map,
    vector_list,
    vector_feature,
    vector_dataset,
)

# Load the vectors of the "-t" models (files dated 03-13-2023).
path = "src/data/output/acl/exp"
(
    methods_t5,
    model_index_map_t5,
    vector_list_t5,
    vector_feature_t5,
    vector_dataset_t5,
) = get_t5_methods()
vector_list_t5, vector_feature_t5, vector_dataset_t5 = get_vectors(
    path,
    "03-13-2023",
    methods_t5,
    model_index_map_t5,
    vector_list_t5,
    vector_feature_t5,
    vector_dataset_t5,
)

# Single xlnet-vs-bert test whose statistic and p-value are printed.
w, p_v3 = wilcoxon(
    vector_list['xlnet'],
    vector_list['bert'],
    zero_method="zsplit",
)
print(w, p_v3)

# Pairwise Wilcoxon signed-rank tests over every ordered pair of models
# (heuristic and "-t" models together). Each p-value is stored under the key
# "<model1>-<model2>"; a model is also paired with itself.
wil = {}
wilfeat = {}
wildataset = {}
merged_list = {**vector_list, **vector_list_t5}
for vector1 in merged_list:
    # Guard kept from the original; no 'no Model' key is created by the
    # visible code, so it never triggers here.
    if vector1 == 'no Model':
        continue
    for vector2 in merged_list:
        if vector2 == 'no Model':
            continue
        w, p_v3 = wilcoxon(
            merged_list[vector1],
            merged_list[vector2],
            zero_method="zsplit",
        )
        wil[f'{vector1}-{vector2}'] = p_v3

# NOTE(review): the two disabled loops below index `vector_list` instead of
# the per-feature / per-dataset `vectors`, so if re-enabled as written they
# would repeat the overall p-values for every feature / dataset.

# for feature in vector_feature:
#     vectors = vector_feature[feature]
#     for vector1 in vectors:
#         if vector1 != 'no Model':
#             for vector2 in vectors:
#                 if vector2 != 'no Model':
#                     w,p_v3 = wilcoxon(vector_list[vector1],vector_list[vector2],zero_method="zsplit")
#                     wilfeat[f'{feature}-{vector1}-{vector2}'] = p_v3

# for data in vector_dataset:
#     vectors = vector_dataset[data]
#     for vector1 in vectors:
#         if vector1 != 'no Model':
#             for vector2 in vectors:
#                 if vector2 != 'no Model':
#                     w,p_v3 = wilcoxon(vector_list[vector1],vector_list[vector2],zero_method="zsplit")
#                     wildataset[f'{data}-{vector1}-{vector2}'] = p_v3

write_path = "src/data/output/acl/wilcoxon/result.json"
# The context manager guarantees the file is flushed and closed, even if
# serialisation fails part-way; the original never closed the handle.
with open(write_path, 'w') as writer_file:
    json.dump(wil, writer_file)