# Combine all the results for Cluster SEV into a single table
import pandas as pd
import numpy as np

# read the table
df = pd.read_csv("../Results/csv/Exp1_flexclust_summary_final.csv")
df_original = pd.read_csv("../Results/csv/Exp1_original_summary_final.csv")
df_original = df_original.groupby(["Dataset","Model"]).mean().round(2)

# remove the headline1, headline2, headline3
df = df[df["Dataset"] != "headline1"]
df = df[df["Dataset"] != "headline2"]
df = df[df["Dataset"] != "headline3"]

df_mean = df.groupby(["Dataset","Model"]).mean().round(2)
del df_mean["Iteration"]
# # remove the last row
# df_mean = df_mean.iloc[:-1,:]
df_value = df_mean.copy()
df_std = df.groupby(["Dataset","Model"]).std().round(2)
del df_std["Iteration"]
df_std.columns = [i+"_std" for i in df_std.columns]

print(df_mean)

# calculate the percentage of improvement with the same Dataset and Model for Mean SEV and L_inf
for dataset in df_mean.index.get_level_values(0).unique():
    for model in df_mean.index.get_level_values(1).unique():
        try:
            df_value.loc[(dataset,model),"Average SEV"] = "(" + str(round((df_mean.loc[(dataset,model),"Average SEV"] - df_original.loc[(dataset,model),"Average SEV"])/df_original.loc[(dataset,model),"Average SEV"]*100,2)) + "\%)"
            df_value.loc[(dataset,model),"Median L_inf"] = "(" + str(round((df_mean.loc[(dataset,model),"Median L_inf"] - df_original.loc[(dataset,model),"Median L_inf"])/df_original.loc[(dataset,model),"Median L_inf"]*100,2)) + "\%)"
            df_value.loc[(dataset,model),"Mean Likelihood"] = "(" + str(round((df_mean.loc[(dataset,model),"Mean Likelihood"] - df_original.loc[(dataset,model),"Average GMM Score"])/df_original.loc[(dataset,model),"Average GMM Score"]*100,2)) + "\%)"
        except:
            print(dataset,model,"is not in the original table")

print(df_value)


# for the one with the same column name make it mean+/-std in the same block
df_final = df_mean.copy()
# remove all the elements in df_final
df_final.iloc[:,:] = ""
for i in df_mean.index:
    for j in df_mean.columns:
        if j == "Average SEV" or j == "Median L_inf" or j == "Mean Likelihood":
            # print(df_value.loc[i,j])
            try:
                df_final.loc[i,j] = "$" + str(df_mean.loc[i,j]) + "$" + df_value.loc[i,j]
            except:
                print(df_value.loc[i,:])
        elif j == "Proportion of using flexible SEV":
            df_final.loc[i,j] = str(df_mean.loc[i,j])+"\\%"
        else:
            df_final.loc[i,j] = "$" + str(df_mean.loc[i,j])+"\\pm"+str(df_std.loc[i,j+"_std"])+ "$"

# switch the order of the columns
df_final = df_final[["Train Accuracy","Test Accuracy","Train AUC","Test AUC","Average SEV","Median L_inf","Average Time","Mean Likelihood"]]
# rename the columns
df_final.columns = ["Train Accuracy","Test Accuracy","Train AUC","Test AUC","Average SEV","Average $L_\infty$","Average Time $(10^{-2})$", "Mean Log-Likelihood"]

print(df_final.to_latex(escape=False))


# print(df_final.to_latex(escape=False))
# df_final.to_csv("../Results/csv/Exp1_flexible_summary_latex.csv")