import pandas as pd
import numpy as np
from Wald_Test import Wald_Test

# Load the five per-replicate result tables: replicate1_results.csv .. replicate5_results.csv.
replicate_dfs = [
    pd.read_csv(f'replicate{rep_number}_results.csv')
    for rep_number in range(1, 6)
]

columns = replicate_dfs[0].columns
numeric_columns = ['fw_accuracy', 'ce_accuracy']

# Start from a copy of the first replicate: every column that is not listed in
# numeric_columns keeps the values it has in replicate 1.
df = replicate_dfs[0].copy()

# Overwrite each accuracy column with its per-row median across the replicates.
# NOTE(review): rows are matched by position, not by dataset name, so this
# assumes every replicate file lists its rows in the same order -- confirm.
n_rows = len(df)
n_replicates = len(replicate_dfs)
for metric in numeric_columns:
    # One buffer row per replicate, one buffer column per table row.
    per_replicate = np.zeros((n_replicates, n_rows))
    for rep_pos, rep_table in enumerate(replicate_dfs):
        per_replicate[rep_pos, :] = rep_table[metric].to_numpy()

    df[metric] = np.median(per_replicate, axis=0)

# Persist the aggregated table (pandas also writes the row index by default).
df.to_csv('UCI-accuracy-table.csv')

# Drop the 'nursery' dataset from the aggregated results ...
is_nursery_result = df['dataset'] == 'nursery'
df = df[~is_nursery_result]

# ... and from the dataset metadata table.
meta_data = pd.read_csv('metadata/datasets_summary.csv')
is_nursery_meta = meta_data['dataset_name'] == 'nursery'
meta_data = meta_data[~is_nursery_meta]

# Attach the metadata columns to the accuracy results. `merge` defaults to an
# inner join, so only datasets present in both tables end up in df_merged.
df_merged = pd.merge(
    meta_data,
    df,
    left_on='dataset_name',
    right_on='dataset',
)


def _report_win_counts(results: pd.DataFrame, header=None):
    """Print FW-vs-CE win/tie counts for *results* and run the Wald test.

    A row counts as an FW win when ``fw_accuracy`` is strictly greater than
    ``ce_accuracy`` and as a CE win when it is strictly smaller. Ties are the
    remainder, so any row where neither strict comparison holds (exact
    equality, or a missing value on either side) is reported as a tie.

    Args:
        results: Table with ``fw_accuracy`` and ``ce_accuracy`` columns.
        header: Optional line printed before the counts to label the subset.

    Returns:
        Tuple ``(fw_higher_count, ce_higher_count, equal_count)``.
    """
    fw_wins = (results['fw_accuracy'] > results['ce_accuracy']).sum()
    ce_wins = (results['fw_accuracy'] < results['ce_accuracy']).sum()
    ties = results.shape[0] - fw_wins - ce_wins

    if header is not None:
        print(header)
    print("FW is better", fw_wins)
    print("CE is better", ce_wins)
    print("Tie", ties)
    print("Total (sanity check)", fw_wins + ce_wins + ties)

    # Wald_Test is always called with the larger win count first; when the
    # counts are equal, the FW count goes first.
    if ce_wins > fw_wins:
        Wald_Test(ce_wins, fw_wins)
    else:
        Wald_Test(fw_wins, ce_wins)

    return fw_wins, ce_wins, ties


# All datasets.
fw_higher_count, ce_higher_count, equal_count = _report_win_counts(df_merged)

# Datasets with fewer than 30 features.
df_subset = df_merged[df_merged['n_features'] < 30]
fw_higher_count, ce_higher_count, equal_count = _report_win_counts(
    df_subset, "For features < 30"
)

# Datasets with 30 or more features.
df_subset = df_merged[df_merged['n_features'] >= 30]
fw_higher_count, ce_higher_count, equal_count = _report_win_counts(
    df_subset, "For features >= 30"
)
