import pandas as pd
import os
import io

BASE_DIR = './'

# ------------------------------------
# Aggregate the per-instance result tables found at
# BASE_DIR/instance_<i>/results_<i>.md (i = 1..5) and write the mean/std of
# each metric, grouped by (data_size, prior_rate, alg), to a markdown file.
all_instance_dfs = []

for i in range(1, 6):
    instance_dir = os.path.join(BASE_DIR, f'instance_{i}')
    results_file_path = os.path.join(instance_dir, f'results_{i}.md')

    with open(results_file_path, 'r', encoding='utf-8') as f:
        file_content = f.read()

    # Parse the pipe-delimited table. Surrounding whitespace is stripped first
    # so that the header is line 0 and `skiprows=[1]` hits the line right
    # after it (presumably the markdown `|---|---|` separator row -- confirm
    # against the actual results files).
    df_instance = pd.read_csv(
        io.StringIO(file_content.strip()),
        # Raw string: `\s` and `\|` are regex escapes, not Python string
        # escapes. A non-raw literal triggers an invalid-escape warning.
        sep=r'\s*\|\s*',
        header=0,
        skiprows=[1],
        engine='python',  # regex separators require the python engine
    )
    # Drop columns that contain no values at all (presumably the empty
    # columns created by the leading/trailing `|` on each row).
    df_instance = df_instance.dropna(axis=1, how='all')
    df_instance.columns = df_instance.columns.str.strip()
    df_instance['alg'] = df_instance['alg'].str.strip()
    print(df_instance['alg'])
    all_instance_dfs.append(df_instance)

combined_df = pd.concat(all_instance_dfs, ignore_index=True)

# Only aggregate the metric columns that are actually present in the data.
metric_columns = ['dataloss', 'pr', 'roc']
actual_metric_columns = [col for col in metric_columns if col in combined_df.columns]
grouped_stats = combined_df.groupby(['data_size', 'prior_rate', 'alg'])[actual_metric_columns].agg(['mean', 'std'])

# Flatten the (metric, statistic) column MultiIndex into `<metric>_<stat>`.
grouped_stats.columns = [f'{col}_{stat}' for col, stat in grouped_stats.columns]
results_df = grouped_stats.reset_index()

output_filename = "overall_summary_stats_mean_std_by_prior_rate_alg.md"
with open(output_filename, "w", encoding="utf-8") as f:
    results_df.to_markdown(f, index=False, floatfmt=".4f")

