#%%
import os
import pandas as pd
import numpy as np

# Folder whose CSV files are merged into a single table.
folder_path = 'tree_no_depth_limit_minsample15_calbinning20percentall/sanity-check2d/y_residuals_predict_proba_HistGradientBoosting'
# List every entry in the folder. os.listdir returns names in arbitrary order,
# so sort them to make the row order of the merged output reproducible.
files = sorted(os.listdir(folder_path))

# Accumulates one single-row DataFrame per CSV file.
dfs = []

# Column labels applied, in order, to the values taken from each file.
# The commented-out lists below are alternative schemas kept for reference
# (presumably for result folders with a different layout -- TODO confirm).
# columns = ["Loss", "GL", "GL2", "Uncorrected", "Bias", "Bias2", "Depth", "Seed", "N samples", "Simulated", "True GL"]
# columns = ["Loss", "GL","Induced", "Uncorrected", "Bias", "Bias term 1","Bias term 2", "Depth", "Seed", "N samples", "Simulated", "True GL"]


# columns = ["Loss", "GL","Calibration error", "Uncorrected", "Bias", "Depth", "Seed", "N samples", "Simulated"]
columns = ["Loss", "GL","Induced", "Uncorrected", "Bias", "Bias term 1","Bias term 2", "Depth", "Seed", "N samples", "Simulated"]





# Read each file and append to list
# Turn the first line of every CSV file into a one-row DataFrame labelled
# with `columns`, and collect those frames in `dfs`.
for file in files:
    if not file.endswith('.csv'):  # Only process CSV files
        continue
    file_path = os.path.join(folder_path, file)
    # The values sit on the file's first line. Read that line as data
    # (header=None) instead of letting pandas treat it as a header: header
    # parsing renames repeated fields ("0", "0" -> "0", "0.1") and replaces
    # empty ones with "Unnamed: N", which silently corrupts the values.
    # dtype=str and na_filter=False keep each field as the exact text stored
    # in the file; nrows=1 stops after the first line.
    first_line = pd.read_csv(
        file_path, header=None, nrows=1, dtype=str, na_filter=False
    )
    # Skip the leading field (presumably a row index written by the
    # producer -- TODO confirm); the remaining fields are the values.
    values = first_line.iloc[0, 1:].tolist()
    if len(values) != len(columns):
        raise ValueError(
            f"{file_path}: expected {len(columns)} values after the leading "
            f"field, found {len(values)}"
        )
    df = pd.DataFrame([values], columns=columns)
    dfs.append(df)
#%%

# Bare expression: displays the list of per-file frames when this cell is run
# interactively; it has no effect when the file runs as a plain script.
dfs

#%%
# Concatenate all dataframes
if not dfs:
    # pd.concat would raise a generic ValueError on an empty list; fail with
    # the same exception type but say which folder produced no input.
    raise ValueError(f"No CSV files found in {folder_path!r}; nothing to concatenate")
# Every per-file frame has a single row with index 0, so renumber the rows
# instead of keeping duplicate index labels in the result.
final_df = pd.concat(dfs, axis=0, ignore_index=True)

#%%
# The third '/'-separated component of the input folder tags the output file.
path_parts = folder_path.split('/')
name = path_parts[2]
file_path = (
    'tree_no_depth_limit_minsample15_calbinning20percentall/concatenated_sanity_check-'
    + name
    + '.csv'
)
# Write the merged table to disk, leaving out the DataFrame index column.
final_df.to_csv(path_or_buf=file_path, index=False)


# %%
