import pandas as pd
import glob
import os

# Folder that is scanned for per-model prediction CSVs.
csv_folder = './'
# Maximum number of CSVs (in sorted path order) treated as model outputs.
n_models = 5
# A row is exported when any label has fewer than this many models whose
# prediction equals the ground truth.
# NOTE: if fewer than ``n_models`` CSVs are found, no row can reach this
# threshold and every row is exported.
consensus_threshold = n_models
# Report file, written to the current working directory.
output_csv = 'consensus_less_than_5.csv'
# Columns ending with this suffix hold a model's prediction; the column name
# without the suffix holds the ground truth it is compared against.
pred_suffix = '_found_in_note'


def find_model_csvs(folder, limit, exclude=()):
    """Return up to *limit* CSV paths found in *folder*, in sorted order.

    Paths listed in *exclude* are skipped (compared as absolute paths). This
    keeps a previously written report from being loaded as if it were a model
    output when the report lives in the same folder as the inputs.
    """
    skipped = {os.path.abspath(path) for path in exclude}
    candidates = sorted(glob.glob(os.path.join(folder, '*.csv')))
    kept = [path for path in candidates if os.path.abspath(path) not in skipped]
    return kept[:limit]


def add_consensus_counts(dfs, suffix=pred_suffix):
    """Count, per row and per label, how many frames match the ground truth.

    The first frame in *dfs* is copied and used as the base: it supplies the
    ground-truth columns and receives one ``<prediction column>_consensus``
    column per prediction column. Values are compared after aligning on the
    row index, so a row missing from a frame counts as a mismatch.

    Returns:
        A ``(base_df, model_cols)`` tuple: the augmented copy of the first
        frame and the list of prediction column names that were scored.

    Raises:
        ValueError: if *dfs* is empty.
        KeyError: if a ground-truth column, or a prediction column in one of
            the later frames, is missing.
    """
    if not dfs:
        raise ValueError('at least one model DataFrame is required')

    base_df = dfs[0].copy()
    model_cols = [col for col in base_df.columns if col.endswith(suffix)]

    for col in model_cols:
        # Strip only the trailing suffix; str.replace would also remove the
        # marker from the middle of a label name.
        gt = base_df[col[:-len(suffix)]]
        preds_df = pd.concat([df[col] for df in dfs], axis=1)
        base_df[f'{col}_consensus'] = preds_df.eq(gt, axis=0).sum(axis=1)

    return base_df, model_cols


def low_consensus_mask(base_df, model_cols, threshold):
    """Return a boolean Series marking rows with any count below *threshold*.

    *base_df* must already contain the ``<col>_consensus`` columns for every
    name in *model_cols*.
    """
    count_cols = [f'{col}_consensus' for col in model_cols]
    return (base_df[count_cols] < threshold).any(axis=1)


if __name__ == '__main__':
    # The report is excluded from the inputs so that rerunning the script in
    # the same folder does not ingest its own previous output.
    csv_files = find_model_csvs(csv_folder, n_models, exclude=[output_csv])
    if not csv_files:
        raise SystemExit(f'No model CSV files found in {csv_folder!r}')

    dfs = [pd.read_csv(f) for f in csv_files]

    base_df, model_cols = add_consensus_counts(dfs)

    mask = low_consensus_mask(base_df, model_cols, consensus_threshold)
    consensus_df = base_df[mask]

    consensus_df.to_csv(output_csv, index=False)