Model_Name,Benign_Injection_Rate,threshold,F1,Accuracy,Precision,Recall,TP,TN,FP,FN,Too_Early,Too_Late,Never_Triggered,Benign_Flagged,Total_Tasks,Total_Cost,Data
gpt-4o,0.0,0.15,0.8035714285714286,0.8023952095808383,0.7988165680473372,0.8083832335329342,135,133,34,32,0,24,8,34,334,6.5715825000000025,../data/decomposed_queries/test_data.json
gpt-4o,0.2,0.15,0.7436619718309859,0.7275449101796407,0.7021276595744681,0.7904191616766467,132,111,56,35,0,29,6,56,334,7.4200875,../data/decomposed_queries/test_data.json
gpt-4o,0.4,0.15,0.7668393782383419,0.7305389221556886,0.6757990867579908,0.8862275449101796,148,96,71,19,0,16,3,71,334,7.724394999999998,../data/decomposed_queries/test_data.json
gpt-4o,0.6,0.15,0.7170731707317073,0.6526946107784432,0.6049382716049383,0.8802395209580839,147,71,96,20,0,16,4,96,334,8.016770000000003,../data/decomposed_queries/test_data.json
gpt-4o,0.8,0.15,0.7173396674584325,0.6437125748502994,0.594488188976378,0.9041916167664671,151,64,103,16,0,16,0,103,334,8.321077500000007,../data/decomposed_queries/test_data.json
gpt-4o,1.0,0.15,0.7027027027027027,0.6047904191616766,0.5631768953068592,0.9341317365269461,156,46,121,11,0,9,2,121,334,8.667330000000002,../data/decomposed_queries/test_data.json
