Model_Name,Benign_Injection_Rate,threshold,F1,Accuracy,Precision,Recall,TP,TN,FP,FN,Too_Early,Too_Late,Never_Triggered,Benign_Flagged,Total_Tasks,Total_Cost,Data
gpt-4o,0.0,0.9,0.6766917293233083,0.7556818181818182,1.0,0.5113636363636364,45,88,0,43,0,1,42,0,176,0.42257500000000014,../data/agent_tasks/test_data.json
gpt-4o,0.2,0.9,0.5691056910569106,0.6988636363636364,1.0,0.3977272727272727,35,88,0,53,0,0,53,0,176,0.5429400000000001,../data/agent_tasks/test_data.json
gpt-4o,0.4,0.9,0.5454545454545454,0.6875,1.0,0.375,33,88,0,55,0,0,55,0,176,0.6856850000000003,../data/agent_tasks/test_data.json
gpt-4o,0.6,0.9,0.4424778761061947,0.6420454545454546,1.0,0.2840909090909091,25,88,0,63,0,2,61,0,176,0.8832500000000003,../data/agent_tasks/test_data.json
gpt-4o,0.8,0.9,0.2745098039215686,0.5795454545454546,1.0,0.1590909090909091,14,88,0,74,0,0,74,0,176,1.1154900000000003,../data/agent_tasks/test_data.json
gpt-4o,1.0,0.9,0.2222222222222222,0.5625,1.0,0.125,11,88,0,77,0,2,75,0,176,1.4327199999999995,../data/agent_tasks/test_data.json
