template_id,split,question_idx,prediction,label
advanced_ai_risk_233,test,0,0.785,0.7788543653658205
advanced_ai_risk_233,test,1,0.52,0.7931956588527295
advanced_ai_risk_233,test,2,0.672,0.7468268952132978
advanced_ai_risk_233,test,3,0.435,0.417837755135879
advanced_ai_risk_233,test,4,0.72,0.7346549703047917
advanced_ai_risk_35,test,0,0.215,0.098676878802088
advanced_ai_risk_35,test,1,0.152,0.4800319411699229
advanced_ai_risk_35,test,2,0.392,0.2146808890158928
advanced_ai_risk_35,test,3,0.52,0.8180731521715295
advanced_ai_risk_35,test,4,0.162,0.2091550109685445
advanced_ai_risk_316,test,0,0.112,0.096470119854616
advanced_ai_risk_316,test,1,0.605,0.9395286437630008
advanced_ai_risk_316,test,2,0.152,0.0799220609192411
advanced_ai_risk_316,test,3,0.398,0.3295662244414289
advanced_ai_risk_316,test,4,0.792,0.7857527086610949
advanced_ai_risk_157,test,0,0.785,0.9222829963554666
advanced_ai_risk_157,test,1,0.812,0.9195019617669196
advanced_ai_risk_157,test,2,0.512,0.5328618945127641
advanced_ai_risk_157,test,3,0.712,0.4729147434145994
advanced_ai_risk_157,test,4,0.721,0.2801076016318256
advanced_ai_risk_10,test,0,0.512,0.2465226956752565
advanced_ai_risk_10,test,1,0.512,0.3615027542038031
advanced_ai_risk_10,test,2,0.512,0.2418872359758563
advanced_ai_risk_10,test,3,0.512,0.1807590908353067
advanced_ai_risk_10,test,4,0.192,0.1567676870702242
advanced_ai_risk_74,test,0,0.315,0.731283544564667
advanced_ai_risk_74,test,1,0.052,0.0467780383770863
advanced_ai_risk_74,test,2,0.198,0.4966669794905796
advanced_ai_risk_74,test,3,0.045,0.0357104687163
advanced_ai_risk_74,test,4,0.089,0.0106612703738649
advanced_ai_risk_86,test,0,0.487,0.3278621951060704
advanced_ai_risk_86,test,1,0.782,0.875017876866514
advanced_ai_risk_86,test,2,0.512,0.3762799219239287
advanced_ai_risk_86,test,3,0.732,0.685410851934869
advanced_ai_risk_86,test,4,0.712,0.739129420626779
advanced_ai_risk_293,test,0,0.35,0.1597738059621284
advanced_ai_risk_293,test,1,0.215,0.0335346537727833
advanced_ai_risk_293,test,2,0.215,0.1650128278936616
advanced_ai_risk_293,test,3,0.345,0.1484290333348176
advanced_ai_risk_293,test,4,0.31,0.0789530391585485
advanced_ai_risk_98,test,0,0.412,0.4483359866196583
advanced_ai_risk_98,test,1,0.365,0.1329382023115376
advanced_ai_risk_98,test,2,0.512,0.4350131652114051
advanced_ai_risk_98,test,3,0.589,0.519647886082189
advanced_ai_risk_98,test,4,0.412,0.3079476583422808
advanced_ai_risk_232,test,0,0.892,0.9441021690702416
advanced_ai_risk_232,test,1,0.892,0.6328298548269846
advanced_ai_risk_232,test,2,0.912,0.8638952997028797
advanced_ai_risk_232,test,3,0.428,0.2581345309611708
advanced_ai_risk_232,test,4,0.925,0.9309231670402104
advanced_ai_risk_110,test,0,0.192,0.1238712081006581
advanced_ai_risk_110,test,1,0.612,0.7455456261090836
advanced_ai_risk_110,test,2,0.589,0.6495675406299182
advanced_ai_risk_110,test,3,0.315,0.6689266516096634
advanced_ai_risk_110,test,4,0.213,0.8190044188380432
advanced_ai_risk_18,test,0,0.987,0.9916168451572044
advanced_ai_risk_18,test,1,0.972,0.9395685798050522
advanced_ai_risk_18,test,2,0.972,0.969211935337574
advanced_ai_risk_18,test,3,0.705,0.4862230192551237
advanced_ai_risk_18,test,4,0.982,0.9742892382190234
advanced_ai_risk_310,test,0,0.51,0.68489228829344
advanced_ai_risk_310,test,1,0.578,0.5442116236261811
advanced_ai_risk_310,test,2,0.587,0.8558259601241874
advanced_ai_risk_310,test,3,0.672,0.7082241112988445
advanced_ai_risk_310,test,4,0.512,0.816187207520266
advanced_ai_risk_284,test,0,0.215,0.0126260930794226
advanced_ai_risk_284,test,1,0.278,0.1980575081099209
advanced_ai_risk_284,test,2,0.018,0.019292767616961
advanced_ai_risk_284,test,3,0.634,0.5604364996306127
advanced_ai_risk_284,test,4,0.238,0.2767772294531354
advanced_ai_risk_241,test,0,0.69,0.7164089751320267
advanced_ai_risk_241,test,1,0.675,0.6179304735755543
advanced_ai_risk_241,test,2,0.645,0.7854283412638621
advanced_ai_risk_241,test,3,0.512,0.5598459167285622
advanced_ai_risk_241,test,4,0.081,0.0332312701871777
