experiment,task,total,correct,top2_correct,accuracy,top2_accuracy,rms_error,ece,nll,total_tokens,avg_tokens
CoVR-gpt-4o_4options_20250317_134609,image2text_given,2287,1745,1982,0.7630083078268474,0.8666375163970267,0.09235859274003255,0.05948300799648588,1.7655089461320068,6885572,3010.744206383909
CoVR-r1_descripted_4options_20250324_104235,image2text_given,2287,1732,1991,0.7573240052470486,0.8705728027984259,0.09236146040024076,0.05074718613821234,1.2428682564504065,2697675,1179.569304766069
CoVR-o3-mini_descripted_4options_20250324_104235,image2text_given,2287,1722,1954,0.7529514648010495,0.8543944031482291,0.11193745526868829,0.06678766799559867,1.4712995869841123,3065401,1340.3589855706166
CoVR-r1_descripted_4options_20250324_104923,image2text_option,2287,1691,2002,0.7393965894184521,0.8753825972890249,0.10134934856904036,0.06430913860953212,0.9377870905084519,2783651,1217.1626585045913
CoVR-o3-mini_descripted_4options_20250324_104923,image2text_option,2287,1687,1966,0.7376475732400525,0.8596414516834281,0.10829499022552554,0.06339583628763724,1.507233180107084,3159931,1381.6926104066463
CoVR-gpt-4o_4options_20250318_210605,image2text_option,2287,1683,1995,0.7358985570616529,0.8723218189768255,0.09935424166889965,0.07566000929817761,1.428803400155631,7046101,3080.9361609094885
