category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.58,0.135,0.76,0.71,0.73,0.73
attribute_recognition,200,0.7,0.06,0.845,0.82,0.855,0.83
dynamic_temporal,200,0.34,0.03,0.665,0.715,0.695,0.64
hallucination_detection,200,0.55,0.105,0.715,0.76,0.725,0.72
object_counting,200,0.19,0.235,0.49,0.48,0.435,0.455
object_localization,200,0.77,0.03,0.88,0.91,0.86,0.9
object_presence,200,0.615,0.11,0.78,0.725,0.755,0.755
scene_understanding,200,0.93,0.005,0.98,0.97,0.97,0.965
spatial_relationship,200,0.805,0.025,0.945,0.9,0.92,0.925
visual_grounding,200,0.225,0.115,0.545,0.44,0.555,0.57
Average,2000,0.57,0.085,0.761,0.743,0.75,0.749
