category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.575,0.115,0.765,0.73,0.76,0.72
attribute_recognition,200,0.805,0.04,0.9,0.9,0.895,0.905
dynamic_temporal,200,0.24,0.185,0.54,0.52,0.53,0.55
hallucination_detection,200,0.595,0.09,0.765,0.81,0.77,0.8
object_counting,200,0.21,0.205,0.49,0.485,0.48,0.54
object_localization,200,0.78,0.02,0.93,0.865,0.9,0.91
object_presence,200,0.86,0.02,0.93,0.935,0.93,0.925
scene_understanding,200,0.94,0.01,0.98,0.97,0.97,0.96
spatial_relationship,200,0.825,0.02,0.925,0.93,0.915,0.93
visual_grounding,200,0.28,0.025,0.7,0.705,0.735,0.725
Average,2000,0.611,0.073,0.793,0.785,0.788,0.796
