category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.39,0.265,0.605,0.56,0.575,0.565
attribute_recognition,200,0.65,0.065,0.815,0.815,0.795,0.8
dynamic_temporal,200,0.18,0.155,0.485,0.47,0.505,0.475
hallucination_detection,200,0.725,0.03,0.84,0.865,0.855,0.87
object_counting,200,0.12,0.415,0.35,0.385,0.3,0.29
object_localization,200,0.51,0.055,0.725,0.76,0.74,0.76
object_presence,200,0.42,0.27,0.62,0.56,0.585,0.6
scene_understanding,200,0.875,0.025,0.935,0.935,0.94,0.925
spatial_relationship,200,0.45,0.055,0.74,0.725,0.725,0.685
visual_grounding,200,0.01,0.505,0.235,0.17,0.21,0.195
Average,2000,0.433,0.184,0.635,0.624,0.623,0.616
