category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.200,0.200,0.525,0.485,0.505,0.520
attribute_recognition,200,0.525,0.095,0.705,0.745,0.745,0.755
dynamic_temporal,200,0.120,0.295,0.435,0.420,0.430,0.395
hallucination_detection,200,0.370,0.195,0.605,0.580,0.595,0.590
object_counting,200,0.160,0.325,0.435,0.385,0.380,0.430
object_localization,200,0.445,0.050,0.675,0.715,0.740,0.795
object_presence,200,0.440,0.215,0.650,0.625,0.605,0.665
scene_understanding,200,0.615,0.075,0.820,0.765,0.805,0.755
spatial_relationship,200,0.270,0.085,0.610,0.625,0.625,0.625
visual_grounding,200,0.045,0.250,0.390,0.395,0.360,0.355
Average,2000,0.319,0.179,0.585,0.574,0.579,0.589
