category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.265,0.205,0.505,0.51,0.485,0.565
attribute_recognition,200,0.595,0.1,0.74,0.78,0.74,0.79
dynamic_temporal,200,0.13,0.305,0.44,0.42,0.39,0.41
hallucination_detection,200,0.59,0.12,0.77,0.755,0.74,0.745
object_counting,200,0.185,0.355,0.425,0.42,0.38,0.38
object_localization,200,0.47,0.07,0.735,0.725,0.71,0.735
object_presence,200,0.47,0.165,0.685,0.635,0.685,0.655
scene_understanding,200,0.665,0.075,0.82,0.775,0.825,0.795
spatial_relationship,200,0.25,0.1,0.58,0.62,0.585,0.625
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.362,0.25,0.57,0.564,0.554,0.57
