category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.475,0.215,0.625,0.635,0.66,0.675
attribute_recognition,200,0.78,0.055,0.885,0.85,0.86,0.88
dynamic_temporal,200,0.19,0.19,0.475,0.53,0.465,0.485
hallucination_detection,200,0.54,0.16,0.705,0.67,0.695,0.74
object_counting,200,0.255,0.275,0.5,0.475,0.495,0.505
object_localization,200,0.6,0.04,0.845,0.86,0.83,0.83
object_presence,200,0.72,0.1,0.835,0.8,0.81,0.815
scene_understanding,200,0.905,0.025,0.96,0.95,0.925,0.955
spatial_relationship,200,0.635,0.035,0.82,0.82,0.81,0.865
visual_grounding,200,0.22,0.335,0.4,0.465,0.435,0.5
Average,2000,0.532,0.143,0.705,0.706,0.698,0.725
