category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.44,0.2,0.645,0.62,0.62,0.6
attribute_recognition,200,0.495,0.115,0.715,0.73,0.69,0.715
dynamic_temporal,200,0.205,0.31,0.445,0.435,0.45,0.43
hallucination_detection,200,0.38,0.25,0.565,0.595,0.58,0.535
object_counting,200,0.355,0.215,0.54,0.585,0.575,0.555
object_localization,200,0.82,0.025,0.93,0.94,0.915,0.89
object_presence,200,0.73,0.035,0.855,0.875,0.865,0.87
scene_understanding,200,0.855,0.045,0.935,0.89,0.93,0.93
spatial_relationship,200,0.66,0.015,0.825,0.855,0.875,0.87
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.494,0.221,0.646,0.653,0.65,0.64
