category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.465,0.235,0.61,0.615,0.63,0.63
attribute_recognition,200,0.665,0.04,0.785,0.84,0.815,0.86
dynamic_temporal,200,0.165,0.325,0.43,0.435,0.405,0.425
hallucination_detection,200,0.72,0.06,0.83,0.84,0.835,0.83
object_counting,200,0.125,0.425,0.335,0.31,0.35,0.31
object_localization,200,0.715,0.035,0.88,0.88,0.84,0.85
object_presence,200,0.435,0.255,0.615,0.55,0.57,0.625
scene_understanding,200,0.825,0.02,0.94,0.885,0.92,0.91
spatial_relationship,200,0.58,0.03,0.815,0.795,0.845,0.82
visual_grounding,200,0.0,0.815,0.055,0.05,0.085,0.025
Average,2000,0.47,0.224,0.63,0.62,0.63,0.629
