category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.41,0.17,0.615,0.67,0.615,0.655
attribute_recognition,200,0.695,0.065,0.81,0.85,0.8,0.815
dynamic_temporal,200,0.17,0.15,0.51,0.535,0.515,0.46
hallucination_detection,200,0.655,0.065,0.83,0.815,0.815,0.82
object_counting,200,0.125,0.34,0.38,0.35,0.41,0.355
object_localization,200,0.46,0.085,0.71,0.725,0.715,0.735
object_presence,200,0.745,0.075,0.86,0.86,0.83,0.82
scene_understanding,200,0.905,0.015,0.96,0.945,0.94,0.94
spatial_relationship,200,0.62,0.02,0.895,0.845,0.815,0.83
visual_grounding,200,0.21,0.07,0.65,0.605,0.54,0.57
Average,2000,0.5,0.106,0.722,0.72,0.7,0.7
