category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.56,0.225,0.63,0.71,0.67,0.67
attribute_recognition,200,0.69,0.09,0.805,0.815,0.835,0.825
dynamic_temporal,200,0.125,0.405,0.365,0.34,0.36,0.385
hallucination_detection,200,0.585,0.195,0.715,0.695,0.71,0.695
object_counting,200,0.11,0.415,0.305,0.32,0.32,0.31
object_localization,200,0.7,0.04,0.835,0.84,0.85,0.885
object_presence,200,0.755,0.1,0.845,0.84,0.795,0.865
scene_understanding,200,0.82,0.04,0.885,0.89,0.9,0.91
spatial_relationship,200,0.175,0.08,0.59,0.56,0.575,0.54
visual_grounding,200,0.305,0.13,0.575,0.535,0.62,0.56
Average,2000,0.482,0.172,0.655,0.654,0.664,0.664
