category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.42,0.255,0.56,0.605,0.575,0.595
attribute_recognition,200,0.66,0.085,0.79,0.785,0.83,0.825
dynamic_temporal,200,0.16,0.33,0.42,0.425,0.39,0.41
hallucination_detection,200,0.405,0.325,0.55,0.52,0.55,0.565
object_counting,200,0.165,0.34,0.4,0.42,0.385,0.45
object_localization,200,0.57,0.06,0.77,0.775,0.77,0.76
object_presence,200,0.595,0.12,0.765,0.755,0.75,0.765
scene_understanding,200,0.88,0.05,0.935,0.93,0.915,0.915
spatial_relationship,200,0.505,0.03,0.77,0.74,0.795,0.785
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.436,0.26,0.596,0.596,0.596,0.607
