category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.525,0.23,0.645,0.655,0.635,0.675
attribute_recognition,200,0.65,0.085,0.76,0.78,0.8,0.83
dynamic_temporal,200,0.15,0.255,0.475,0.44,0.425,0.405
hallucination_detection,200,0.395,0.27,0.555,0.585,0.52,0.565
object_counting,200,0.32,0.345,0.515,0.47,0.485,0.49
object_localization,200,0.595,0.05,0.81,0.86,0.83,0.79
object_presence,200,0.565,0.165,0.705,0.67,0.7,0.715
scene_understanding,200,0.85,0.04,0.925,0.915,0.905,0.89
spatial_relationship,200,0.585,0.025,0.82,0.815,0.81,0.835
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.464,0.246,0.621,0.619,0.611,0.619
