category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.45,0.19,0.66,0.66,0.62,0.655
attribute_recognition,200,0.63,0.075,0.755,0.78,0.805,0.835
dynamic_temporal,200,0.145,0.36,0.415,0.415,0.395,0.38
hallucination_detection,200,0.415,0.215,0.58,0.61,0.625,0.635
object_counting,200,0.315,0.37,0.46,0.47,0.46,0.47
object_localization,200,0.64,0.04,0.81,0.825,0.79,0.835
object_presence,200,0.485,0.18,0.655,0.64,0.645,0.685
scene_understanding,200,0.83,0.03,0.93,0.905,0.915,0.915
spatial_relationship,200,0.595,0.045,0.775,0.805,0.825,0.825
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.45,0.25,0.604,0.611,0.608,0.624
