skill,score,count,avg,normalize
robustness,42.0,19,2.210526315789474,9.842105263157896
correctness,98.0,49,2.0,9.0
efficiency,52.0,20,2.6,11.4
factuality,117.0,44,2.659090909090909,11.636363636363637
commonsense,134.0,51,2.627450980392157,11.509803921568627
comprehension,289.0,99,2.919191919191919,12.676767676767676
insightfulness,45.0,12,3.75,16.0
completeness,81.0,29,2.793103448275862,12.172413793103448
metacognition,65.0,21,3.0952380952380953,13.380952380952381
readability,59.0,14,4.214285714285714,17.857142857142858
conciseness,52.0,19,2.736842105263158,11.947368421052632
harmlessness,96.0,25,3.84,16.36
