skill,score,count,avg,normalize
robustness,12.0,19,0.631578947368421,3.526315789473684
correctness,21.0,50,0.42,2.6799999999999997
efficiency,12.0,20,0.6,3.4
factuality,19.0,44,0.4318181818181818,2.7272727272727275
commonsense,33.0,51,0.6470588235294118,3.588235294117647
comprehension,46.0,99,0.46464646464646464,2.8585858585858586
insightfulness,7.0,12,0.5833333333333334,3.3333333333333335
completeness,13.0,29,0.4482758620689655,2.793103448275862
metacognition,18.0,21,0.8571428571428571,4.428571428571429
readability,14.0,14,1.0,5.0
conciseness,12.0,19,0.631578947368421,3.526315789473684
harmlessness,21.0,25,0.84,4.359999999999999
