,Llama2-70B FT,Llama2-13B FT,Llama2-7B FT,Mistral-7B FT,GPT-4,Llama2-70B Base,Llama2-7B Base,Llama2-13B Base,Mistral 7B
Llama2-70B HG,81.25,74.75,67.5,72.5,96.75,90.25,71.25,80.5,80.0
Llama3.1-8B HG,75.5,70.5,60.25,59.0,89.0,83.5,65.25,75.0,73.75
Llama2-13B HG,77.5,62.74999999999999,63.0,67.5,93.5,86.5,68.25,75.5,74.5
Llama3-8B HG,85.25,82.75,73.0,76.0,97.25,91.5,76.0,83.25,81.75
Mistral-7B,77.5,68.75,64.5,68.5,92.75,85.25,67.75,75.0,74.5
GPT-4 HG,73.0,59.0,54.25,56.49999999999999,90.0,82.5,60.5,71.5,69.75
Llama3-8B,78.5,75.0,52.75,66.0,76.25,85.25,68.75,76.0,62.5
Llama2-13B,39.5,27.750000000000004,45.75,39.0,73.5,66.5,53.0,61.5,57.49999999999999
Gemma-2B HG,68.5,41.0,57.99999999999999,55.75,80.5,91.25,79.75,87.0,84.0
Llama3.1-70B HG,74.0,64.75,55.50000000000001,60.5,92.25,85.0,62.0,74.25,72.25
Gemma-2B,75.25,44.25,100.0,79.5,99.75,84.0,67.0,74.0,100.0
Llama2-70B,82.25,80.0,64.0,74.25,95.25,86.5,68.25,76.29629629629629,77.0
Mistral-7B HG,82.5,74.75,69.0,72.0,96.25,90.5,72.5,80.75,80.25
Contains,59.5,46.25,38.75,44.0,70.0,68.0,50.74999999999999,60.0,57.25
GPT-4,72.0,57.49999999999999,56.49999999999999,52.75,92.25,85.0,63.24999999999999,75.0,72.5
Llama3-70B HG,75.75,64.0,56.99999999999999,62.5,92.75,86.5,64.25,75.5,73.5
Llama2-7B,57.99999999999999,77.25,58.5,80.5,88.0,80.25,62.5,71.25,68.25
JudgeLM-7B HG,82.75,48.0,63.24999999999999,71.0,94.5,86.25,69.5,77.75,77.25
Llama2-7B HG,90.75,70.75,80.0,83.25,97.75,92.0,80.5,85.25,84.0
EM,36.25,0.25,24.0,20.25,58.25,63.74999999999999,46.75,56.00000000000001,59.5
Llama3-70B,78.25,68.5,61.5,65.0,94.5,87.0,66.0,76.5,73.5
JudgeLM-7B,60.0,32.5,41.25,45.5,68.5,64.0,52.5,60.5,57.25
HumanJudgement,72.25,56.49999999999999,56.00000000000001,60.75000000000001,91.5,83.75,62.25000000000001,72.75,71.75
Human Alignment,72.25,56.49999999999999,56.00000000000001,60.75000000000001,91.5,83.75,62.25000000000001,72.75,71.75
