,dataset,model,nan_values_in_cor,threshold,accuracy,correctness,status,cor_thr
0,BCB,llama3_8b_1.,0,0.1,0.9166666666666666,bleu,ok,bleu_0.1
1,BCB,llama3_8b_1.,0,0.2,0.8271929824561404,bleu,ok,bleu_0.2
2,BCB,llama3_8b_1.,0,0.30000000000000004,0.6456140350877193,bleu,ok,bleu_0.30000000000000004
3,BCB,llama3_8b_1.,0,0.4,0.41403508771929826,bleu,ok,bleu_0.4
4,BCB,llama3_8b_1.,0,0.5,0.24210526315789474,bleu,ok,bleu_0.5
5,BCB,llama3_8b_1.,0,0.6,0.12894736842105264,bleu,ok,bleu_0.6
6,BCB,llama3_8b_1.,0,0.7000000000000001,0.06666666666666667,bleu,ok,bleu_0.7000000000000001
7,BCB,llama3_8b_1.,0,0.8,0.028070175438596492,bleu,ok,bleu_0.8
8,BCB,llama3_8b_1.,0,0.9,0.010526315789473684,bleu,ok,bleu_0.9
9,BCB,llama3_8b_1.,0,0.1,0.9166666666666666,bleu_adapt,ok,bleu_adapt_0.1
10,BCB,llama3_8b_1.,0,0.2,0.8271929824561404,bleu_adapt,ok,bleu_adapt_0.2
11,BCB,llama3_8b_1.,0,0.30000000000000004,0.6456140350877193,bleu_adapt,ok,bleu_adapt_0.30000000000000004
12,BCB,llama3_8b_1.,0,0.4,0.41403508771929826,bleu_adapt,ok,bleu_adapt_0.4
13,BCB,llama3_8b_1.,0,0.5,0.24210526315789474,bleu_adapt,ok,bleu_adapt_0.5
14,BCB,llama3_8b_1.,0,0.6,0.12894736842105264,bleu_adapt,ok,bleu_adapt_0.6
15,BCB,llama3_8b_1.,0,0.7000000000000001,0.06666666666666667,bleu_adapt,ok,bleu_adapt_0.7000000000000001
16,BCB,llama3_8b_1.,0,0.8,0.028070175438596492,bleu_adapt,ok,bleu_adapt_0.8
17,BCB,llama3_8b_1.,0,0.9,0.010526315789473684,bleu_adapt,ok,bleu_adapt_0.9
18,BCB,llama3_8b_1.,0,0.1,0.9736842105263158,rouge1,ok,rouge1_0.1
19,BCB,llama3_8b_1.,0,0.2,0.9394736842105263,rouge1,ok,rouge1_0.2
20,BCB,llama3_8b_1.,0,0.30000000000000004,0.9149122807017543,rouge1,ok,rouge1_0.30000000000000004
21,BCB,llama3_8b_1.,0,0.4,0.8543859649122807,rouge1,ok,rouge1_0.4
22,BCB,llama3_8b_1.,0,0.5,0.7149122807017544,rouge1,ok,rouge1_0.5
23,BCB,llama3_8b_1.,0,0.6,0.5052631578947369,rouge1,ok,rouge1_0.6
24,BCB,llama3_8b_1.,0,0.7000000000000001,0.26842105263157895,rouge1,ok,rouge1_0.7000000000000001
25,BCB,llama3_8b_1.,0,0.8,0.11578947368421053,rouge1,ok,rouge1_0.8
26,BCB,llama3_8b_1.,0,0.9,0.03508771929824561,rouge1,ok,rouge1_0.9
27,BCB,llama3_8b_1.,0,0.1,0.9324561403508772,rouge2,ok,rouge2_0.1
28,BCB,llama3_8b_1.,0,0.2,0.8403508771929824,rouge2,ok,rouge2_0.2
29,BCB,llama3_8b_1.,0,0.30000000000000004,0.6859649122807018,rouge2,ok,rouge2_0.30000000000000004
30,BCB,llama3_8b_1.,0,0.4,0.47543859649122805,rouge2,ok,rouge2_0.4
31,BCB,llama3_8b_1.,0,0.5,0.2675438596491228,rouge2,ok,rouge2_0.5
32,BCB,llama3_8b_1.,0,0.6,0.15087719298245614,rouge2,ok,rouge2_0.6
33,BCB,llama3_8b_1.,0,0.7000000000000001,0.07631578947368421,rouge2,ok,rouge2_0.7000000000000001
34,BCB,llama3_8b_1.,0,0.8,0.035964912280701755,rouge2,ok,rouge2_0.8
35,BCB,llama3_8b_1.,0,0.9,0.014912280701754385,rouge2,ok,rouge2_0.9
36,BCB,llama3_8b_1.,0,0.1,0.9666666666666667,rougeL,ok,rougeL_0.1
37,BCB,llama3_8b_1.,0,0.2,0.9175438596491228,rougeL,ok,rougeL_0.2
38,BCB,llama3_8b_1.,0,0.30000000000000004,0.8482456140350877,rougeL,ok,rougeL_0.30000000000000004
39,BCB,llama3_8b_1.,0,0.4,0.6912280701754386,rougeL,ok,rougeL_0.4
40,BCB,llama3_8b_1.,0,0.5,0.46228070175438596,rougeL,ok,rougeL_0.5
41,BCB,llama3_8b_1.,0,0.6,0.2789473684210526,rougeL,ok,rougeL_0.6
42,BCB,llama3_8b_1.,0,0.7000000000000001,0.13508771929824562,rougeL,ok,rougeL_0.7000000000000001
43,BCB,llama3_8b_1.,0,0.8,0.06842105263157895,rougeL,ok,rougeL_0.8
44,BCB,llama3_8b_1.,0,0.9,0.02368421052631579,rougeL,ok,rougeL_0.9
45,BCB,llama3_8b_1.,0,0.1,0.9710526315789474,rougeLsum,ok,rougeLsum_0.1
46,BCB,llama3_8b_1.,0,0.2,0.9394736842105263,rougeLsum,ok,rougeLsum_0.2
47,BCB,llama3_8b_1.,0,0.30000000000000004,0.9122807017543859,rougeLsum,ok,rougeLsum_0.30000000000000004
48,BCB,llama3_8b_1.,0,0.4,0.85,rougeLsum,ok,rougeLsum_0.4
49,BCB,llama3_8b_1.,0,0.5,0.7043859649122807,rougeLsum,ok,rougeLsum_0.5
50,BCB,llama3_8b_1.,0,0.6,0.48947368421052634,rougeLsum,ok,rougeLsum_0.6
51,BCB,llama3_8b_1.,0,0.7000000000000001,0.2517543859649123,rougeLsum,ok,rougeLsum_0.7000000000000001
52,BCB,llama3_8b_1.,0,0.8,0.11491228070175438,rougeLsum,ok,rougeLsum_0.8
53,BCB,llama3_8b_1.,0,0.9,0.031578947368421054,rougeLsum,ok,rougeLsum_0.9
54,BCB,llama3_8b_1.,2,0.1,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
55,BCB,llama3_8b_1.,2,0.2,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
56,BCB,llama3_8b_1.,2,0.30000000000000004,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
57,BCB,llama3_8b_1.,2,0.4,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
58,BCB,llama3_8b_1.,2,0.5,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
59,BCB,llama3_8b_1.,2,0.6,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
60,BCB,llama3_8b_1.,2,0.7000000000000001,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
61,BCB,llama3_8b_1.,2,0.8,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
62,BCB,llama3_8b_1.,2,0.9,0.5325131810193322,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
63,BCB,llama3_8b_1.,0,0.1,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.1
64,BCB,llama3_8b_1.,0,0.2,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.2
65,BCB,llama3_8b_1.,0,0.30000000000000004,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.30000000000000004
66,BCB,llama3_8b_1.,0,0.4,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.4
67,BCB,llama3_8b_1.,0,0.5,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.5
68,BCB,llama3_8b_1.,0,0.6,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.6
69,BCB,llama3_8b_1.,0,0.7000000000000001,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.7000000000000001
70,BCB,llama3_8b_1.,0,0.8,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.8
71,BCB,llama3_8b_1.,0,0.9,0.38421052631578945,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.9
72,BCB,llama3_8b_1.,0,0.1,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.1
73,BCB,llama3_8b_1.,0,0.2,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.2
74,BCB,llama3_8b_1.,0,0.30000000000000004,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.30000000000000004
75,BCB,llama3_8b_1.,0,0.4,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.4
76,BCB,llama3_8b_1.,0,0.5,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.5
77,BCB,llama3_8b_1.,0,0.6,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.6
78,BCB,llama3_8b_1.,0,0.7000000000000001,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.7000000000000001
79,BCB,llama3_8b_1.,0,0.8,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.8
80,BCB,llama3_8b_1.,0,0.9,0.4307017543859649,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.9
81,BCB,llama3_8b_1.,0,0.1,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.1
82,BCB,llama3_8b_1.,0,0.2,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.2
83,BCB,llama3_8b_1.,0,0.30000000000000004,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.30000000000000004
84,BCB,llama3_8b_1.,0,0.4,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.4
85,BCB,llama3_8b_1.,0,0.5,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.5
86,BCB,llama3_8b_1.,0,0.6,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.6
87,BCB,llama3_8b_1.,0,0.7000000000000001,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.7000000000000001
88,BCB,llama3_8b_1.,0,0.8,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.8
89,BCB,llama3_8b_1.,0,0.9,0.3333333333333333,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.9
90,BCB,llama3_8b_1.,0,0.1,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
91,BCB,llama3_8b_1.,0,0.2,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
92,BCB,llama3_8b_1.,0,0.30000000000000004,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
93,BCB,llama3_8b_1.,0,0.4,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
94,BCB,llama3_8b_1.,0,0.5,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
95,BCB,llama3_8b_1.,0,0.6,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
96,BCB,llama3_8b_1.,0,0.7000000000000001,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
97,BCB,llama3_8b_1.,0,0.8,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
98,BCB,llama3_8b_1.,0,0.9,0.2719298245614035,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
99,BCB,llama3_8b_1.,0,0.1,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
100,BCB,llama3_8b_1.,0,0.2,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
101,BCB,llama3_8b_1.,0,0.30000000000000004,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
102,BCB,llama3_8b_1.,0,0.4,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
103,BCB,llama3_8b_1.,0,0.5,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
104,BCB,llama3_8b_1.,0,0.6,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
105,BCB,llama3_8b_1.,0,0.7000000000000001,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
106,BCB,llama3_8b_1.,0,0.8,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
107,BCB,llama3_8b_1.,0,0.9,0.46228070175438596,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
108,BCB,llama3_8b_1.,0,0.1,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
109,BCB,llama3_8b_1.,0,0.2,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
110,BCB,llama3_8b_1.,0,0.30000000000000004,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
111,BCB,llama3_8b_1.,0,0.4,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
112,BCB,llama3_8b_1.,0,0.5,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
113,BCB,llama3_8b_1.,0,0.6,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
114,BCB,llama3_8b_1.,0,0.7000000000000001,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
115,BCB,llama3_8b_1.,0,0.8,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
116,BCB,llama3_8b_1.,0,0.9,0.512280701754386,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
117,BCB,llama3_8b_1.,0,0.1,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.1
118,BCB,llama3_8b_1.,0,0.2,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.2
119,BCB,llama3_8b_1.,0,0.30000000000000004,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.30000000000000004
120,BCB,llama3_8b_1.,0,0.4,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.4
121,BCB,llama3_8b_1.,0,0.5,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.5
122,BCB,llama3_8b_1.,0,0.6,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.6
123,BCB,llama3_8b_1.,0,0.7000000000000001,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.7000000000000001
124,BCB,llama3_8b_1.,0,0.8,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.8
125,BCB,llama3_8b_1.,0,0.9,0.5149122807017544,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.9
126,BCB,llama3_8b_1.,0,0.1,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
127,BCB,llama3_8b_1.,0,0.2,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
128,BCB,llama3_8b_1.,0,0.30000000000000004,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
129,BCB,llama3_8b_1.,0,0.4,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
130,BCB,llama3_8b_1.,0,0.5,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
131,BCB,llama3_8b_1.,0,0.6,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
132,BCB,llama3_8b_1.,0,0.7000000000000001,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
133,BCB,llama3_8b_1.,0,0.8,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
134,BCB,llama3_8b_1.,0,0.9,0.3701754385964912,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
135,BCB,llama3_8b_1.,0,0.1,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
136,BCB,llama3_8b_1.,0,0.2,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
137,BCB,llama3_8b_1.,0,0.30000000000000004,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
138,BCB,llama3_8b_1.,0,0.4,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
139,BCB,llama3_8b_1.,0,0.5,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
140,BCB,llama3_8b_1.,0,0.6,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
141,BCB,llama3_8b_1.,0,0.7000000000000001,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
142,BCB,llama3_8b_1.,0,0.8,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
143,BCB,llama3_8b_1.,0,0.9,0.35526315789473684,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
144,BCB,llama3_8b_1.,0,0.5,0.3482456140350877,exact_correctness,ok,exact_correctness_0.5
145,BCB,llama3_8b_1.,0,0.1,0.6763157894736842,bma_judge_w8,ok,bma_judge_w8_0.1
146,BCB,llama3_8b_1.,0,0.2,0.5912280701754385,bma_judge_w8,ok,bma_judge_w8_0.2
147,BCB,llama3_8b_1.,0,0.30000000000000004,0.5307017543859649,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
148,BCB,llama3_8b_1.,0,0.4,0.46842105263157896,bma_judge_w8,ok,bma_judge_w8_0.4
149,BCB,llama3_8b_1.,0,0.5,0.4149122807017544,bma_judge_w8,ok,bma_judge_w8_0.5
150,BCB,llama3_8b_1.,0,0.6,0.31140350877192985,bma_judge_w8,ok,bma_judge_w8_0.6
151,BCB,llama3_8b_1.,0,0.7000000000000001,0.21666666666666667,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
152,BCB,llama3_8b_1.,0,0.8,0.12543859649122807,bma_judge_w8,ok,bma_judge_w8_0.8
153,BCB,llama3_8b_1.,0,0.9,0.04736842105263158,bma_judge_w8,ok,bma_judge_w8_0.9
154,BCB,llama3_8b_1.,0,0.1,0.618421052631579,bma_judge,ok,bma_judge_0.1
155,BCB,llama3_8b_1.,0,0.2,0.5596491228070175,bma_judge,ok,bma_judge_0.2
156,BCB,llama3_8b_1.,0,0.30000000000000004,0.5596491228070175,bma_judge,ok,bma_judge_0.30000000000000004
157,BCB,llama3_8b_1.,0,0.4,0.5149122807017544,bma_judge,ok,bma_judge_0.4
158,BCB,llama3_8b_1.,0,0.5,0.46578947368421053,bma_judge,ok,bma_judge_0.5
159,BCB,llama3_8b_1.,0,0.6,0.46578947368421053,bma_judge,ok,bma_judge_0.6
160,BCB,llama3_8b_1.,0,0.7000000000000001,0.3850877192982456,bma_judge,ok,bma_judge_0.7000000000000001
161,BCB,llama3_8b_1.,0,0.8,0.3850877192982456,bma_judge,ok,bma_judge_0.8
162,BCB,llama3_8b_1.,0,0.9,0.29210526315789476,bma_judge,ok,bma_judge_0.9
163,BCB,llama3_8b_i_1.,0,0.1,0.5,bleu,ok,bleu_0.1
164,BCB,llama3_8b_i_1.,0,0.2,0.21052631578947367,bleu,ok,bleu_0.2
165,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.13157894736842105,bleu,ok,bleu_0.30000000000000004
166,BCB,llama3_8b_i_1.,0,0.4,0.08157894736842106,bleu,ok,bleu_0.4
167,BCB,llama3_8b_i_1.,0,0.5,0.037719298245614034,bleu,ok,bleu_0.5
168,BCB,llama3_8b_i_1.,0,0.6,0.014912280701754385,bleu,ok,bleu_0.6
169,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.002631578947368421,bleu,ok,bleu_0.7000000000000001
170,BCB,llama3_8b_i_1.,0,0.8,0.0008771929824561404,bleu,ok,bleu_0.8
171,BCB,llama3_8b_i_1.,0,0.9,0.0,bleu,ok,bleu_0.9
172,BCB,llama3_8b_i_1.,0,0.1,0.5,bleu_adapt,ok,bleu_adapt_0.1
173,BCB,llama3_8b_i_1.,0,0.2,0.21052631578947367,bleu_adapt,ok,bleu_adapt_0.2
174,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.13157894736842105,bleu_adapt,ok,bleu_adapt_0.30000000000000004
175,BCB,llama3_8b_i_1.,0,0.4,0.08157894736842106,bleu_adapt,ok,bleu_adapt_0.4
176,BCB,llama3_8b_i_1.,0,0.5,0.037719298245614034,bleu_adapt,ok,bleu_adapt_0.5
177,BCB,llama3_8b_i_1.,0,0.6,0.014912280701754385,bleu_adapt,ok,bleu_adapt_0.6
178,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.002631578947368421,bleu_adapt,ok,bleu_adapt_0.7000000000000001
179,BCB,llama3_8b_i_1.,0,0.8,0.0008771929824561404,bleu_adapt,ok,bleu_adapt_0.8
180,BCB,llama3_8b_i_1.,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
181,BCB,llama3_8b_i_1.,0,0.1,0.9324561403508772,rouge1,ok,rouge1_0.1
182,BCB,llama3_8b_i_1.,0,0.2,0.6140350877192983,rouge1,ok,rouge1_0.2
183,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.33596491228070174,rouge1,ok,rouge1_0.30000000000000004
184,BCB,llama3_8b_i_1.,0,0.4,0.20789473684210527,rouge1,ok,rouge1_0.4
185,BCB,llama3_8b_i_1.,0,0.5,0.14385964912280702,rouge1,ok,rouge1_0.5
186,BCB,llama3_8b_i_1.,0,0.6,0.0956140350877193,rouge1,ok,rouge1_0.6
187,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.04035087719298246,rouge1,ok,rouge1_0.7000000000000001
188,BCB,llama3_8b_i_1.,0,0.8,0.012280701754385965,rouge1,ok,rouge1_0.8
189,BCB,llama3_8b_i_1.,0,0.9,0.0008771929824561404,rouge1,ok,rouge1_0.9
190,BCB,llama3_8b_i_1.,0,0.1,0.7149122807017544,rouge2,ok,rouge2_0.1
191,BCB,llama3_8b_i_1.,0,0.2,0.2982456140350877,rouge2,ok,rouge2_0.2
192,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.15964912280701754,rouge2,ok,rouge2_0.30000000000000004
193,BCB,llama3_8b_i_1.,0,0.4,0.09298245614035087,rouge2,ok,rouge2_0.4
194,BCB,llama3_8b_i_1.,0,0.5,0.04736842105263158,rouge2,ok,rouge2_0.5
195,BCB,llama3_8b_i_1.,0,0.6,0.014035087719298246,rouge2,ok,rouge2_0.6
196,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.005263157894736842,rouge2,ok,rouge2_0.7000000000000001
197,BCB,llama3_8b_i_1.,0,0.8,0.0008771929824561404,rouge2,ok,rouge2_0.8
198,BCB,llama3_8b_i_1.,0,0.9,0.0,rouge2,ok,rouge2_0.9
199,BCB,llama3_8b_i_1.,0,0.1,0.8587719298245614,rougeL,ok,rougeL_0.1
200,BCB,llama3_8b_i_1.,0,0.2,0.4394736842105263,rougeL,ok,rougeL_0.2
201,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.22017543859649122,rougeL,ok,rougeL_0.30000000000000004
202,BCB,llama3_8b_i_1.,0,0.4,0.1412280701754386,rougeL,ok,rougeL_0.4
203,BCB,llama3_8b_i_1.,0,0.5,0.08771929824561403,rougeL,ok,rougeL_0.5
204,BCB,llama3_8b_i_1.,0,0.6,0.03684210526315789,rougeL,ok,rougeL_0.6
205,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.014035087719298246,rougeL,ok,rougeL_0.7000000000000001
206,BCB,llama3_8b_i_1.,0,0.8,0.0035087719298245615,rougeL,ok,rougeL_0.8
207,BCB,llama3_8b_i_1.,0,0.9,0.0,rougeL,ok,rougeL_0.9
208,BCB,llama3_8b_i_1.,0,0.1,0.9271929824561403,rougeLsum,ok,rougeLsum_0.1
209,BCB,llama3_8b_i_1.,0,0.2,0.6070175438596491,rougeLsum,ok,rougeLsum_0.2
210,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.3298245614035088,rougeLsum,ok,rougeLsum_0.30000000000000004
211,BCB,llama3_8b_i_1.,0,0.4,0.20175438596491227,rougeLsum,ok,rougeLsum_0.4
212,BCB,llama3_8b_i_1.,0,0.5,0.14385964912280702,rougeLsum,ok,rougeLsum_0.5
213,BCB,llama3_8b_i_1.,0,0.6,0.09210526315789473,rougeLsum,ok,rougeLsum_0.6
214,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.04035087719298246,rougeLsum,ok,rougeLsum_0.7000000000000001
215,BCB,llama3_8b_i_1.,0,0.8,0.008771929824561403,rougeLsum,ok,rougeLsum_0.8
216,BCB,llama3_8b_i_1.,0,0.9,0.0008771929824561404,rougeLsum,ok,rougeLsum_0.9
217,BCB,llama3_8b_i_1.,0,0.1,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
218,BCB,llama3_8b_i_1.,0,0.2,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
219,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
220,BCB,llama3_8b_i_1.,0,0.4,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
221,BCB,llama3_8b_i_1.,0,0.5,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
222,BCB,llama3_8b_i_1.,0,0.6,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
223,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
224,BCB,llama3_8b_i_1.,0,0.8,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
225,BCB,llama3_8b_i_1.,0,0.9,0.6070175438596491,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
226,BCB,llama3_8b_i_1.,0,0.1,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
227,BCB,llama3_8b_i_1.,0,0.2,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
228,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
229,BCB,llama3_8b_i_1.,0,0.4,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
230,BCB,llama3_8b_i_1.,0,0.5,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
231,BCB,llama3_8b_i_1.,0,0.6,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
232,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
233,BCB,llama3_8b_i_1.,0,0.8,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
234,BCB,llama3_8b_i_1.,0,0.9,0.593859649122807,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
235,BCB,llama3_8b_i_1.,2,0.5,0.210896309314587,exact_correctness,ok,exact_correctness_0.5
236,BCB,llama3_8b_i_1.,0,0.1,0.7868421052631579,bma_judge_w8,ok,bma_judge_w8_0.1
237,BCB,llama3_8b_i_1.,0,0.2,0.7868421052631579,bma_judge_w8,ok,bma_judge_w8_0.2
238,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.7868421052631579,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
239,BCB,llama3_8b_i_1.,0,0.4,0.7868421052631579,bma_judge_w8,ok,bma_judge_w8_0.4
240,BCB,llama3_8b_i_1.,0,0.5,0.41403508771929826,bma_judge_w8,ok,bma_judge_w8_0.5
241,BCB,llama3_8b_i_1.,0,0.6,0.41403508771929826,bma_judge_w8,ok,bma_judge_w8_0.6
242,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.41403508771929826,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
243,BCB,llama3_8b_i_1.,0,0.8,0.41403508771929826,bma_judge_w8,ok,bma_judge_w8_0.8
244,BCB,llama3_8b_i_1.,0,0.9,0.41403508771929826,bma_judge_w8,ok,bma_judge_w8_0.9
245,BCB,llama3_8b_i_1.,0,0.1,0.6070175438596491,bma_judge,ok,bma_judge_0.1
246,BCB,llama3_8b_i_1.,0,0.2,0.6070175438596491,bma_judge,ok,bma_judge_0.2
247,BCB,llama3_8b_i_1.,0,0.30000000000000004,0.6070175438596491,bma_judge,ok,bma_judge_0.30000000000000004
248,BCB,llama3_8b_i_1.,0,0.4,0.6070175438596491,bma_judge,ok,bma_judge_0.4
249,BCB,llama3_8b_i_1.,0,0.5,0.6070175438596491,bma_judge,ok,bma_judge_0.5
250,BCB,llama3_8b_i_1.,0,0.6,0.6070175438596491,bma_judge,ok,bma_judge_0.6
251,BCB,llama3_8b_i_1.,0,0.7000000000000001,0.6070175438596491,bma_judge,ok,bma_judge_0.7000000000000001
252,BCB,llama3_8b_i_1.,0,0.8,0.6070175438596491,bma_judge,ok,bma_judge_0.8
253,BCB,llama3_8b_i_1.,0,0.9,0.6070175438596491,bma_judge,ok,bma_judge_0.9
254,COLLIE,phi35_1.,0,0.1,0.0889423076923077,bleu,ok,bleu_0.1
255,COLLIE,phi35_1.,0,0.2,0.05480769230769231,bleu,ok,bleu_0.2
256,COLLIE,phi35_1.,0,0.30000000000000004,0.03076923076923077,bleu,ok,bleu_0.30000000000000004
257,COLLIE,phi35_1.,0,0.4,0.013942307692307693,bleu,ok,bleu_0.4
258,COLLIE,phi35_1.,0,0.5,0.0038461538461538464,bleu,ok,bleu_0.5
259,COLLIE,phi35_1.,0,0.6,0.0014423076923076924,bleu,ok,bleu_0.6
260,COLLIE,phi35_1.,0,0.7000000000000001,0.0014423076923076924,bleu,ok,bleu_0.7000000000000001
261,COLLIE,phi35_1.,0,0.8,0.0014423076923076924,bleu,ok,bleu_0.8
262,COLLIE,phi35_1.,0,0.9,0.0014423076923076924,bleu,ok,bleu_0.9
263,COLLIE,phi35_1.,0,0.1,0.08942307692307692,bleu_adapt,ok,bleu_adapt_0.1
264,COLLIE,phi35_1.,0,0.2,0.05480769230769231,bleu_adapt,ok,bleu_adapt_0.2
265,COLLIE,phi35_1.,0,0.30000000000000004,0.03076923076923077,bleu_adapt,ok,bleu_adapt_0.30000000000000004
266,COLLIE,phi35_1.,0,0.4,0.013942307692307693,bleu_adapt,ok,bleu_adapt_0.4
267,COLLIE,phi35_1.,0,0.5,0.0038461538461538464,bleu_adapt,ok,bleu_adapt_0.5
268,COLLIE,phi35_1.,0,0.6,0.0014423076923076924,bleu_adapt,ok,bleu_adapt_0.6
269,COLLIE,phi35_1.,0,0.7000000000000001,0.0014423076923076924,bleu_adapt,ok,bleu_adapt_0.7000000000000001
270,COLLIE,phi35_1.,0,0.8,0.0014423076923076924,bleu_adapt,ok,bleu_adapt_0.8
271,COLLIE,phi35_1.,0,0.9,0.0014423076923076924,bleu_adapt,ok,bleu_adapt_0.9
272,COLLIE,phi35_1.,0,0.1,0.6927884615384615,rouge1,ok,rouge1_0.1
273,COLLIE,phi35_1.,0,0.2,0.3769230769230769,rouge1,ok,rouge1_0.2
274,COLLIE,phi35_1.,0,0.30000000000000004,0.16201923076923078,rouge1,ok,rouge1_0.30000000000000004
275,COLLIE,phi35_1.,0,0.4,0.08701923076923077,rouge1,ok,rouge1_0.4
276,COLLIE,phi35_1.,0,0.5,0.04471153846153846,rouge1,ok,rouge1_0.5
277,COLLIE,phi35_1.,0,0.6,0.014903846153846155,rouge1,ok,rouge1_0.6
278,COLLIE,phi35_1.,0,0.7000000000000001,0.0033653846153846156,rouge1,ok,rouge1_0.7000000000000001
279,COLLIE,phi35_1.,0,0.8,0.0014423076923076924,rouge1,ok,rouge1_0.8
280,COLLIE,phi35_1.,0,0.9,0.0014423076923076924,rouge1,ok,rouge1_0.9
281,COLLIE,phi35_1.,0,0.1,0.12259615384615384,rouge2,ok,rouge2_0.1
282,COLLIE,phi35_1.,0,0.2,0.08557692307692308,rouge2,ok,rouge2_0.2
283,COLLIE,phi35_1.,0,0.30000000000000004,0.05384615384615385,rouge2,ok,rouge2_0.30000000000000004
284,COLLIE,phi35_1.,0,0.4,0.027403846153846154,rouge2,ok,rouge2_0.4
285,COLLIE,phi35_1.,0,0.5,0.01201923076923077,rouge2,ok,rouge2_0.5
286,COLLIE,phi35_1.,0,0.6,0.0057692307692307696,rouge2,ok,rouge2_0.6
287,COLLIE,phi35_1.,0,0.7000000000000001,0.0019230769230769232,rouge2,ok,rouge2_0.7000000000000001
288,COLLIE,phi35_1.,0,0.8,0.0014423076923076924,rouge2,ok,rouge2_0.8
289,COLLIE,phi35_1.,0,0.9,0.0014423076923076924,rouge2,ok,rouge2_0.9
290,COLLIE,phi35_1.,0,0.1,0.5865384615384616,rougeL,ok,rougeL_0.1
291,COLLIE,phi35_1.,0,0.2,0.19375,rougeL,ok,rougeL_0.2
292,COLLIE,phi35_1.,0,0.30000000000000004,0.0889423076923077,rougeL,ok,rougeL_0.30000000000000004
293,COLLIE,phi35_1.,0,0.4,0.04326923076923077,rougeL,ok,rougeL_0.4
294,COLLIE,phi35_1.,0,0.5,0.014903846153846155,rougeL,ok,rougeL_0.5
295,COLLIE,phi35_1.,0,0.6,0.006730769230769231,rougeL,ok,rougeL_0.6
296,COLLIE,phi35_1.,0,0.7000000000000001,0.002403846153846154,rougeL,ok,rougeL_0.7000000000000001
297,COLLIE,phi35_1.,0,0.8,0.0014423076923076924,rougeL,ok,rougeL_0.8
298,COLLIE,phi35_1.,0,0.9,0.0014423076923076924,rougeL,ok,rougeL_0.9
299,COLLIE,phi35_1.,0,0.1,0.5860576923076923,rougeLsum,ok,rougeLsum_0.1
300,COLLIE,phi35_1.,0,0.2,0.20336538461538461,rougeLsum,ok,rougeLsum_0.2
301,COLLIE,phi35_1.,0,0.30000000000000004,0.11153846153846154,rougeLsum,ok,rougeLsum_0.30000000000000004
302,COLLIE,phi35_1.,0,0.4,0.06298076923076923,rougeLsum,ok,rougeLsum_0.4
303,COLLIE,phi35_1.,0,0.5,0.028846153846153848,rougeLsum,ok,rougeLsum_0.5
304,COLLIE,phi35_1.,0,0.6,0.009615384615384616,rougeLsum,ok,rougeLsum_0.6
305,COLLIE,phi35_1.,0,0.7000000000000001,0.0028846153846153848,rougeLsum,ok,rougeLsum_0.7000000000000001
306,COLLIE,phi35_1.,0,0.8,0.0014423076923076924,rougeLsum,ok,rougeLsum_0.8
307,COLLIE,phi35_1.,0,0.9,0.0014423076923076924,rougeLsum,ok,rougeLsum_0.9
308,COLLIE,phi35_1.,0,0.1,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
309,COLLIE,phi35_1.,0,0.2,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
310,COLLIE,phi35_1.,0,0.30000000000000004,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
311,COLLIE,phi35_1.,0,0.4,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
312,COLLIE,phi35_1.,0,0.5,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
313,COLLIE,phi35_1.,0,0.6,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
314,COLLIE,phi35_1.,0,0.7000000000000001,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
315,COLLIE,phi35_1.,0,0.8,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
316,COLLIE,phi35_1.,0,0.9,0.26971153846153845,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
317,COLLIE,phi35_1.,0,0.1,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
318,COLLIE,phi35_1.,0,0.2,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
319,COLLIE,phi35_1.,0,0.30000000000000004,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
320,COLLIE,phi35_1.,0,0.4,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
321,COLLIE,phi35_1.,0,0.5,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
322,COLLIE,phi35_1.,0,0.6,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
323,COLLIE,phi35_1.,0,0.7000000000000001,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
324,COLLIE,phi35_1.,0,0.8,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
325,COLLIE,phi35_1.,0,0.9,0.42932692307692305,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
326,COLLIE,phi35_1.,0,0.5,0.3033653846153846,exact_correctness,ok,exact_correctness_0.5
327,COLLIE,phi35_1.,0,0.1,0.5283653846153846,bma_judge_w8,ok,bma_judge_w8_0.1
328,COLLIE,phi35_1.,0,0.2,0.5283653846153846,bma_judge_w8,ok,bma_judge_w8_0.2
329,COLLIE,phi35_1.,0,0.30000000000000004,0.5283653846153846,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
330,COLLIE,phi35_1.,0,0.4,0.5283653846153846,bma_judge_w8,ok,bma_judge_w8_0.4
331,COLLIE,phi35_1.,0,0.5,0.17067307692307693,bma_judge_w8,ok,bma_judge_w8_0.5
332,COLLIE,phi35_1.,0,0.6,0.17067307692307693,bma_judge_w8,ok,bma_judge_w8_0.6
333,COLLIE,phi35_1.,0,0.7000000000000001,0.17067307692307693,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
334,COLLIE,phi35_1.,0,0.8,0.17067307692307693,bma_judge_w8,ok,bma_judge_w8_0.8
335,COLLIE,phi35_1.,0,0.9,0.17067307692307693,bma_judge_w8,ok,bma_judge_w8_0.9
336,COLLIE,phi35_1.,0,0.1,0.26971153846153845,bma_judge,ok,bma_judge_0.1
337,COLLIE,phi35_1.,0,0.2,0.26971153846153845,bma_judge,ok,bma_judge_0.2
338,COLLIE,phi35_1.,0,0.30000000000000004,0.26971153846153845,bma_judge,ok,bma_judge_0.30000000000000004
339,COLLIE,phi35_1.,0,0.4,0.26971153846153845,bma_judge,ok,bma_judge_0.4
340,COLLIE,phi35_1.,0,0.5,0.26971153846153845,bma_judge,ok,bma_judge_0.5
341,COLLIE,phi35_1.,0,0.6,0.26971153846153845,bma_judge,ok,bma_judge_0.6
342,COLLIE,phi35_1.,0,0.7000000000000001,0.26971153846153845,bma_judge,ok,bma_judge_0.7000000000000001
343,COLLIE,phi35_1.,0,0.8,0.26971153846153845,bma_judge,ok,bma_judge_0.8
344,COLLIE,phi35_1.,0,0.9,0.26971153846153845,bma_judge,ok,bma_judge_0.9
345,COQA,llama3_8b_i_1.,5,0.1,0.08786663324141389,bleu,ok,bleu_0.1
346,COQA,llama3_8b_i_1.,5,0.2,0.07896715968914515,bleu,ok,bleu_0.2
347,COQA,llama3_8b_i_1.,5,0.30000000000000004,0.06868889445976435,bleu,ok,bleu_0.30000000000000004
348,COQA,llama3_8b_i_1.,5,0.4,0.059789420907495616,bleu,ok,bleu_0.4
349,COQA,llama3_8b_i_1.,5,0.5,0.051265981448984706,bleu,ok,bleu_0.5
350,COQA,llama3_8b_i_1.,5,0.6,0.04249185259463525,bleu,ok,bleu_0.6
351,COQA,llama3_8b_i_1.,5,0.7000000000000001,0.03284031085485084,bleu,ok,bleu_0.7000000000000001
352,COQA,llama3_8b_i_1.,5,0.8,0.024316871396339933,bleu,ok,bleu_0.8
353,COQA,llama3_8b_i_1.,5,0.9,0.0188017046878917,bleu,ok,bleu_0.9
354,COQA,llama3_8b_i_1.,5,0.1,0.32714966156931563,bleu_adapt,ok,bleu_adapt_0.1
355,COQA,llama3_8b_i_1.,5,0.2,0.26698420656806215,bleu_adapt,ok,bleu_adapt_0.2
356,COQA,llama3_8b_i_1.,5,0.30000000000000004,0.2446728503384307,bleu_adapt,ok,bleu_adapt_0.30000000000000004
357,COQA,llama3_8b_i_1.,5,0.4,0.2115818500877413,bleu_adapt,ok,bleu_adapt_0.4
358,COQA,llama3_8b_i_1.,5,0.5,0.19303083479568814,bleu_adapt,ok,bleu_adapt_0.5
359,COQA,llama3_8b_i_1.,5,0.6,0.17874153923289046,bleu_adapt,ok,bleu_adapt_0.6
360,COQA,llama3_8b_i_1.,5,0.7000000000000001,0.16169466031586863,bleu_adapt,ok,bleu_adapt_0.7000000000000001
361,COQA,llama3_8b_i_1.,5,0.8,0.15003760340937577,bleu_adapt,ok,bleu_adapt_0.8
362,COQA,llama3_8b_i_1.,5,0.9,0.14452243670092754,bleu_adapt,ok,bleu_adapt_0.9
363,COQA,llama3_8b_i_1.,0,0.1,0.8306401102342478,rouge1,ok,rouge1_0.1
364,COQA,llama3_8b_i_1.,0,0.2,0.7538519353626456,rouge1,ok,rouge1_0.2
365,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.6884629838406614,rouge1,ok,rouge1_0.30000000000000004
366,COQA,llama3_8b_i_1.,0,0.4,0.6109232118251284,rouge1,ok,rouge1_0.4
367,COQA,llama3_8b_i_1.,0,0.5,0.5446573969685582,rouge1,ok,rouge1_0.5
368,COQA,llama3_8b_i_1.,0,0.6,0.5129650507328072,rouge1,ok,rouge1_0.6
369,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.4509582863585118,rouge1,ok,rouge1_0.7000000000000001
370,COQA,llama3_8b_i_1.,0,0.8,0.4027308029562821,rouge1,ok,rouge1_0.8
371,COQA,llama3_8b_i_1.,0,0.9,0.37592383815608166,rouge1,ok,rouge1_0.9
372,COQA,llama3_8b_i_1.,0,0.1,0.4047350620067644,rouge2,ok,rouge2_0.1
373,COQA,llama3_8b_i_1.,0,0.2,0.37617437053739194,rouge2,ok,rouge2_0.2
374,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.3446072904922961,rouge2,ok,rouge2_0.30000000000000004
375,COQA,llama3_8b_i_1.,0,0.4,0.30853062758361516,rouge2,ok,rouge2_0.4
376,COQA,llama3_8b_i_1.,0,0.5,0.2762119503945885,rouge2,ok,rouge2_0.5
377,COQA,llama3_8b_i_1.,0,0.6,0.2556682951271452,rouge2,ok,rouge2_0.6
378,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.22046849555305023,rouge2,ok,rouge2_0.7000000000000001
379,COQA,llama3_8b_i_1.,0,0.8,0.19579105599398722,rouge2,ok,rouge2_0.8
380,COQA,llama3_8b_i_1.,0,0.9,0.1838907678817487,rouge2,ok,rouge2_0.9
381,COQA,llama3_8b_i_1.,0,0.1,0.8300137792809721,rougeL,ok,rougeL_0.1
382,COQA,llama3_8b_i_1.,0,0.2,0.7508455467869222,rougeL,ok,rougeL_0.2
383,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.6839534009770762,rougeL,ok,rougeL_0.30000000000000004
384,COQA,llama3_8b_i_1.,0,0.4,0.6059125641989227,rougeL,ok,rougeL_0.4
385,COQA,llama3_8b_i_1.,0,0.5,0.5412752098208693,rougeL,ok,rougeL_0.5
386,COQA,llama3_8b_i_1.,0,0.6,0.5097081297757735,rougeL,ok,rougeL_0.6
387,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.44795189778278843,rougeL,ok,rougeL_0.7000000000000001
388,COQA,llama3_8b_i_1.,0,0.8,0.4006012777151447,rougeL,ok,rougeL_0.8
389,COQA,llama3_8b_i_1.,0,0.9,0.3746711762495302,rougeL,ok,rougeL_0.9
390,COQA,llama3_8b_i_1.,0,0.1,0.8300137792809721,rougeLsum,ok,rougeLsum_0.1
391,COQA,llama3_8b_i_1.,0,0.2,0.7508455467869222,rougeLsum,ok,rougeLsum_0.2
392,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.6839534009770762,rougeLsum,ok,rougeLsum_0.30000000000000004
393,COQA,llama3_8b_i_1.,0,0.4,0.6059125641989227,rougeLsum,ok,rougeLsum_0.4
394,COQA,llama3_8b_i_1.,0,0.5,0.5412752098208693,rougeLsum,ok,rougeLsum_0.5
395,COQA,llama3_8b_i_1.,0,0.6,0.5097081297757735,rougeLsum,ok,rougeLsum_0.6
396,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.44795189778278843,rougeLsum,ok,rougeLsum_0.7000000000000001
397,COQA,llama3_8b_i_1.,0,0.8,0.4006012777151447,rougeLsum,ok,rougeLsum_0.8
398,COQA,llama3_8b_i_1.,0,0.9,0.3746711762495302,rougeLsum,ok,rougeLsum_0.9
399,COQA,llama3_8b_i_1.,0,0.1,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.1
400,COQA,llama3_8b_i_1.,0,0.2,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.2
401,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.30000000000000004
402,COQA,llama3_8b_i_1.,0,0.4,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.4
403,COQA,llama3_8b_i_1.,0,0.5,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.5
404,COQA,llama3_8b_i_1.,0,0.6,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.6
405,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.9993736690467243,bert_score_f1,ok,bert_score_f1_0.7000000000000001
406,COQA,llama3_8b_i_1.,0,0.8,0.9564073656520106,bert_score_f1,ok,bert_score_f1_0.8
407,COQA,llama3_8b_i_1.,0,0.9,0.43943379681823874,bert_score_f1,ok,bert_score_f1_0.9
408,COQA,llama3_8b_i_1.,0,0.1,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
409,COQA,llama3_8b_i_1.,0,0.2,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
410,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
411,COQA,llama3_8b_i_1.,0,0.4,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
412,COQA,llama3_8b_i_1.,0,0.5,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
413,COQA,llama3_8b_i_1.,0,0.6,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
414,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
415,COQA,llama3_8b_i_1.,0,0.8,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
416,COQA,llama3_8b_i_1.,0,0.9,0.8416635350119003,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
417,COQA,llama3_8b_i_1.,0,0.1,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.1
418,COQA,llama3_8b_i_1.,0,0.2,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.2
419,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.30000000000000004
420,COQA,llama3_8b_i_1.,0,0.4,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.4
421,COQA,llama3_8b_i_1.,0,0.5,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.5
422,COQA,llama3_8b_i_1.,0,0.6,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.6
423,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.7000000000000001
424,COQA,llama3_8b_i_1.,0,0.8,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.8
425,COQA,llama3_8b_i_1.,0,0.9,0.8664662407616185,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.9
426,COQA,llama3_8b_i_1.,0,0.1,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.1
427,COQA,llama3_8b_i_1.,0,0.2,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.2
428,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.30000000000000004
429,COQA,llama3_8b_i_1.,0,0.4,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.4
430,COQA,llama3_8b_i_1.,0,0.5,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.5
431,COQA,llama3_8b_i_1.,0,0.6,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.6
432,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.7000000000000001
433,COQA,llama3_8b_i_1.,0,0.8,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.8
434,COQA,llama3_8b_i_1.,0,0.9,0.7771514468245021,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.9
435,COQA,llama3_8b_i_1.,0,0.1,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
436,COQA,llama3_8b_i_1.,0,0.2,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
437,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
438,COQA,llama3_8b_i_1.,0,0.4,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
439,COQA,llama3_8b_i_1.,0,0.5,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
440,COQA,llama3_8b_i_1.,0,0.6,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
441,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
442,COQA,llama3_8b_i_1.,0,0.8,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
443,COQA,llama3_8b_i_1.,0,0.9,0.7613679068019541,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
444,COQA,llama3_8b_i_1.,0,0.1,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
445,COQA,llama3_8b_i_1.,0,0.2,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
446,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
447,COQA,llama3_8b_i_1.,0,0.4,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
448,COQA,llama3_8b_i_1.,0,0.5,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
449,COQA,llama3_8b_i_1.,0,0.6,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
450,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
451,COQA,llama3_8b_i_1.,0,0.8,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
452,COQA,llama3_8b_i_1.,0,0.9,0.780658900162846,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
453,COQA,llama3_8b_i_1.,0,0.1,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
454,COQA,llama3_8b_i_1.,0,0.2,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
455,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
456,COQA,llama3_8b_i_1.,0,0.4,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
457,COQA,llama3_8b_i_1.,0,0.5,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
458,COQA,llama3_8b_i_1.,0,0.6,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
459,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
460,COQA,llama3_8b_i_1.,0,0.8,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
461,COQA,llama3_8b_i_1.,0,0.9,0.8657146436176876,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
462,COQA,llama3_8b_i_1.,0,0.1,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
463,COQA,llama3_8b_i_1.,0,0.2,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
464,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
465,COQA,llama3_8b_i_1.,0,0.4,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
466,COQA,llama3_8b_i_1.,0,0.5,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
467,COQA,llama3_8b_i_1.,0,0.6,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
468,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
469,COQA,llama3_8b_i_1.,0,0.8,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
470,COQA,llama3_8b_i_1.,0,0.9,0.8163597644995616,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
471,COQA,llama3_8b_i_1.,223,0.1,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.1
472,COQA,llama3_8b_i_1.,223,0.2,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.2
473,COQA,llama3_8b_i_1.,223,0.30000000000000004,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.30000000000000004
474,COQA,llama3_8b_i_1.,223,0.4,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.4
475,COQA,llama3_8b_i_1.,223,0.5,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.5
476,COQA,llama3_8b_i_1.,223,0.6,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.6
477,COQA,llama3_8b_i_1.,223,0.7000000000000001,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.7000000000000001
478,COQA,llama3_8b_i_1.,223,0.8,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.8
479,COQA,llama3_8b_i_1.,223,0.9,0.6679123711340206,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.9
480,COQA,llama3_8b_i_1.,0,0.1,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.1
481,COQA,llama3_8b_i_1.,0,0.2,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.2
482,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.30000000000000004
483,COQA,llama3_8b_i_1.,0,0.4,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.4
484,COQA,llama3_8b_i_1.,0,0.5,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.5
485,COQA,llama3_8b_i_1.,0,0.6,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.6
486,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.7000000000000001
487,COQA,llama3_8b_i_1.,0,0.8,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.8
488,COQA,llama3_8b_i_1.,0,0.9,0.7492170863084053,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.9
489,COQA,llama3_8b_i_1.,2,0.1,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
490,COQA,llama3_8b_i_1.,2,0.2,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
491,COQA,llama3_8b_i_1.,2,0.30000000000000004,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
492,COQA,llama3_8b_i_1.,2,0.4,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
493,COQA,llama3_8b_i_1.,2,0.5,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
494,COQA,llama3_8b_i_1.,2,0.6,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
495,COQA,llama3_8b_i_1.,2,0.7000000000000001,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
496,COQA,llama3_8b_i_1.,2,0.8,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
497,COQA,llama3_8b_i_1.,2,0.9,0.6777346197218393,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
498,COQA,llama3_8b_i_1.,11,0.1,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.1
499,COQA,llama3_8b_i_1.,11,0.2,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.2
500,COQA,llama3_8b_i_1.,11,0.30000000000000004,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.30000000000000004
501,COQA,llama3_8b_i_1.,11,0.4,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.4
502,COQA,llama3_8b_i_1.,11,0.5,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.5
503,COQA,llama3_8b_i_1.,11,0.6,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.6
504,COQA,llama3_8b_i_1.,11,0.7000000000000001,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.7000000000000001
505,COQA,llama3_8b_i_1.,11,0.8,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.8
506,COQA,llama3_8b_i_1.,11,0.9,0.7789764174611139,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.9
507,COQA,llama3_8b_i_1.,5,0.1,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.1
508,COQA,llama3_8b_i_1.,5,0.2,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.2
509,COQA,llama3_8b_i_1.,5,0.30000000000000004,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.30000000000000004
510,COQA,llama3_8b_i_1.,5,0.4,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.4
511,COQA,llama3_8b_i_1.,5,0.5,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.5
512,COQA,llama3_8b_i_1.,5,0.6,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.6
513,COQA,llama3_8b_i_1.,5,0.7000000000000001,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.7000000000000001
514,COQA,llama3_8b_i_1.,5,0.8,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.8
515,COQA,llama3_8b_i_1.,5,0.9,0.8670092755076461,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.9
516,COQA,llama3_8b_i_1.,0,0.1,0.9090567455843668,bma_judge_w8,ok,bma_judge_w8_0.1
517,COQA,llama3_8b_i_1.,0,0.2,0.8941500688964049,bma_judge_w8,ok,bma_judge_w8_0.2
518,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.8738569460102719,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
519,COQA,llama3_8b_i_1.,0,0.4,0.8576976074157585,bma_judge_w8,ok,bma_judge_w8_0.4
520,COQA,llama3_8b_i_1.,0,0.5,0.814104973067769,bma_judge_w8,ok,bma_judge_w8_0.5
521,COQA,llama3_8b_i_1.,0,0.6,0.7837905549292246,bma_judge_w8,ok,bma_judge_w8_0.6
522,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.7434548415382688,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
523,COQA,llama3_8b_i_1.,0,0.8,0.6973568833771765,bma_judge_w8,ok,bma_judge_w8_0.8
524,COQA,llama3_8b_i_1.,0,0.9,0.6176875861205061,bma_judge_w8,ok,bma_judge_w8_0.9
525,COQA,llama3_8b_i_1.,0,0.1,0.8938995365150946,bma_judge,ok,bma_judge_0.1
526,COQA,llama3_8b_i_1.,0,0.2,0.8728548164850307,bma_judge,ok,bma_judge_0.2
527,COQA,llama3_8b_i_1.,0,0.30000000000000004,0.8728548164850307,bma_judge,ok,bma_judge_0.30000000000000004
528,COQA,llama3_8b_i_1.,0,0.4,0.8667167731429287,bma_judge,ok,bma_judge_0.4
529,COQA,llama3_8b_i_1.,0,0.5,0.8460478516848302,bma_judge,ok,bma_judge_0.5
530,COQA,llama3_8b_i_1.,0,0.6,0.8459225854941751,bma_judge,ok,bma_judge_0.6
531,COQA,llama3_8b_i_1.,0,0.7000000000000001,0.7732681949141926,bma_judge,ok,bma_judge_0.7000000000000001
532,COQA,llama3_8b_i_1.,0,0.8,0.7732681949141926,bma_judge,ok,bma_judge_0.8
533,COQA,llama3_8b_i_1.,0,0.9,0.6272078166102969,bma_judge,ok,bma_judge_0.9
534,COQA,phi35_i_1.,6,0.1,0.07822489657766077,bleu,ok,bleu_0.1
535,COQA,phi35_i_1.,6,0.2,0.06882286573899962,bleu,ok,bleu_0.2
536,COQA,phi35_i_1.,6,0.30000000000000004,0.05678826626551335,bleu,ok,bleu_0.30000000000000004
537,COQA,phi35_i_1.,6,0.4,0.04701015419330575,bleu,ok,bleu_0.4
538,COQA,phi35_i_1.,6,0.5,0.03948852952237683,bleu,ok,bleu_0.5
539,COQA,phi35_i_1.,6,0.6,0.030713300739626425,bleu,ok,bleu_0.6
540,COQA,phi35_i_1.,6,0.7000000000000001,0.023066315657515355,bleu,ok,bleu_0.7000000000000001
541,COQA,phi35_i_1.,6,0.8,0.01704901592077222,bleu,ok,bleu_0.8
542,COQA,phi35_i_1.,6,0.9,0.012661401529397017,bleu,ok,bleu_0.9
543,COQA,phi35_i_1.,6,0.1,0.264761188416698,bleu_adapt,ok,bleu_adapt_0.1
544,COQA,phi35_i_1.,6,0.2,0.18039363169111194,bleu_adapt,ok,bleu_adapt_0.2
545,COQA,phi35_i_1.,6,0.30000000000000004,0.15256362040867494,bleu_adapt,ok,bleu_adapt_0.30000000000000004
546,COQA,phi35_i_1.,6,0.4,0.11733734486649117,bleu_adapt,ok,bleu_adapt_0.4
547,COQA,phi35_i_1.,6,0.5,0.09840792277798671,bleu_adapt,ok,bleu_adapt_0.5
548,COQA,phi35_i_1.,6,0.6,0.0852450796038611,bleu_adapt,ok,bleu_adapt_0.6
549,COQA,phi35_i_1.,6,0.7000000000000001,0.07245831766328194,bleu_adapt,ok,bleu_adapt_0.7000000000000001
550,COQA,phi35_i_1.,6,0.8,0.06430989093644227,bleu_adapt,ok,bleu_adapt_0.8
551,COQA,phi35_i_1.,6,0.9,0.059922276545067064,bleu_adapt,ok,bleu_adapt_0.9
552,COQA,phi35_i_1.,0,0.1,0.7856695477890517,rouge1,ok,rouge1_0.1
553,COQA,phi35_i_1.,0,0.2,0.6300889389953651,rouge1,ok,rouge1_0.2
554,COQA,phi35_i_1.,0,0.30000000000000004,0.5288738569460103,rouge1,ok,rouge1_0.30000000000000004
555,COQA,phi35_i_1.,0,0.4,0.4335462858574471,rouge1,ok,rouge1_0.4
556,COQA,phi35_i_1.,0,0.5,0.3571339095578103,rouge1,ok,rouge1_0.5
557,COQA,phi35_i_1.,0,0.6,0.3178003256920957,rouge1,ok,rouge1_0.6
558,COQA,phi35_i_1.,0,0.7000000000000001,0.25854941751221344,rouge1,ok,rouge1_0.7000000000000001
559,COQA,phi35_i_1.,0,0.8,0.21407991982963798,rouge1,ok,rouge1_0.8
560,COQA,phi35_i_1.,0,0.9,0.19103094074909183,rouge1,ok,rouge1_0.9
561,COQA,phi35_i_1.,0,0.1,0.3789302267318051,rouge2,ok,rouge2_0.1
562,COQA,phi35_i_1.,0,0.2,0.3347112614305399,rouge2,ok,rouge2_0.2
563,COQA,phi35_i_1.,0,0.30000000000000004,0.29061756231992986,rouge2,ok,rouge2_0.30000000000000004
564,COQA,phi35_i_1.,0,0.4,0.24188901415507955,rouge2,ok,rouge2_0.4
565,COQA,phi35_i_1.,0,0.5,0.2049354879118126,rouge2,ok,rouge2_0.5
566,COQA,phi35_i_1.,0,0.6,0.18439183264436929,rouge2,ok,rouge2_0.6
567,COQA,phi35_i_1.,0,0.7000000000000001,0.15094575973944632,rouge2,ok,rouge2_0.7000000000000001
568,COQA,phi35_i_1.,0,0.8,0.12902417637479643,rouge2,ok,rouge2_0.8
569,COQA,phi35_i_1.,0,0.9,0.11787548540648879,rouge2,ok,rouge2_0.9
570,COQA,phi35_i_1.,0,0.1,0.7840410873105349,rougeL,ok,rougeL_0.1
571,COQA,phi35_i_1.,0,0.2,0.6263309532757109,rougeL,ok,rougeL_0.2
572,COQA,phi35_i_1.,0,0.30000000000000004,0.5228610797945634,rougeL,ok,rougeL_0.30000000000000004
573,COQA,phi35_i_1.,0,0.4,0.4276587748966554,rougeL,ok,rougeL_0.4
574,COQA,phi35_i_1.,0,0.5,0.35199799574094953,rougeL,ok,rougeL_0.5
575,COQA,phi35_i_1.,0,0.6,0.31291494425654515,rougeL,ok,rougeL_0.6
576,COQA,phi35_i_1.,0,0.7000000000000001,0.2554177627458349,rougeL,ok,rougeL_0.7000000000000001
577,COQA,phi35_i_1.,0,0.8,0.21270199173243143,rougeL,ok,rougeL_0.8
578,COQA,phi35_i_1.,0,0.9,0.1904046097958161,rougeL,ok,rougeL_0.9
579,COQA,phi35_i_1.,0,0.1,0.7840410873105349,rougeLsum,ok,rougeLsum_0.1
580,COQA,phi35_i_1.,0,0.2,0.6265814856570212,rougeLsum,ok,rougeLsum_0.2
581,COQA,phi35_i_1.,0,0.30000000000000004,0.5228610797945634,rougeLsum,ok,rougeLsum_0.30000000000000004
582,COQA,phi35_i_1.,0,0.4,0.42778404108731055,rougeLsum,ok,rougeLsum_0.4
583,COQA,phi35_i_1.,0,0.5,0.35199799574094953,rougeLsum,ok,rougeLsum_0.5
584,COQA,phi35_i_1.,0,0.6,0.31291494425654515,rougeLsum,ok,rougeLsum_0.6
585,COQA,phi35_i_1.,0,0.7000000000000001,0.2554177627458349,rougeLsum,ok,rougeLsum_0.7000000000000001
586,COQA,phi35_i_1.,0,0.8,0.21270199173243143,rougeLsum,ok,rougeLsum_0.8
587,COQA,phi35_i_1.,0,0.9,0.1904046097958161,rougeLsum,ok,rougeLsum_0.9
588,COQA,phi35_i_1.,0,0.1,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
589,COQA,phi35_i_1.,0,0.2,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
590,COQA,phi35_i_1.,0,0.30000000000000004,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
591,COQA,phi35_i_1.,0,0.4,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
592,COQA,phi35_i_1.,0,0.5,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
593,COQA,phi35_i_1.,0,0.6,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
594,COQA,phi35_i_1.,0,0.7000000000000001,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
595,COQA,phi35_i_1.,0,0.8,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
596,COQA,phi35_i_1.,0,0.9,0.7830389577852938,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
597,COQA,phi35_i_1.,0,0.1,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.1
598,COQA,phi35_i_1.,0,0.2,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.2
599,COQA,phi35_i_1.,0,0.30000000000000004,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.30000000000000004
600,COQA,phi35_i_1.,0,0.4,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.4
601,COQA,phi35_i_1.,0,0.5,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.5
602,COQA,phi35_i_1.,0,0.6,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.6
603,COQA,phi35_i_1.,0,0.7000000000000001,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.7000000000000001
604,COQA,phi35_i_1.,0,0.8,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.8
605,COQA,phi35_i_1.,0,0.9,0.6016535137166479,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.9
606,COQA,phi35_i_1.,0,0.1,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.1
607,COQA,phi35_i_1.,0,0.2,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.2
608,COQA,phi35_i_1.,0,0.30000000000000004,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.30000000000000004
609,COQA,phi35_i_1.,0,0.4,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.4
610,COQA,phi35_i_1.,0,0.5,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.5
611,COQA,phi35_i_1.,0,0.6,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.6
612,COQA,phi35_i_1.,0,0.7000000000000001,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.7000000000000001
613,COQA,phi35_i_1.,0,0.8,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.8
614,COQA,phi35_i_1.,0,0.9,0.8083427282976324,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.9
615,COQA,phi35_i_1.,0,0.1,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
616,COQA,phi35_i_1.,0,0.2,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
617,COQA,phi35_i_1.,0,0.30000000000000004,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
618,COQA,phi35_i_1.,0,0.4,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
619,COQA,phi35_i_1.,0,0.5,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
620,COQA,phi35_i_1.,0,0.6,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
621,COQA,phi35_i_1.,0,0.7000000000000001,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
622,COQA,phi35_i_1.,0,0.8,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
623,COQA,phi35_i_1.,0,0.9,0.6724289114368032,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
624,COQA,phi35_i_1.,0,0.1,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
625,COQA,phi35_i_1.,0,0.2,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
626,COQA,phi35_i_1.,0,0.30000000000000004,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
627,COQA,phi35_i_1.,0,0.4,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
628,COQA,phi35_i_1.,0,0.5,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
629,COQA,phi35_i_1.,0,0.6,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
630,COQA,phi35_i_1.,0,0.7000000000000001,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
631,COQA,phi35_i_1.,0,0.8,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
632,COQA,phi35_i_1.,0,0.9,0.571840160340724,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
633,COQA,phi35_i_1.,0,0.1,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
634,COQA,phi35_i_1.,0,0.2,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
635,COQA,phi35_i_1.,0,0.30000000000000004,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
636,COQA,phi35_i_1.,0,0.4,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
637,COQA,phi35_i_1.,0,0.5,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
638,COQA,phi35_i_1.,0,0.6,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
639,COQA,phi35_i_1.,0,0.7000000000000001,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
640,COQA,phi35_i_1.,0,0.8,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
641,COQA,phi35_i_1.,0,0.9,0.6758110985844921,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
642,COQA,phi35_i_1.,0,0.1,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.1
643,COQA,phi35_i_1.,0,0.2,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.2
644,COQA,phi35_i_1.,0,0.30000000000000004,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.30000000000000004
645,COQA,phi35_i_1.,0,0.4,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.4
646,COQA,phi35_i_1.,0,0.5,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.5
647,COQA,phi35_i_1.,0,0.6,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.6
648,COQA,phi35_i_1.,0,0.7000000000000001,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.7000000000000001
649,COQA,phi35_i_1.,0,0.8,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.8
650,COQA,phi35_i_1.,0,0.9,0.6481272704497056,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.9
651,COQA,phi35_i_1.,0,0.1,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
652,COQA,phi35_i_1.,0,0.2,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
653,COQA,phi35_i_1.,0,0.30000000000000004,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
654,COQA,phi35_i_1.,0,0.4,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
655,COQA,phi35_i_1.,0,0.5,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
656,COQA,phi35_i_1.,0,0.6,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
657,COQA,phi35_i_1.,0,0.7000000000000001,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
658,COQA,phi35_i_1.,0,0.8,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
659,COQA,phi35_i_1.,0,0.9,0.8079669297256671,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
660,COQA,phi35_i_1.,0,0.1,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.1
661,COQA,phi35_i_1.,0,0.2,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.2
662,COQA,phi35_i_1.,0,0.30000000000000004,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.30000000000000004
663,COQA,phi35_i_1.,0,0.4,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.4
664,COQA,phi35_i_1.,0,0.5,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.5
665,COQA,phi35_i_1.,0,0.6,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.6
666,COQA,phi35_i_1.,0,0.7000000000000001,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.7000000000000001
667,COQA,phi35_i_1.,0,0.8,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.8
668,COQA,phi35_i_1.,0,0.9,0.680195415257422,j_llama8b_qa_16_0.49_1,ok,j_llama8b_qa_16_0.49_1_0.9
669,COQA,phi35_i_1.,0,0.1,0.8882625579356132,bma_judge_w8,ok,bma_judge_w8_0.1
670,COQA,phi35_i_1.,0,0.2,0.8503069021671051,bma_judge_w8,ok,bma_judge_w8_0.2
671,COQA,phi35_i_1.,0,0.30000000000000004,0.8161092321182513,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
672,COQA,phi35_i_1.,0,0.4,0.7766503820618815,bma_judge_w8,ok,bma_judge_w8_0.4
673,COQA,phi35_i_1.,0,0.5,0.7305524239007892,bma_judge_w8,ok,bma_judge_w8_0.5
674,COQA,phi35_i_1.,0,0.6,0.6697983214330452,bma_judge_w8,ok,bma_judge_w8_0.6
675,COQA,phi35_i_1.,0,0.7000000000000001,0.6065388951521984,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
676,COQA,phi35_i_1.,0,0.8,0.5168483026431167,bma_judge_w8,ok,bma_judge_w8_0.8
677,COQA,phi35_i_1.,0,0.9,0.3943379681823876,bma_judge_w8,ok,bma_judge_w8_0.9
678,COQA,phi35_i_1.,0,0.1,0.8561944131278968,bma_judge,ok,bma_judge_0.1
679,COQA,phi35_i_1.,0,0.2,0.8214956783164225,bma_judge,ok,bma_judge_0.2
680,COQA,phi35_i_1.,0,0.30000000000000004,0.8214956783164225,bma_judge,ok,bma_judge_0.30000000000000004
681,COQA,phi35_i_1.,0,0.4,0.7924339220844294,bma_judge,ok,bma_judge_0.4
682,COQA,phi35_i_1.,0,0.5,0.7108856319679319,bma_judge,ok,bma_judge_0.5
683,COQA,phi35_i_1.,0,0.6,0.7108856319679319,bma_judge,ok,bma_judge_0.6
684,COQA,phi35_i_1.,0,0.7000000000000001,0.6039083051484404,bma_judge,ok,bma_judge_0.7000000000000001
685,COQA,phi35_i_1.,0,0.8,0.6039083051484404,bma_judge,ok,bma_judge_0.8
686,COQA,phi35_i_1.,0,0.9,0.4603532506576475,bma_judge,ok,bma_judge_0.9
687,COLLIE,llama3_70b_i_1.,0,0.1,0.09711538461538462,bleu,ok,bleu_0.1
688,COLLIE,llama3_70b_i_1.,0,0.2,0.06538461538461539,bleu,ok,bleu_0.2
689,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.03990384615384615,bleu,ok,bleu_0.30000000000000004
690,COLLIE,llama3_70b_i_1.,0,0.4,0.013942307692307693,bleu,ok,bleu_0.4
691,COLLIE,llama3_70b_i_1.,0,0.5,0.004326923076923077,bleu,ok,bleu_0.5
692,COLLIE,llama3_70b_i_1.,0,0.6,0.0004807692307692308,bleu,ok,bleu_0.6
693,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
694,COLLIE,llama3_70b_i_1.,0,0.8,0.0,bleu,ok,bleu_0.8
695,COLLIE,llama3_70b_i_1.,0,0.9,0.0,bleu,ok,bleu_0.9
696,COLLIE,llama3_70b_i_1.,0,0.1,0.09759615384615385,bleu_adapt,ok,bleu_adapt_0.1
697,COLLIE,llama3_70b_i_1.,0,0.2,0.06586538461538462,bleu_adapt,ok,bleu_adapt_0.2
698,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.04038461538461539,bleu_adapt,ok,bleu_adapt_0.30000000000000004
699,COLLIE,llama3_70b_i_1.,0,0.4,0.013942307692307693,bleu_adapt,ok,bleu_adapt_0.4
700,COLLIE,llama3_70b_i_1.,0,0.5,0.004326923076923077,bleu_adapt,ok,bleu_adapt_0.5
701,COLLIE,llama3_70b_i_1.,0,0.6,0.0004807692307692308,bleu_adapt,ok,bleu_adapt_0.6
702,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
703,COLLIE,llama3_70b_i_1.,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
704,COLLIE,llama3_70b_i_1.,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
705,COLLIE,llama3_70b_i_1.,0,0.1,0.7153846153846154,rouge1,ok,rouge1_0.1
706,COLLIE,llama3_70b_i_1.,0,0.2,0.4774038461538462,rouge1,ok,rouge1_0.2
707,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.24807692307692308,rouge1,ok,rouge1_0.30000000000000004
708,COLLIE,llama3_70b_i_1.,0,0.4,0.12451923076923077,rouge1,ok,rouge1_0.4
709,COLLIE,llama3_70b_i_1.,0,0.5,0.06923076923076923,rouge1,ok,rouge1_0.5
710,COLLIE,llama3_70b_i_1.,0,0.6,0.03173076923076923,rouge1,ok,rouge1_0.6
711,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.008653846153846154,rouge1,ok,rouge1_0.7000000000000001
712,COLLIE,llama3_70b_i_1.,0,0.8,0.0019230769230769232,rouge1,ok,rouge1_0.8
713,COLLIE,llama3_70b_i_1.,0,0.9,0.0009615384615384616,rouge1,ok,rouge1_0.9
714,COLLIE,llama3_70b_i_1.,0,0.1,0.15144230769230768,rouge2,ok,rouge2_0.1
715,COLLIE,llama3_70b_i_1.,0,0.2,0.11201923076923077,rouge2,ok,rouge2_0.2
716,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.08413461538461539,rouge2,ok,rouge2_0.30000000000000004
717,COLLIE,llama3_70b_i_1.,0,0.4,0.04903846153846154,rouge2,ok,rouge2_0.4
718,COLLIE,llama3_70b_i_1.,0,0.5,0.022596153846153846,rouge2,ok,rouge2_0.5
719,COLLIE,llama3_70b_i_1.,0,0.6,0.006730769230769231,rouge2,ok,rouge2_0.6
720,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.0019230769230769232,rouge2,ok,rouge2_0.7000000000000001
721,COLLIE,llama3_70b_i_1.,0,0.8,0.0014423076923076924,rouge2,ok,rouge2_0.8
722,COLLIE,llama3_70b_i_1.,0,0.9,0.0009615384615384616,rouge2,ok,rouge2_0.9
723,COLLIE,llama3_70b_i_1.,0,0.1,0.6480769230769231,rougeL,ok,rougeL_0.1
724,COLLIE,llama3_70b_i_1.,0,0.2,0.3038461538461538,rougeL,ok,rougeL_0.2
725,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.15048076923076922,rougeL,ok,rougeL_0.30000000000000004
726,COLLIE,llama3_70b_i_1.,0,0.4,0.08461538461538462,rougeL,ok,rougeL_0.4
727,COLLIE,llama3_70b_i_1.,0,0.5,0.03365384615384615,rougeL,ok,rougeL_0.5
728,COLLIE,llama3_70b_i_1.,0,0.6,0.01201923076923077,rougeL,ok,rougeL_0.6
729,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.0033653846153846156,rougeL,ok,rougeL_0.7000000000000001
730,COLLIE,llama3_70b_i_1.,0,0.8,0.0014423076923076924,rougeL,ok,rougeL_0.8
731,COLLIE,llama3_70b_i_1.,0,0.9,0.0009615384615384616,rougeL,ok,rougeL_0.9
732,COLLIE,llama3_70b_i_1.,0,0.1,0.6451923076923077,rougeLsum,ok,rougeLsum_0.1
733,COLLIE,llama3_70b_i_1.,0,0.2,0.30480769230769234,rougeLsum,ok,rougeLsum_0.2
734,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.16298076923076923,rougeLsum,ok,rougeLsum_0.30000000000000004
735,COLLIE,llama3_70b_i_1.,0,0.4,0.09903846153846153,rougeLsum,ok,rougeLsum_0.4
736,COLLIE,llama3_70b_i_1.,0,0.5,0.05673076923076923,rougeLsum,ok,rougeLsum_0.5
737,COLLIE,llama3_70b_i_1.,0,0.6,0.027403846153846154,rougeLsum,ok,rougeLsum_0.6
738,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.0057692307692307696,rougeLsum,ok,rougeLsum_0.7000000000000001
739,COLLIE,llama3_70b_i_1.,0,0.8,0.0014423076923076924,rougeLsum,ok,rougeLsum_0.8
740,COLLIE,llama3_70b_i_1.,0,0.9,0.0009615384615384616,rougeLsum,ok,rougeLsum_0.9
741,COLLIE,llama3_70b_i_1.,0,0.1,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.1
742,COLLIE,llama3_70b_i_1.,0,0.2,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.2
743,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.30000000000000004
744,COLLIE,llama3_70b_i_1.,0,0.4,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.4
745,COLLIE,llama3_70b_i_1.,0,0.5,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.5
746,COLLIE,llama3_70b_i_1.,0,0.6,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.6
747,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.7000000000000001
748,COLLIE,llama3_70b_i_1.,0,0.8,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.8
749,COLLIE,llama3_70b_i_1.,0,0.9,0.5038461538461538,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.9
750,COLLIE,llama3_70b_i_1.,0,0.1,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
751,COLLIE,llama3_70b_i_1.,0,0.2,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
752,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
753,COLLIE,llama3_70b_i_1.,0,0.4,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
754,COLLIE,llama3_70b_i_1.,0,0.5,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
755,COLLIE,llama3_70b_i_1.,0,0.6,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
756,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
757,COLLIE,llama3_70b_i_1.,0,0.8,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
758,COLLIE,llama3_70b_i_1.,0,0.9,0.0,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
759,COLLIE,llama3_70b_i_1.,0,0.1,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
760,COLLIE,llama3_70b_i_1.,0,0.2,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
761,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
762,COLLIE,llama3_70b_i_1.,0,0.4,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
763,COLLIE,llama3_70b_i_1.,0,0.5,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
764,COLLIE,llama3_70b_i_1.,0,0.6,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
765,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
766,COLLIE,llama3_70b_i_1.,0,0.8,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
767,COLLIE,llama3_70b_i_1.,0,0.9,0.31826923076923075,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
768,COLLIE,llama3_70b_i_1.,0,0.1,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.1
769,COLLIE,llama3_70b_i_1.,0,0.2,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.2
770,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.30000000000000004
771,COLLIE,llama3_70b_i_1.,0,0.4,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.4
772,COLLIE,llama3_70b_i_1.,0,0.5,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.5
773,COLLIE,llama3_70b_i_1.,0,0.6,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.6
774,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.7000000000000001
775,COLLIE,llama3_70b_i_1.,0,0.8,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.8
776,COLLIE,llama3_70b_i_1.,0,0.9,0.02403846153846154,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.9
777,COLLIE,llama3_70b_i_1.,0,0.5,0.4889423076923077,exact_correctness,ok,exact_correctness_0.5
778,COLLIE,llama3_70b_i_1.,0,0.1,0.60625,bma_judge_w8,ok,bma_judge_w8_0.1
779,COLLIE,llama3_70b_i_1.,0,0.2,0.60625,bma_judge_w8,ok,bma_judge_w8_0.2
780,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.23076923076923078,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
781,COLLIE,llama3_70b_i_1.,0,0.4,0.23076923076923078,bma_judge_w8,ok,bma_judge_w8_0.4
782,COLLIE,llama3_70b_i_1.,0,0.5,0.009134615384615385,bma_judge_w8,ok,bma_judge_w8_0.5
783,COLLIE,llama3_70b_i_1.,0,0.6,0.009134615384615385,bma_judge_w8,ok,bma_judge_w8_0.6
784,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.009134615384615385,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
785,COLLIE,llama3_70b_i_1.,0,0.8,0.0,bma_judge_w8,ok,bma_judge_w8_0.8
786,COLLIE,llama3_70b_i_1.,0,0.9,0.0,bma_judge_w8,ok,bma_judge_w8_0.9
787,COLLIE,llama3_70b_i_1.,0,0.1,0.60625,bma_judge,ok,bma_judge_0.1
788,COLLIE,llama3_70b_i_1.,0,0.2,0.60625,bma_judge,ok,bma_judge_0.2
789,COLLIE,llama3_70b_i_1.,0,0.30000000000000004,0.23076923076923078,bma_judge,ok,bma_judge_0.30000000000000004
790,COLLIE,llama3_70b_i_1.,0,0.4,0.23076923076923078,bma_judge,ok,bma_judge_0.4
791,COLLIE,llama3_70b_i_1.,0,0.5,0.009134615384615385,bma_judge,ok,bma_judge_0.5
792,COLLIE,llama3_70b_i_1.,0,0.6,0.009134615384615385,bma_judge,ok,bma_judge_0.6
793,COLLIE,llama3_70b_i_1.,0,0.7000000000000001,0.009134615384615385,bma_judge,ok,bma_judge_0.7000000000000001
794,COLLIE,llama3_70b_i_1.,0,0.8,0.0,bma_judge,ok,bma_judge_0.8
795,COLLIE,llama3_70b_i_1.,0,0.9,0.0,bma_judge,ok,bma_judge_0.9
796,TRIVIA,phi35_i_1.,0,0.1,0.036502452073116365,bleu,ok,bleu_0.1
797,TRIVIA,phi35_i_1.,0,0.2,0.017108782880071335,bleu,ok,bleu_0.2
798,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.0057400802496656265,bleu,ok,bleu_0.30000000000000004
799,TRIVIA,phi35_i_1.,0,0.4,0.0026192599197503346,bleu,ok,bleu_0.4
800,TRIVIA,phi35_i_1.,0,0.5,0.0006130182790905038,bleu,ok,bleu_0.5
801,TRIVIA,phi35_i_1.,0,0.6,0.00022291573785109228,bleu,ok,bleu_0.6
802,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.00011145786892554614,bleu,ok,bleu_0.7000000000000001
803,TRIVIA,phi35_i_1.,0,0.8,0.00011145786892554614,bleu,ok,bleu_0.8
804,TRIVIA,phi35_i_1.,0,0.9,0.00011145786892554614,bleu,ok,bleu_0.9
805,TRIVIA,phi35_i_1.,0,0.1,0.05834819438252341,bleu_adapt,ok,bleu_adapt_0.1
806,TRIVIA,phi35_i_1.,0,0.2,0.020341061078912172,bleu_adapt,ok,bleu_adapt_0.2
807,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.006408827463218903,bleu_adapt,ok,bleu_adapt_0.30000000000000004
808,TRIVIA,phi35_i_1.,0,0.4,0.003065091395452519,bleu_adapt,ok,bleu_adapt_0.4
809,TRIVIA,phi35_i_1.,0,0.5,0.0010588497547926884,bleu_adapt,ok,bleu_adapt_0.5
810,TRIVIA,phi35_i_1.,0,0.6,0.0006687472135532769,bleu_adapt,ok,bleu_adapt_0.6
811,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.0005572893446277307,bleu_adapt,ok,bleu_adapt_0.7000000000000001
812,TRIVIA,phi35_i_1.,0,0.8,0.0005572893446277307,bleu_adapt,ok,bleu_adapt_0.8
813,TRIVIA,phi35_i_1.,0,0.9,0.0005572893446277307,bleu_adapt,ok,bleu_adapt_0.9
814,TRIVIA,phi35_i_1.,0,0.1,0.6472358448506464,rouge1,ok,rouge1_0.1
815,TRIVIA,phi35_i_1.,0,0.2,0.36909273294694606,rouge1,ok,rouge1_0.2
816,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.18279090503789566,rouge1,ok,rouge1_0.30000000000000004
817,TRIVIA,phi35_i_1.,0,0.4,0.061246098974587604,rouge1,ok,rouge1_0.4
818,TRIVIA,phi35_i_1.,0,0.5,0.01995095853767276,rouge1,ok,rouge1_0.5
819,TRIVIA,phi35_i_1.,0,0.6,0.00635309852875613,rouge1,ok,rouge1_0.6
820,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.0019505127061970576,rouge1,ok,rouge1_0.7000000000000001
821,TRIVIA,phi35_i_1.,0,0.8,0.0005572893446277307,rouge1,ok,rouge1_0.8
822,TRIVIA,phi35_i_1.,0,0.9,0.00022291573785109228,rouge1,ok,rouge1_0.9
823,TRIVIA,phi35_i_1.,0,0.1,0.3073450735621935,rouge2,ok,rouge2_0.1
824,TRIVIA,phi35_i_1.,0,0.2,0.11976148016049933,rouge2,ok,rouge2_0.2
825,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.051827909050378956,rouge2,ok,rouge2_0.30000000000000004
826,TRIVIA,phi35_i_1.,0,0.4,0.01649576460098083,rouge2,ok,rouge2_0.4
827,TRIVIA,phi35_i_1.,0,0.5,0.005962995987516719,rouge2,ok,rouge2_0.5
828,TRIVIA,phi35_i_1.,0,0.6,0.0018390548372715114,rouge2,ok,rouge2_0.6
829,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.0005572893446277307,rouge2,ok,rouge2_0.7000000000000001
830,TRIVIA,phi35_i_1.,0,0.8,0.00011145786892554614,rouge2,ok,rouge2_0.8
831,TRIVIA,phi35_i_1.,0,0.9,0.00011145786892554614,rouge2,ok,rouge2_0.9
832,TRIVIA,phi35_i_1.,0,0.1,0.6352541239411502,rougeL,ok,rougeL_0.1
833,TRIVIA,phi35_i_1.,0,0.2,0.3429001337494427,rougeL,ok,rougeL_0.2
834,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.16150245207311636,rougeL,ok,rougeL_0.30000000000000004
835,TRIVIA,phi35_i_1.,0,0.4,0.053444048149799375,rougeL,ok,rougeL_0.4
836,TRIVIA,phi35_i_1.,0,0.5,0.01649576460098083,rougeL,ok,rougeL_0.5
837,TRIVIA,phi35_i_1.,0,0.6,0.005182790905037896,rougeL,ok,rougeL_0.6
838,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.0015046812304948729,rougeL,ok,rougeL_0.7000000000000001
839,TRIVIA,phi35_i_1.,0,0.8,0.00044583147570218456,rougeL,ok,rougeL_0.8
840,TRIVIA,phi35_i_1.,0,0.9,0.00022291573785109228,rougeL,ok,rougeL_0.9
841,TRIVIA,phi35_i_1.,0,0.1,0.6351426660722247,rougeLsum,ok,rougeLsum_0.1
842,TRIVIA,phi35_i_1.,0,0.2,0.3429001337494427,rougeLsum,ok,rougeLsum_0.2
843,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.16161390994204192,rougeLsum,ok,rougeLsum_0.30000000000000004
844,TRIVIA,phi35_i_1.,0,0.4,0.053444048149799375,rougeLsum,ok,rougeLsum_0.4
845,TRIVIA,phi35_i_1.,0,0.5,0.01649576460098083,rougeLsum,ok,rougeLsum_0.5
846,TRIVIA,phi35_i_1.,0,0.6,0.005182790905037896,rougeLsum,ok,rougeLsum_0.6
847,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.0015046812304948729,rougeLsum,ok,rougeLsum_0.7000000000000001
848,TRIVIA,phi35_i_1.,0,0.8,0.00044583147570218456,rougeLsum,ok,rougeLsum_0.8
849,TRIVIA,phi35_i_1.,0,0.9,0.00022291573785109228,rougeLsum,ok,rougeLsum_0.9
850,TRIVIA,phi35_i_1.,0,0.1,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
851,TRIVIA,phi35_i_1.,0,0.2,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
852,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
853,TRIVIA,phi35_i_1.,0,0.4,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
854,TRIVIA,phi35_i_1.,0,0.5,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
855,TRIVIA,phi35_i_1.,0,0.6,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
856,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
857,TRIVIA,phi35_i_1.,0,0.8,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
858,TRIVIA,phi35_i_1.,0,0.9,0.579079358002675,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
859,TRIVIA,phi35_i_1.,0,0.1,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.1
860,TRIVIA,phi35_i_1.,0,0.2,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.2
861,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.30000000000000004
862,TRIVIA,phi35_i_1.,0,0.4,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.4
863,TRIVIA,phi35_i_1.,0,0.5,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.5
864,TRIVIA,phi35_i_1.,0,0.6,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.6
865,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.7000000000000001
866,TRIVIA,phi35_i_1.,0,0.8,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.8
867,TRIVIA,phi35_i_1.,0,0.9,0.5779647793134195,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.9
868,TRIVIA,phi35_i_1.,0,0.1,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.1
869,TRIVIA,phi35_i_1.,0,0.2,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.2
870,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.30000000000000004
871,TRIVIA,phi35_i_1.,0,0.4,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.4
872,TRIVIA,phi35_i_1.,0,0.5,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.5
873,TRIVIA,phi35_i_1.,0,0.6,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.6
874,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.7000000000000001
875,TRIVIA,phi35_i_1.,0,0.8,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.8
876,TRIVIA,phi35_i_1.,0,0.9,0.658883192153366,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.9
877,TRIVIA,phi35_i_1.,0,0.1,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
878,TRIVIA,phi35_i_1.,0,0.2,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
879,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
880,TRIVIA,phi35_i_1.,0,0.4,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
881,TRIVIA,phi35_i_1.,0,0.5,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
882,TRIVIA,phi35_i_1.,0,0.6,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
883,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
884,TRIVIA,phi35_i_1.,0,0.8,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
885,TRIVIA,phi35_i_1.,0,0.9,0.6521399910833705,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
886,TRIVIA,phi35_i_1.,0,0.1,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
887,TRIVIA,phi35_i_1.,0,0.2,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
888,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
889,TRIVIA,phi35_i_1.,0,0.4,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
890,TRIVIA,phi35_i_1.,0,0.5,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
891,TRIVIA,phi35_i_1.,0,0.6,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
892,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
893,TRIVIA,phi35_i_1.,0,0.8,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
894,TRIVIA,phi35_i_1.,0,0.9,0.6594404814979937,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
895,TRIVIA,phi35_i_1.,0,0.1,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
896,TRIVIA,phi35_i_1.,0,0.2,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
897,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
898,TRIVIA,phi35_i_1.,0,0.4,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
899,TRIVIA,phi35_i_1.,0,0.5,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
900,TRIVIA,phi35_i_1.,0,0.6,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
901,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
902,TRIVIA,phi35_i_1.,0,0.8,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
903,TRIVIA,phi35_i_1.,0,0.9,0.5782991529201962,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
904,TRIVIA,phi35_i_1.,0,0.1,0.7326125724476148,bma_judge_w8,ok,bma_judge_w8_0.1
905,TRIVIA,phi35_i_1.,0,0.2,0.6676883637984842,bma_judge_w8,ok,bma_judge_w8_0.2
906,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.6676883637984842,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
907,TRIVIA,phi35_i_1.,0,0.4,0.6249442710655372,bma_judge_w8,ok,bma_judge_w8_0.4
908,TRIVIA,phi35_i_1.,0,0.5,0.5798595630851539,bma_judge_w8,ok,bma_judge_w8_0.5
909,TRIVIA,phi35_i_1.,0,0.6,0.5798595630851539,bma_judge_w8,ok,bma_judge_w8_0.6
910,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.5704971020954079,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
911,TRIVIA,phi35_i_1.,0,0.8,0.5704971020954079,bma_judge_w8,ok,bma_judge_w8_0.8
912,TRIVIA,phi35_i_1.,0,0.9,0.530205082478823,bma_judge_w8,ok,bma_judge_w8_0.9
913,TRIVIA,phi35_i_1.,0,0.1,0.584986625055729,bma_judge,ok,bma_judge_0.1
914,TRIVIA,phi35_i_1.,0,0.2,0.584986625055729,bma_judge,ok,bma_judge_0.2
915,TRIVIA,phi35_i_1.,0,0.30000000000000004,0.584986625055729,bma_judge,ok,bma_judge_0.30000000000000004
916,TRIVIA,phi35_i_1.,0,0.4,0.5782434239857334,bma_judge,ok,bma_judge_0.4
917,TRIVIA,phi35_i_1.,0,0.5,0.5782434239857334,bma_judge,ok,bma_judge_0.5
918,TRIVIA,phi35_i_1.,0,0.6,0.5782434239857334,bma_judge,ok,bma_judge_0.6
919,TRIVIA,phi35_i_1.,0,0.7000000000000001,0.5721132411948283,bma_judge,ok,bma_judge_0.7000000000000001
920,TRIVIA,phi35_i_1.,0,0.8,0.5721132411948283,bma_judge,ok,bma_judge_0.8
921,TRIVIA,phi35_i_1.,0,0.9,0.5721132411948283,bma_judge,ok,bma_judge_0.9
922,SQUAD,phi35_i_1.,5946,0.1,0.14661717563691581,bleu,ok,bleu_0.1
923,SQUAD,phi35_i_1.,5946,0.2,0.11844103256284798,bleu,ok,bleu_0.2
924,SQUAD,phi35_i_1.,5946,0.30000000000000004,0.09093976716720094,bleu,ok,bleu_0.30000000000000004
925,SQUAD,phi35_i_1.,5946,0.4,0.07170575333220854,bleu,ok,bleu_0.4
926,SQUAD,phi35_i_1.,5946,0.5,0.056183566728530455,bleu,ok,bleu_0.5
927,SQUAD,phi35_i_1.,5946,0.6,0.04471064619537709,bleu,ok,bleu_0.6
928,SQUAD,phi35_i_1.,5946,0.7000000000000001,0.0342500421798549,bleu,ok,bleu_0.7000000000000001
929,SQUAD,phi35_i_1.,5946,0.8,0.026657668297621055,bleu,ok,bleu_0.8
930,SQUAD,phi35_i_1.,5946,0.9,0.02058376919183398,bleu,ok,bleu_0.9
931,SQUAD,phi35_i_1.,5946,0.1,0.39058545638603004,bleu_adapt,ok,bleu_adapt_0.1
932,SQUAD,phi35_i_1.,5946,0.2,0.30082672515606546,bleu_adapt,ok,bleu_adapt_0.2
933,SQUAD,phi35_i_1.,5946,0.30000000000000004,0.2535852876666104,bleu_adapt,ok,bleu_adapt_0.30000000000000004
934,SQUAD,phi35_i_1.,5946,0.4,0.21528597941623082,bleu_adapt,ok,bleu_adapt_0.4
935,SQUAD,phi35_i_1.,5946,0.5,0.18339800911084866,bleu_adapt,ok,bleu_adapt_0.5
936,SQUAD,phi35_i_1.,5946,0.6,0.16753838366796017,bleu_adapt,ok,bleu_adapt_0.6
937,SQUAD,phi35_i_1.,5946,0.7000000000000001,0.15252235532309769,bleu_adapt,ok,bleu_adapt_0.7000000000000001
938,SQUAD,phi35_i_1.,5946,0.8,0.14425510376244305,bleu_adapt,ok,bleu_adapt_0.8
939,SQUAD,phi35_i_1.,5946,0.9,0.13818120465665598,bleu_adapt,ok,bleu_adapt_0.9
940,SQUAD,phi35_i_1.,5945,0.1,0.9267881241565452,rouge1,ok,rouge1_0.1
941,SQUAD,phi35_i_1.,5945,0.2,0.8265856950067476,rouge1,ok,rouge1_0.2
942,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.7280701754385965,rouge1,ok,rouge1_0.30000000000000004
943,SQUAD,phi35_i_1.,5945,0.4,0.6007085020242915,rouge1,ok,rouge1_0.4
944,SQUAD,phi35_i_1.,5945,0.5,0.5089406207827261,rouge1,ok,rouge1_0.5
945,SQUAD,phi35_i_1.,5945,0.6,0.4475371120107962,rouge1,ok,rouge1_0.6
946,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.3685897435897436,rouge1,ok,rouge1_0.7000000000000001
947,SQUAD,phi35_i_1.,5945,0.8,0.3021255060728745,rouge1,ok,rouge1_0.8
948,SQUAD,phi35_i_1.,5945,0.9,0.2699055330634278,rouge1,ok,rouge1_0.9
949,SQUAD,phi35_i_1.,5945,0.1,0.6361336032388664,rouge2,ok,rouge2_0.1
950,SQUAD,phi35_i_1.,5945,0.2,0.5406545209176788,rouge2,ok,rouge2_0.2
951,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.4542847503373819,rouge2,ok,rouge2_0.30000000000000004
952,SQUAD,phi35_i_1.,5945,0.4,0.3741565452091768,rouge2,ok,rouge2_0.4
953,SQUAD,phi35_i_1.,5945,0.5,0.3107287449392713,rouge2,ok,rouge2_0.5
954,SQUAD,phi35_i_1.,5945,0.6,0.2758097165991903,rouge2,ok,rouge2_0.6
955,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.22351551956815116,rouge2,ok,rouge2_0.7000000000000001
956,SQUAD,phi35_i_1.,5945,0.8,0.19078947368421054,rouge2,ok,rouge2_0.8
957,SQUAD,phi35_i_1.,5945,0.9,0.17088394062078271,rouge2,ok,rouge2_0.9
958,SQUAD,phi35_i_1.,5945,0.1,0.9242577597840755,rougeL,ok,rougeL_0.1
959,SQUAD,phi35_i_1.,5945,0.2,0.819838056680162,rougeL,ok,rougeL_0.2
960,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.7208164642375169,rougeL,ok,rougeL_0.30000000000000004
961,SQUAD,phi35_i_1.,5945,0.4,0.5897435897435898,rougeL,ok,rougeL_0.4
962,SQUAD,phi35_i_1.,5945,0.5,0.49780701754385964,rougeL,ok,rougeL_0.5
963,SQUAD,phi35_i_1.,5945,0.6,0.43859649122807015,rougeL,ok,rougeL_0.6
964,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.3615047233468286,rougeL,ok,rougeL_0.7000000000000001
965,SQUAD,phi35_i_1.,5945,0.8,0.2984143049932524,rougeL,ok,rougeL_0.8
966,SQUAD,phi35_i_1.,5945,0.9,0.268387314439946,rougeL,ok,rougeL_0.9
967,SQUAD,phi35_i_1.,5945,0.1,0.9244264507422402,rougeLsum,ok,rougeLsum_0.1
968,SQUAD,phi35_i_1.,5945,0.2,0.8200067476383266,rougeLsum,ok,rougeLsum_0.2
969,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.7208164642375169,rougeLsum,ok,rougeLsum_0.30000000000000004
970,SQUAD,phi35_i_1.,5945,0.4,0.5897435897435898,rougeLsum,ok,rougeLsum_0.4
971,SQUAD,phi35_i_1.,5945,0.5,0.49780701754385964,rougeLsum,ok,rougeLsum_0.5
972,SQUAD,phi35_i_1.,5945,0.6,0.43859649122807015,rougeLsum,ok,rougeLsum_0.6
973,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.3615047233468286,rougeLsum,ok,rougeLsum_0.7000000000000001
974,SQUAD,phi35_i_1.,5945,0.8,0.2984143049932524,rougeLsum,ok,rougeLsum_0.8
975,SQUAD,phi35_i_1.,5945,0.9,0.268387314439946,rougeLsum,ok,rougeLsum_0.9
976,SQUAD,phi35_i_1.,5945,0.1,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
977,SQUAD,phi35_i_1.,5945,0.2,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
978,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
979,SQUAD,phi35_i_1.,5945,0.4,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
980,SQUAD,phi35_i_1.,5945,0.5,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
981,SQUAD,phi35_i_1.,5945,0.6,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
982,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
983,SQUAD,phi35_i_1.,5945,0.8,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
984,SQUAD,phi35_i_1.,5945,0.9,0.8734817813765182,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
985,SQUAD,phi35_i_1.,5945,0.1,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.1
986,SQUAD,phi35_i_1.,5945,0.2,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.2
987,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.30000000000000004
988,SQUAD,phi35_i_1.,5945,0.4,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.4
989,SQUAD,phi35_i_1.,5945,0.5,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.5
990,SQUAD,phi35_i_1.,5945,0.6,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.6
991,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.7000000000000001
992,SQUAD,phi35_i_1.,5945,0.8,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.8
993,SQUAD,phi35_i_1.,5945,0.9,0.9202091767881242,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.9
994,SQUAD,phi35_i_1.,5945,0.1,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.1
995,SQUAD,phi35_i_1.,5945,0.2,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.2
996,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.30000000000000004
997,SQUAD,phi35_i_1.,5945,0.4,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.4
998,SQUAD,phi35_i_1.,5945,0.5,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.5
999,SQUAD,phi35_i_1.,5945,0.6,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.6
1000,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.7000000000000001
1001,SQUAD,phi35_i_1.,5945,0.8,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.8
1002,SQUAD,phi35_i_1.,5945,0.9,0.8539136302294197,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.9
1003,SQUAD,phi35_i_1.,5945,0.1,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.1
1004,SQUAD,phi35_i_1.,5945,0.2,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.2
1005,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.30000000000000004
1006,SQUAD,phi35_i_1.,5945,0.4,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.4
1007,SQUAD,phi35_i_1.,5945,0.5,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.5
1008,SQUAD,phi35_i_1.,5945,0.6,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.6
1009,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.7000000000000001
1010,SQUAD,phi35_i_1.,5945,0.8,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.8
1011,SQUAD,phi35_i_1.,5945,0.9,0.9200404858299596,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.9
1012,SQUAD,phi35_i_1.,5945,0.1,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.1
1013,SQUAD,phi35_i_1.,5945,0.2,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.2
1014,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.30000000000000004
1015,SQUAD,phi35_i_1.,5945,0.4,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.4
1016,SQUAD,phi35_i_1.,5945,0.5,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.5
1017,SQUAD,phi35_i_1.,5945,0.6,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.6
1018,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.7000000000000001
1019,SQUAD,phi35_i_1.,5945,0.8,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.8
1020,SQUAD,phi35_i_1.,5945,0.9,0.8556005398110661,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.9
1021,SQUAD,phi35_i_1.,6169,0.1,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.1
1022,SQUAD,phi35_i_1.,6169,0.2,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.2
1023,SQUAD,phi35_i_1.,6169,0.30000000000000004,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.30000000000000004
1024,SQUAD,phi35_i_1.,6169,0.4,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.4
1025,SQUAD,phi35_i_1.,6169,0.5,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.5
1026,SQUAD,phi35_i_1.,6169,0.6,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.6
1027,SQUAD,phi35_i_1.,6169,0.7000000000000001,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.7000000000000001
1028,SQUAD,phi35_i_1.,6169,0.8,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.8
1029,SQUAD,phi35_i_1.,6169,0.9,0.8685133239831697,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.9
1030,SQUAD,phi35_i_1.,5945,0.1,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.1
1031,SQUAD,phi35_i_1.,5945,0.2,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.2
1032,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.30000000000000004
1033,SQUAD,phi35_i_1.,5945,0.4,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.4
1034,SQUAD,phi35_i_1.,5945,0.5,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.5
1035,SQUAD,phi35_i_1.,5945,0.6,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.6
1036,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.7000000000000001
1037,SQUAD,phi35_i_1.,5945,0.8,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.8
1038,SQUAD,phi35_i_1.,5945,0.9,0.8751686909581646,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.9
1039,SQUAD,phi35_i_1.,5945,0.1,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.1
1040,SQUAD,phi35_i_1.,5945,0.2,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.2
1041,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.30000000000000004
1042,SQUAD,phi35_i_1.,5945,0.4,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.4
1043,SQUAD,phi35_i_1.,5945,0.5,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.5
1044,SQUAD,phi35_i_1.,5945,0.6,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.6
1045,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.7000000000000001
1046,SQUAD,phi35_i_1.,5945,0.8,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.8
1047,SQUAD,phi35_i_1.,5945,0.9,0.9200404858299596,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.9
1048,SQUAD,phi35_i_1.,5945,0.1,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
1049,SQUAD,phi35_i_1.,5945,0.2,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
1050,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
1051,SQUAD,phi35_i_1.,5945,0.4,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
1052,SQUAD,phi35_i_1.,5945,0.5,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
1053,SQUAD,phi35_i_1.,5945,0.6,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
1054,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
1055,SQUAD,phi35_i_1.,5945,0.8,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
1056,SQUAD,phi35_i_1.,5945,0.9,0.9180161943319838,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
1057,SQUAD,phi35_i_1.,5945,0.1,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
1058,SQUAD,phi35_i_1.,5945,0.2,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
1059,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
1060,SQUAD,phi35_i_1.,5945,0.4,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
1061,SQUAD,phi35_i_1.,5945,0.5,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
1062,SQUAD,phi35_i_1.,5945,0.6,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
1063,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
1064,SQUAD,phi35_i_1.,5945,0.8,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
1065,SQUAD,phi35_i_1.,5945,0.9,0.8696018893387314,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
1066,SQUAD,phi35_i_1.,5945,0.1,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.1
1067,SQUAD,phi35_i_1.,5945,0.2,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.2
1068,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.30000000000000004
1069,SQUAD,phi35_i_1.,5945,0.4,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.4
1070,SQUAD,phi35_i_1.,5945,0.5,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.5
1071,SQUAD,phi35_i_1.,5945,0.6,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.6
1072,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.7000000000000001
1073,SQUAD,phi35_i_1.,5945,0.8,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.8
1074,SQUAD,phi35_i_1.,5945,0.9,0.8522267206477733,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.9
1075,SQUAD,phi35_i_1.,5945,0.1,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.1
1076,SQUAD,phi35_i_1.,5945,0.2,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.2
1077,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.30000000000000004
1078,SQUAD,phi35_i_1.,5945,0.4,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.4
1079,SQUAD,phi35_i_1.,5945,0.5,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.5
1080,SQUAD,phi35_i_1.,5945,0.6,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.6
1081,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.7000000000000001
1082,SQUAD,phi35_i_1.,5945,0.8,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.8
1083,SQUAD,phi35_i_1.,5945,0.9,0.8567813765182186,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.9
1084,SQUAD,phi35_i_1.,5945,0.1,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.1
1085,SQUAD,phi35_i_1.,5945,0.2,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.2
1086,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.30000000000000004
1087,SQUAD,phi35_i_1.,5945,0.4,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.4
1088,SQUAD,phi35_i_1.,5945,0.5,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.5
1089,SQUAD,phi35_i_1.,5945,0.6,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.6
1090,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.7000000000000001
1091,SQUAD,phi35_i_1.,5945,0.8,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.8
1092,SQUAD,phi35_i_1.,5945,0.9,0.8510458839406208,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.9
1093,SQUAD,phi35_i_1.,5945,0.1,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.1
1094,SQUAD,phi35_i_1.,5945,0.2,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.2
1095,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.30000000000000004
1096,SQUAD,phi35_i_1.,5945,0.4,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.4
1097,SQUAD,phi35_i_1.,5945,0.5,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.5
1098,SQUAD,phi35_i_1.,5945,0.6,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.6
1099,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.7000000000000001
1100,SQUAD,phi35_i_1.,5945,0.8,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.8
1101,SQUAD,phi35_i_1.,5945,0.9,0.9191970310391363,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.9
1102,SQUAD,phi35_i_1.,0,0.1,0.5007159100480081,ood_label,ok,ood_label_0.1
1103,SQUAD,phi35_i_1.,0,0.2,0.5007159100480081,ood_label,ok,ood_label_0.2
1104,SQUAD,phi35_i_1.,0,0.30000000000000004,0.5007159100480081,ood_label,ok,ood_label_0.30000000000000004
1105,SQUAD,phi35_i_1.,0,0.4,0.5007159100480081,ood_label,ok,ood_label_0.4
1106,SQUAD,phi35_i_1.,0,0.5,0.5007159100480081,ood_label,ok,ood_label_0.5
1107,SQUAD,phi35_i_1.,0,0.6,0.5007159100480081,ood_label,ok,ood_label_0.6
1108,SQUAD,phi35_i_1.,0,0.7000000000000001,0.5007159100480081,ood_label,ok,ood_label_0.7000000000000001
1109,SQUAD,phi35_i_1.,0,0.8,0.5007159100480081,ood_label,ok,ood_label_0.8
1110,SQUAD,phi35_i_1.,0,0.9,0.5007159100480081,ood_label,ok,ood_label_0.9
1111,SQUAD,phi35_i_1.,5945,0.1,0.9654183535762483,bma_judge_w8,ok,bma_judge_w8_0.1
1112,SQUAD,phi35_i_1.,5945,0.2,0.9559716599190283,bma_judge_w8,ok,bma_judge_w8_0.2
1113,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9407894736842105,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1114,SQUAD,phi35_i_1.,5945,0.4,0.9293184885290149,bma_judge_w8,ok,bma_judge_w8_0.4
1115,SQUAD,phi35_i_1.,5945,0.5,0.9075573549257759,bma_judge_w8,ok,bma_judge_w8_0.5
1116,SQUAD,phi35_i_1.,5945,0.6,0.8965924426450742,bma_judge_w8,ok,bma_judge_w8_0.6
1117,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.8782051282051282,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1118,SQUAD,phi35_i_1.,5945,0.8,0.809885290148448,bma_judge_w8,ok,bma_judge_w8_0.8
1119,SQUAD,phi35_i_1.,5945,0.9,0.7511808367071525,bma_judge_w8,ok,bma_judge_w8_0.9
1120,SQUAD,phi35_i_1.,5945,0.1,0.9446693657219973,bma_judge,ok,bma_judge_0.1
1121,SQUAD,phi35_i_1.,5945,0.2,0.926450742240216,bma_judge,ok,bma_judge_0.2
1122,SQUAD,phi35_i_1.,5945,0.30000000000000004,0.9222334682860999,bma_judge,ok,bma_judge_0.30000000000000004
1123,SQUAD,phi35_i_1.,5945,0.4,0.9222334682860999,bma_judge,ok,bma_judge_0.4
1124,SQUAD,phi35_i_1.,5945,0.5,0.9202091767881242,bma_judge,ok,bma_judge_0.5
1125,SQUAD,phi35_i_1.,5945,0.6,0.9141363022941971,bma_judge,ok,bma_judge_0.6
1126,SQUAD,phi35_i_1.,5945,0.7000000000000001,0.9141363022941971,bma_judge,ok,bma_judge_0.7000000000000001
1127,SQUAD,phi35_i_1.,5945,0.8,0.8776990553306343,bma_judge,ok,bma_judge_0.8
1128,SQUAD,phi35_i_1.,5945,0.9,0.8139338731443995,bma_judge,ok,bma_judge_0.9
1129,COLLIE,falcon_mamba_1.,0,0.1,0.0375,bleu,ok,bleu_0.1
1130,COLLIE,falcon_mamba_1.,0,0.2,0.01201923076923077,bleu,ok,bleu_0.2
1131,COLLIE,falcon_mamba_1.,0,0.30000000000000004,0.0028846153846153848,bleu,ok,bleu_0.30000000000000004
1132,COLLIE,falcon_mamba_1.,0,0.4,0.0,bleu,ok,bleu_0.4
1133,COLLIE,falcon_mamba_1.,0,0.5,0.0,bleu,ok,bleu_0.5
1134,COLLIE,falcon_mamba_1.,0,0.6,0.0,bleu,ok,bleu_0.6
1135,COLLIE,falcon_mamba_1.,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
1136,COLLIE,falcon_mamba_1.,0,0.8,0.0,bleu,ok,bleu_0.8
1137,COLLIE,falcon_mamba_1.,0,0.9,0.0,bleu,ok,bleu_0.9
1138,COLLIE,falcon_mamba_1.,0,0.1,0.0375,bleu_adapt,ok,bleu_adapt_0.1
1139,COLLIE,falcon_mamba_1.,0,0.2,0.01201923076923077,bleu_adapt,ok,bleu_adapt_0.2
1140,COLLIE,falcon_mamba_1.,0,0.30000000000000004,0.0028846153846153848,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1141,COLLIE,falcon_mamba_1.,0,0.4,0.0,bleu_adapt,ok,bleu_adapt_0.4
1142,COLLIE,falcon_mamba_1.,0,0.5,0.0,bleu_adapt,ok,bleu_adapt_0.5
1143,COLLIE,falcon_mamba_1.,0,0.6,0.0,bleu_adapt,ok,bleu_adapt_0.6
1144,COLLIE,falcon_mamba_1.,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1145,COLLIE,falcon_mamba_1.,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
1146,COLLIE,falcon_mamba_1.,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
1147,COLLIE,falcon_mamba_1.,0,0.1,0.39663461538461536,rouge1,ok,rouge1_0.1
1148,COLLIE,falcon_mamba_1.,0,0.2,0.13413461538461538,rouge1,ok,rouge1_0.2
1149,COLLIE,falcon_mamba_1.,0,0.30000000000000004,0.04567307692307692,rouge1,ok,rouge1_0.30000000000000004
1150,COLLIE,falcon_mamba_1.,0,0.4,0.01826923076923077,rouge1,ok,rouge1_0.4
1151,COLLIE,falcon_mamba_1.,0,0.5,0.00625,rouge1,ok,rouge1_0.5
1152,COLLIE,falcon_mamba_1.,0,0.6,0.0004807692307692308,rouge1,ok,rouge1_0.6
1153,COLLIE,falcon_mamba_1.,0,0.7000000000000001,0.0,rouge1,ok,rouge1_0.7000000000000001
1154,COLLIE,falcon_mamba_1.,0,0.8,0.0,rouge1,ok,rouge1_0.8
1155,COLLIE,falcon_mamba_1.,0,0.9,0.0,rouge1,ok,rouge1_0.9
1156,COLLIE,falcon_mamba_1.,0,0.1,0.05048076923076923,rouge2,ok,rouge2_0.1
1157,COLLIE,falcon_mamba_1.,0,0.2,0.025961538461538463,rouge2,ok,rouge2_0.2
1158,COLLIE,falcon_mamba_1.,0,0.30000000000000004,0.010096153846153847,rouge2,ok,rouge2_0.30000000000000004
1159,COLLIE,falcon_mamba_1.,0,0.4,0.002403846153846154,rouge2,ok,rouge2_0.4
1160,COLLIE,falcon_mamba_1.,0,0.5,0.0,rouge2,ok,rouge2_0.5
1161,COLLIE,falcon_mamba_1.,0,0.6,0.0,rouge2,ok,rouge2_0.6
1162,COLLIE,falcon_mamba_1.,0,0.7000000000000001,0.0,rouge2,ok,rouge2_0.7000000000000001
1163,COLLIE,falcon_mamba_1.,0,0.8,0.0,rouge2,ok,rouge2_0.8
1164,COLLIE,falcon_mamba_1.,0,0.9,0.0,rouge2,ok,rouge2_0.9
1165,COLLIE,falcon_mamba_1.,0,0.1,0.24951923076923077,rougeL,ok,rougeL_0.1
1166,COLLIE,falcon_mamba_1.,0,0.2,0.04038461538461539,rougeL,ok,rougeL_0.2
1167,COLLIE,falcon_mamba_1.,0,0.30000000000000004,0.014423076923076924,rougeL,ok,rougeL_0.30000000000000004
1168,COLLIE,falcon_mamba_1.,0,0.4,0.0038461538461538464,rougeL,ok,rougeL_0.4
1169,COLLIE,falcon_mamba_1.,0,0.5,0.0004807692307692308,rougeL,ok,rougeL_0.5
1170,COLLIE,falcon_mamba_1.,0,0.6,0.0,rougeL,ok,rougeL_0.6
1171,COLLIE,falcon_mamba_1.,0,0.7000000000000001,0.0,rougeL,ok,rougeL_0.7000000000000001
1172,COLLIE,falcon_mamba_1.,0,0.8,0.0,rougeL,ok,rougeL_0.8
1173,COLLIE,falcon_mamba_1.,0,0.9,0.0,rougeL,ok,rougeL_0.9
1174,COLLIE,falcon_mamba_1.,0,0.1,0.28701923076923075,rougeLsum,ok,rougeLsum_0.1
1175,COLLIE,falcon_mamba_1.,0,0.2,0.0764423076923077,rougeLsum,ok,rougeLsum_0.2
1176,COLLIE,falcon_mamba_1.,0,0.30000000000000004,0.035096153846153846,rougeLsum,ok,rougeLsum_0.30000000000000004
1177,COLLIE,falcon_mamba_1.,0,0.4,0.013461538461538462,rougeLsum,ok,rougeLsum_0.4
1178,COLLIE,falcon_mamba_1.,0,0.5,0.0038461538461538464,rougeLsum,ok,rougeLsum_0.5
1179,COLLIE,falcon_mamba_1.,0,0.6,0.0,rougeLsum,ok,rougeLsum_0.6
1180,COLLIE,falcon_mamba_1.,0,0.7000000000000001,0.0,rougeLsum,ok,rougeLsum_0.7000000000000001
1181,COLLIE,falcon_mamba_1.,0,0.8,0.0,rougeLsum,ok,rougeLsum_0.8
1182,COLLIE,falcon_mamba_1.,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
1183,COLLIE,falcon_mamba_1.,4,0.1,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
1184,COLLIE,falcon_mamba_1.,4,0.2,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
1185,COLLIE,falcon_mamba_1.,4,0.30000000000000004,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
1186,COLLIE,falcon_mamba_1.,4,0.4,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
1187,COLLIE,falcon_mamba_1.,4,0.5,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
1188,COLLIE,falcon_mamba_1.,4,0.6,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
1189,COLLIE,falcon_mamba_1.,4,0.7000000000000001,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
1190,COLLIE,falcon_mamba_1.,4,0.8,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
1191,COLLIE,falcon_mamba_1.,4,0.9,0.027938342967244702,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
1192,COLLIE,falcon_mamba_1.,21,0.1,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
1193,COLLIE,falcon_mamba_1.,21,0.2,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
1194,COLLIE,falcon_mamba_1.,21,0.30000000000000004,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
1195,COLLIE,falcon_mamba_1.,21,0.4,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
1196,COLLIE,falcon_mamba_1.,21,0.5,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
1197,COLLIE,falcon_mamba_1.,21,0.6,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
1198,COLLIE,falcon_mamba_1.,21,0.7000000000000001,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
1199,COLLIE,falcon_mamba_1.,21,0.8,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
1200,COLLIE,falcon_mamba_1.,21,0.9,0.10150558523555124,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
1201,COLLIE,falcon_mamba_1.,0,0.5,0.14038461538461539,exact_correctness,ok,exact_correctness_0.5
1202,COLLIE,falcon_mamba_1.,1,0.1,0.11832611832611832,bma_judge_w8,ok,bma_judge_w8_0.1
1203,COLLIE,falcon_mamba_1.,1,0.2,0.11832611832611832,bma_judge_w8,ok,bma_judge_w8_0.2
1204,COLLIE,falcon_mamba_1.,1,0.30000000000000004,0.11832611832611832,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1205,COLLIE,falcon_mamba_1.,1,0.4,0.11832611832611832,bma_judge_w8,ok,bma_judge_w8_0.4
1206,COLLIE,falcon_mamba_1.,1,0.5,0.011063011063011063,bma_judge_w8,ok,bma_judge_w8_0.5
1207,COLLIE,falcon_mamba_1.,1,0.6,0.011063011063011063,bma_judge_w8,ok,bma_judge_w8_0.6
1208,COLLIE,falcon_mamba_1.,1,0.7000000000000001,0.011063011063011063,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1209,COLLIE,falcon_mamba_1.,1,0.8,0.011063011063011063,bma_judge_w8,ok,bma_judge_w8_0.8
1210,COLLIE,falcon_mamba_1.,1,0.9,0.011063011063011063,bma_judge_w8,ok,bma_judge_w8_0.9
1211,COLLIE,falcon_mamba_1.,4,0.1,0.027938342967244702,bma_judge,ok,bma_judge_0.1
1212,COLLIE,falcon_mamba_1.,4,0.2,0.027938342967244702,bma_judge,ok,bma_judge_0.2
1213,COLLIE,falcon_mamba_1.,4,0.30000000000000004,0.027938342967244702,bma_judge,ok,bma_judge_0.30000000000000004
1214,COLLIE,falcon_mamba_1.,4,0.4,0.027938342967244702,bma_judge,ok,bma_judge_0.4
1215,COLLIE,falcon_mamba_1.,4,0.5,0.027938342967244702,bma_judge,ok,bma_judge_0.5
1216,COLLIE,falcon_mamba_1.,4,0.6,0.027938342967244702,bma_judge,ok,bma_judge_0.6
1217,COLLIE,falcon_mamba_1.,4,0.7000000000000001,0.027938342967244702,bma_judge,ok,bma_judge_0.7000000000000001
1218,COLLIE,falcon_mamba_1.,4,0.8,0.027938342967244702,bma_judge,ok,bma_judge_0.8
1219,COLLIE,falcon_mamba_1.,4,0.9,0.027938342967244702,bma_judge,ok,bma_judge_0.9
1220,TRIVIA,llama3_8b_i_1.,18,0.1,0.04897913644984938,bleu,ok,bleu_0.1
1221,TRIVIA,llama3_8b_i_1.,18,0.2,0.02510320205288408,bleu,ok,bleu_0.2
1222,TRIVIA,llama3_8b_i_1.,18,0.30000000000000004,0.010097065714604486,bleu,ok,bleu_0.30000000000000004
1223,TRIVIA,llama3_8b_i_1.,18,0.4,0.005466919558183644,bleu,ok,bleu_0.4
1224,TRIVIA,llama3_8b_i_1.,18,0.5,0.003291308713600357,bleu,ok,bleu_0.5
1225,TRIVIA,llama3_8b_i_1.,18,0.6,0.0018409014838781659,bleu,ok,bleu_0.6
1226,TRIVIA,llama3_8b_i_1.,18,0.7000000000000001,0.0017293316969764587,bleu,ok,bleu_0.7000000000000001
1227,TRIVIA,llama3_8b_i_1.,18,0.8,0.0016177619100747518,bleu,ok,bleu_0.8
1228,TRIVIA,llama3_8b_i_1.,18,0.9,0.0015619770166238982,bleu,ok,bleu_0.9
1229,TRIVIA,llama3_8b_i_1.,18,0.1,0.09053888207073524,bleu_adapt,ok,bleu_adapt_0.1
1230,TRIVIA,llama3_8b_i_1.,18,0.2,0.0452415485886422,bleu_adapt,ok,bleu_adapt_0.2
1231,TRIVIA,llama3_8b_i_1.,18,0.30000000000000004,0.02655360928260627,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1232,TRIVIA,llama3_8b_i_1.,18,0.4,0.020473055896463237,bleu_adapt,ok,bleu_adapt_0.4
1233,TRIVIA,llama3_8b_i_1.,18,0.5,0.01762802633046971,bleu_adapt,ok,bleu_adapt_0.5
1234,TRIVIA,llama3_8b_i_1.,18,0.6,0.01617761910074752,bleu_adapt,ok,bleu_adapt_0.6
1235,TRIVIA,llama3_8b_i_1.,18,0.7000000000000001,0.015787124846591543,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1236,TRIVIA,llama3_8b_i_1.,18,0.8,0.01556398527278813,bleu_adapt,ok,bleu_adapt_0.8
1237,TRIVIA,llama3_8b_i_1.,18,0.9,0.015508200379337275,bleu_adapt,ok,bleu_adapt_0.9
1238,TRIVIA,llama3_8b_i_1.,0,0.1,0.7511145786892555,rouge1,ok,rouge1_0.1
1239,TRIVIA,llama3_8b_i_1.,0,0.2,0.48194382523406154,rouge1,ok,rouge1_0.2
1240,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.2732389656709764,rouge1,ok,rouge1_0.30000000000000004
1241,TRIVIA,llama3_8b_i_1.,0,0.4,0.14249888542131076,rouge1,ok,rouge1_0.4
1242,TRIVIA,llama3_8b_i_1.,0,0.5,0.08844181899242086,rouge1,ok,rouge1_0.5
1243,TRIVIA,llama3_8b_i_1.,0,0.6,0.0701627284886313,rouge1,ok,rouge1_0.6
1244,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.059574230940704416,rouge1,ok,rouge1_0.7000000000000001
1245,TRIVIA,llama3_8b_i_1.,0,0.8,0.0562862238074008,rouge1,ok,rouge1_0.8
1246,TRIVIA,llama3_8b_i_1.,0,0.9,0.055394560855996436,rouge1,ok,rouge1_0.9
1247,TRIVIA,llama3_8b_i_1.,0,0.1,0.38235621934908604,rouge2,ok,rouge2_0.1
1248,TRIVIA,llama3_8b_i_1.,0,0.2,0.17465448060633082,rouge2,ok,rouge2_0.2
1249,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.09535220686580473,rouge2,ok,rouge2_0.30000000000000004
1250,TRIVIA,llama3_8b_i_1.,0,0.4,0.054837271511368704,rouge2,ok,rouge2_0.4
1251,TRIVIA,llama3_8b_i_1.,0,0.5,0.03666963887650468,rouge2,ok,rouge2_0.5
1252,TRIVIA,llama3_8b_i_1.,0,0.6,0.03148684797146679,rouge2,ok,rouge2_0.6
1253,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.02758582255907267,rouge2,ok,rouge2_0.7000000000000001
1254,TRIVIA,llama3_8b_i_1.,0,0.8,0.026582701738742755,rouge2,ok,rouge2_0.8
1255,TRIVIA,llama3_8b_i_1.,0,0.9,0.02630405706642889,rouge2,ok,rouge2_0.9
1256,TRIVIA,llama3_8b_i_1.,0,0.1,0.7405260811413286,rougeL,ok,rougeL_0.1
1257,TRIVIA,llama3_8b_i_1.,0,0.2,0.4555283102987071,rougeL,ok,rougeL_0.2
1258,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.2511703076237182,rougeL,ok,rougeL_0.30000000000000004
1259,TRIVIA,llama3_8b_i_1.,0,0.4,0.13252340615247438,rougeL,ok,rougeL_0.4
1260,TRIVIA,llama3_8b_i_1.,0,0.5,0.08559964333481944,rougeL,ok,rougeL_0.5
1261,TRIVIA,llama3_8b_i_1.,0,0.6,0.06915960766830138,rougeL,ok,rougeL_0.6
1262,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.05935131520285332,rougeL,ok,rougeL_0.7000000000000001
1263,TRIVIA,llama3_8b_i_1.,0,0.8,0.05617476593847526,rougeL,ok,rougeL_0.8
1264,TRIVIA,llama3_8b_i_1.,0,0.9,0.055394560855996436,rougeL,ok,rougeL_0.9
1265,TRIVIA,llama3_8b_i_1.,0,0.1,0.7408604547481052,rougeLsum,ok,rougeLsum_0.1
1266,TRIVIA,llama3_8b_i_1.,0,0.2,0.45536112349531876,rougeLsum,ok,rougeLsum_0.2
1267,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.2511703076237182,rougeLsum,ok,rougeLsum_0.30000000000000004
1268,TRIVIA,llama3_8b_i_1.,0,0.4,0.13252340615247438,rougeLsum,ok,rougeLsum_0.4
1269,TRIVIA,llama3_8b_i_1.,0,0.5,0.08559964333481944,rougeLsum,ok,rougeLsum_0.5
1270,TRIVIA,llama3_8b_i_1.,0,0.6,0.06915960766830138,rougeLsum,ok,rougeLsum_0.6
1271,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.05935131520285332,rougeLsum,ok,rougeLsum_0.7000000000000001
1272,TRIVIA,llama3_8b_i_1.,0,0.8,0.05617476593847526,rougeLsum,ok,rougeLsum_0.8
1273,TRIVIA,llama3_8b_i_1.,0,0.9,0.055394560855996436,rougeLsum,ok,rougeLsum_0.9
1274,TRIVIA,llama3_8b_i_1.,9,0.1,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.1
1275,TRIVIA,llama3_8b_i_1.,9,0.2,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.2
1276,TRIVIA,llama3_8b_i_1.,9,0.30000000000000004,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.30000000000000004
1277,TRIVIA,llama3_8b_i_1.,9,0.4,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.4
1278,TRIVIA,llama3_8b_i_1.,9,0.5,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.5
1279,TRIVIA,llama3_8b_i_1.,9,0.6,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.6
1280,TRIVIA,llama3_8b_i_1.,9,0.7000000000000001,0.9989963758015055,bert_score_f1,ok,bert_score_f1_0.7000000000000001
1281,TRIVIA,llama3_8b_i_1.,9,0.8,0.9119040981321439,bert_score_f1,ok,bert_score_f1_0.8
1282,TRIVIA,llama3_8b_i_1.,9,0.9,0.08558684137161973,bert_score_f1,ok,bert_score_f1_0.9
1283,TRIVIA,llama3_8b_i_1.,1,0.1,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
1284,TRIVIA,llama3_8b_i_1.,1,0.2,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
1285,TRIVIA,llama3_8b_i_1.,1,0.30000000000000004,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
1286,TRIVIA,llama3_8b_i_1.,1,0.4,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
1287,TRIVIA,llama3_8b_i_1.,1,0.5,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
1288,TRIVIA,llama3_8b_i_1.,1,0.6,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
1289,TRIVIA,llama3_8b_i_1.,1,0.7000000000000001,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
1290,TRIVIA,llama3_8b_i_1.,1,0.8,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
1291,TRIVIA,llama3_8b_i_1.,1,0.9,0.7406788162514629,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
1292,TRIVIA,llama3_8b_i_1.,0,0.1,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.1
1293,TRIVIA,llama3_8b_i_1.,0,0.2,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.2
1294,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.30000000000000004
1295,TRIVIA,llama3_8b_i_1.,0,0.4,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.4
1296,TRIVIA,llama3_8b_i_1.,0,0.5,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.5
1297,TRIVIA,llama3_8b_i_1.,0,0.6,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.6
1298,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.7000000000000001
1299,TRIVIA,llama3_8b_i_1.,0,0.8,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.8
1300,TRIVIA,llama3_8b_i_1.,0,0.9,0.738687026304057,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.9
1301,TRIVIA,llama3_8b_i_1.,0,0.1,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.1
1302,TRIVIA,llama3_8b_i_1.,0,0.2,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.2
1303,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.30000000000000004
1304,TRIVIA,llama3_8b_i_1.,0,0.4,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.4
1305,TRIVIA,llama3_8b_i_1.,0,0.5,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.5
1306,TRIVIA,llama3_8b_i_1.,0,0.6,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.6
1307,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.7000000000000001
1308,TRIVIA,llama3_8b_i_1.,0,0.8,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.8
1309,TRIVIA,llama3_8b_i_1.,0,0.9,0.7776415514935354,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.9
1310,TRIVIA,llama3_8b_i_1.,0,0.1,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
1311,TRIVIA,llama3_8b_i_1.,0,0.2,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
1312,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
1313,TRIVIA,llama3_8b_i_1.,0,0.4,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
1314,TRIVIA,llama3_8b_i_1.,0,0.5,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
1315,TRIVIA,llama3_8b_i_1.,0,0.6,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
1316,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
1317,TRIVIA,llama3_8b_i_1.,0,0.8,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
1318,TRIVIA,llama3_8b_i_1.,0,0.9,0.7600869371377619,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
1319,TRIVIA,llama3_8b_i_1.,0,0.1,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
1320,TRIVIA,llama3_8b_i_1.,0,0.2,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
1321,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
1322,TRIVIA,llama3_8b_i_1.,0,0.4,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
1323,TRIVIA,llama3_8b_i_1.,0,0.5,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
1324,TRIVIA,llama3_8b_i_1.,0,0.6,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
1325,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
1326,TRIVIA,llama3_8b_i_1.,0,0.8,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
1327,TRIVIA,llama3_8b_i_1.,0,0.9,0.7788675880517164,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
1328,TRIVIA,llama3_8b_i_1.,0,0.1,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
1329,TRIVIA,llama3_8b_i_1.,0,0.2,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
1330,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
1331,TRIVIA,llama3_8b_i_1.,0,0.4,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
1332,TRIVIA,llama3_8b_i_1.,0,0.5,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
1333,TRIVIA,llama3_8b_i_1.,0,0.6,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
1334,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
1335,TRIVIA,llama3_8b_i_1.,0,0.8,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
1336,TRIVIA,llama3_8b_i_1.,0,0.9,0.7388542131074454,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
1337,TRIVIA,llama3_8b_i_1.,0,0.1,0.836937137761926,bma_judge_w8,ok,bma_judge_w8_0.1
1338,TRIVIA,llama3_8b_i_1.,0,0.2,0.7996544806063308,bma_judge_w8,ok,bma_judge_w8_0.2
1339,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.7996544806063308,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1340,TRIVIA,llama3_8b_i_1.,0,0.4,0.7744650022291574,bma_judge_w8,ok,bma_judge_w8_0.4
1341,TRIVIA,llama3_8b_i_1.,0,0.5,0.7369594293357111,bma_judge_w8,ok,bma_judge_w8_0.5
1342,TRIVIA,llama3_8b_i_1.,0,0.6,0.7369594293357111,bma_judge_w8,ok,bma_judge_w8_0.6
1343,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.7207980383415069,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1344,TRIVIA,llama3_8b_i_1.,0,0.8,0.7207980383415069,bma_judge_w8,ok,bma_judge_w8_0.8
1345,TRIVIA,llama3_8b_i_1.,0,0.9,0.666016495764601,bma_judge_w8,ok,bma_judge_w8_0.9
1346,TRIVIA,llama3_8b_i_1.,0,0.1,0.7462104324565314,bma_judge,ok,bma_judge_0.1
1347,TRIVIA,llama3_8b_i_1.,0,0.2,0.7462104324565314,bma_judge,ok,bma_judge_0.2
1348,TRIVIA,llama3_8b_i_1.,0,0.30000000000000004,0.7462104324565314,bma_judge,ok,bma_judge_0.30000000000000004
1349,TRIVIA,llama3_8b_i_1.,0,0.4,0.7391328577797592,bma_judge,ok,bma_judge_0.4
1350,TRIVIA,llama3_8b_i_1.,0,0.5,0.7391328577797592,bma_judge,ok,bma_judge_0.5
1351,TRIVIA,llama3_8b_i_1.,0,0.6,0.7391328577797592,bma_judge,ok,bma_judge_0.6
1352,TRIVIA,llama3_8b_i_1.,0,0.7000000000000001,0.7328912171199287,bma_judge,ok,bma_judge_0.7000000000000001
1353,TRIVIA,llama3_8b_i_1.,0,0.8,0.7328912171199287,bma_judge,ok,bma_judge_0.8
1354,TRIVIA,llama3_8b_i_1.,0,0.9,0.7328912171199287,bma_judge,ok,bma_judge_0.9
1355,COQA,llama3_8b_1.,152,0.1,0.034222960030647426,bleu,ok,bleu_0.1
1356,COQA,llama3_8b_1.,152,0.2,0.023879453454220406,bleu,ok,bleu_0.2
1357,COQA,llama3_8b_1.,152,0.30000000000000004,0.018005363299706294,bleu,ok,bleu_0.30000000000000004
1358,COQA,llama3_8b_1.,152,0.4,0.01391903971395735,bleu,ok,bleu_0.4
1359,COQA,llama3_8b_1.,152,0.5,0.010981994636700293,bleu,ok,bleu_0.5
1360,COQA,llama3_8b_1.,152,0.6,0.007406461499169965,bleu,ok,bleu_0.6
1361,COQA,llama3_8b_1.,152,0.7000000000000001,0.004724811646022219,bleu,ok,bleu_0.7000000000000001
1362,COQA,llama3_8b_1.,152,0.8,0.0022985570169837826,bleu,ok,bleu_0.8
1363,COQA,llama3_8b_1.,152,0.9,0.0011492785084918913,bleu,ok,bleu_0.9
1364,COQA,llama3_8b_1.,152,0.1,0.05082364959775252,bleu_adapt,ok,bleu_adapt_0.1
1365,COQA,llama3_8b_1.,152,0.2,0.03039203166900779,bleu_adapt,ok,bleu_adapt_0.2
1366,COQA,llama3_8b_1.,152,0.30000000000000004,0.021070105989018005,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1367,COQA,llama3_8b_1.,152,0.4,0.01557910867066786,bleu_adapt,ok,bleu_adapt_0.4
1368,COQA,llama3_8b_1.,152,0.5,0.01200357553313753,bleu_adapt,ok,bleu_adapt_0.5
1369,COQA,llama3_8b_1.,152,0.6,0.008300344783552547,bleu_adapt,ok,bleu_adapt_0.6
1370,COQA,llama3_8b_1.,152,0.7000000000000001,0.005363299706295492,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1371,COQA,llama3_8b_1.,152,0.8,0.0025539522410930916,bleu_adapt,ok,bleu_adapt_0.8
1372,COQA,llama3_8b_1.,152,0.9,0.0014046737326012004,bleu_adapt,ok,bleu_adapt_0.9
1373,COQA,llama3_8b_1.,0,0.1,0.24451960415883753,rouge1,ok,rouge1_0.1
1374,COQA,llama3_8b_1.,0,0.2,0.15395214831516973,rouge1,ok,rouge1_0.2
1375,COQA,llama3_8b_1.,0,0.30000000000000004,0.11875234874107478,rouge1,ok,rouge1_0.30000000000000004
1376,COQA,llama3_8b_1.,0,0.4,0.09106852060628837,rouge1,ok,rouge1_0.4
1377,COQA,llama3_8b_1.,0,0.5,0.06877113866967305,rouge1,ok,rouge1_0.5
1378,COQA,llama3_8b_1.,0,0.6,0.05787298008267568,rouge1,ok,rouge1_0.6
1379,COQA,llama3_8b_1.,0,0.7000000000000001,0.04108731053488664,rouge1,ok,rouge1_0.7000000000000001
1380,COQA,llama3_8b_1.,0,0.8,0.031817612426406114,rouge1,ok,rouge1_0.8
1381,COQA,llama3_8b_1.,0,0.9,0.023424777652511587,rouge1,ok,rouge1_0.9
1382,COQA,llama3_8b_1.,0,0.1,0.14355505449079292,rouge2,ok,rouge2_0.1
1383,COQA,llama3_8b_1.,0,0.2,0.09244644870349493,rouge2,ok,rouge2_0.2
1384,COQA,llama3_8b_1.,0,0.30000000000000004,0.07215332581736189,rouge2,ok,rouge2_0.30000000000000004
1385,COQA,llama3_8b_1.,0,0.4,0.05712138293874483,rouge2,ok,rouge2_0.4
1386,COQA,llama3_8b_1.,0,0.5,0.04221470625078291,rouge2,ok,rouge2_0.5
1387,COQA,llama3_8b_1.,0,0.6,0.03469873481147438,rouge2,ok,rouge2_0.6
1388,COQA,llama3_8b_1.,0,0.7000000000000001,0.026431166228235,rouge2,ok,rouge2_0.7000000000000001
1389,COQA,llama3_8b_1.,0,0.8,0.020293122886133032,rouge2,ok,rouge2_0.8
1390,COQA,llama3_8b_1.,0,0.9,0.01503194287861706,rouge2,ok,rouge2_0.9
1391,COQA,llama3_8b_1.,0,0.1,0.24013528748590757,rougeL,ok,rougeL_0.1
1392,COQA,llama3_8b_1.,0,0.2,0.15194788926468747,rougeL,ok,rougeL_0.2
1393,COQA,llama3_8b_1.,0,0.30000000000000004,0.11687335588124766,rougeL,ok,rougeL_0.30000000000000004
1394,COQA,llama3_8b_1.,0,0.4,0.09006639108104722,rougeL,ok,rougeL_0.4
1395,COQA,llama3_8b_1.,0,0.5,0.06776900914443192,rougeL,ok,rougeL_0.5
1396,COQA,llama3_8b_1.,0,0.6,0.05774771389202055,rougeL,ok,rougeL_0.6
1397,COQA,llama3_8b_1.,0,0.7000000000000001,0.04083677815357635,rougeL,ok,rougeL_0.7000000000000001
1398,COQA,llama3_8b_1.,0,0.8,0.03169234623575097,rougeL,ok,rougeL_0.8
1399,COQA,llama3_8b_1.,0,0.9,0.023424777652511587,rougeL,ok,rougeL_0.9
1400,COQA,llama3_8b_1.,0,0.1,0.24101215082049354,rougeLsum,ok,rougeLsum_0.1
1401,COQA,llama3_8b_1.,0,0.2,0.15244895402730804,rougeLsum,ok,rougeLsum_0.2
1402,COQA,llama3_8b_1.,0,0.30000000000000004,0.11687335588124766,rougeLsum,ok,rougeLsum_0.30000000000000004
1403,COQA,llama3_8b_1.,0,0.4,0.09006639108104722,rougeLsum,ok,rougeLsum_0.4
1404,COQA,llama3_8b_1.,0,0.5,0.06776900914443192,rougeLsum,ok,rougeLsum_0.5
1405,COQA,llama3_8b_1.,0,0.6,0.05774771389202055,rougeLsum,ok,rougeLsum_0.6
1406,COQA,llama3_8b_1.,0,0.7000000000000001,0.04083677815357635,rougeLsum,ok,rougeLsum_0.7000000000000001
1407,COQA,llama3_8b_1.,0,0.8,0.03169234623575097,rougeLsum,ok,rougeLsum_0.8
1408,COQA,llama3_8b_1.,0,0.9,0.023424777652511587,rougeLsum,ok,rougeLsum_0.9
1409,COQA,llama3_8b_1.,0,0.1,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
1410,COQA,llama3_8b_1.,0,0.2,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
1411,COQA,llama3_8b_1.,0,0.30000000000000004,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
1412,COQA,llama3_8b_1.,0,0.4,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
1413,COQA,llama3_8b_1.,0,0.5,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
1414,COQA,llama3_8b_1.,0,0.6,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
1415,COQA,llama3_8b_1.,0,0.7000000000000001,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
1416,COQA,llama3_8b_1.,0,0.8,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
1417,COQA,llama3_8b_1.,0,0.9,0.37379431291494425,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
1418,COQA,llama3_8b_1.,0,0.1,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.1
1419,COQA,llama3_8b_1.,0,0.2,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.2
1420,COQA,llama3_8b_1.,0,0.30000000000000004,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.30000000000000004
1421,COQA,llama3_8b_1.,0,0.4,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.4
1422,COQA,llama3_8b_1.,0,0.5,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.5
1423,COQA,llama3_8b_1.,0,0.6,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.6
1424,COQA,llama3_8b_1.,0,0.7000000000000001,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.7000000000000001
1425,COQA,llama3_8b_1.,0,0.8,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.8
1426,COQA,llama3_8b_1.,0,0.9,0.5426531379180759,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.9
1427,COQA,llama3_8b_1.,0,0.1,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.1
1428,COQA,llama3_8b_1.,0,0.2,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.2
1429,COQA,llama3_8b_1.,0,0.30000000000000004,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.30000000000000004
1430,COQA,llama3_8b_1.,0,0.4,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.4
1431,COQA,llama3_8b_1.,0,0.5,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.5
1432,COQA,llama3_8b_1.,0,0.6,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.6
1433,COQA,llama3_8b_1.,0,0.7000000000000001,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.7000000000000001
1434,COQA,llama3_8b_1.,0,0.8,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.8
1435,COQA,llama3_8b_1.,0,0.9,0.37642490291870223,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.9
1436,COQA,llama3_8b_1.,1,0.1,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.1
1437,COQA,llama3_8b_1.,1,0.2,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.2
1438,COQA,llama3_8b_1.,1,0.30000000000000004,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.30000000000000004
1439,COQA,llama3_8b_1.,1,0.4,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.4
1440,COQA,llama3_8b_1.,1,0.5,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.5
1441,COQA,llama3_8b_1.,1,0.6,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.6
1442,COQA,llama3_8b_1.,1,0.7000000000000001,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.7000000000000001
1443,COQA,llama3_8b_1.,1,0.8,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.8
1444,COQA,llama3_8b_1.,1,0.9,0.2881483337509396,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.9
1445,COQA,llama3_8b_1.,0,0.1,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.1
1446,COQA,llama3_8b_1.,0,0.2,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.2
1447,COQA,llama3_8b_1.,0,0.30000000000000004,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.30000000000000004
1448,COQA,llama3_8b_1.,0,0.4,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.4
1449,COQA,llama3_8b_1.,0,0.5,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.5
1450,COQA,llama3_8b_1.,0,0.6,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.6
1451,COQA,llama3_8b_1.,0,0.7000000000000001,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.7000000000000001
1452,COQA,llama3_8b_1.,0,0.8,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.8
1453,COQA,llama3_8b_1.,0,0.9,0.20506075410246774,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.9
1454,COQA,llama3_8b_1.,0,0.1,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
1455,COQA,llama3_8b_1.,0,0.2,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
1456,COQA,llama3_8b_1.,0,0.30000000000000004,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
1457,COQA,llama3_8b_1.,0,0.4,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
1458,COQA,llama3_8b_1.,0,0.5,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
1459,COQA,llama3_8b_1.,0,0.6,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
1460,COQA,llama3_8b_1.,0,0.7000000000000001,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
1461,COQA,llama3_8b_1.,0,0.8,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
1462,COQA,llama3_8b_1.,0,0.9,0.20430915695853688,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
1463,COQA,llama3_8b_1.,0,0.1,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
1464,COQA,llama3_8b_1.,0,0.2,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
1465,COQA,llama3_8b_1.,0,0.30000000000000004,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
1466,COQA,llama3_8b_1.,0,0.4,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
1467,COQA,llama3_8b_1.,0,0.5,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
1468,COQA,llama3_8b_1.,0,0.6,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
1469,COQA,llama3_8b_1.,0,0.7000000000000001,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
1470,COQA,llama3_8b_1.,0,0.8,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
1471,COQA,llama3_8b_1.,0,0.9,0.5461605912564199,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
1472,COQA,llama3_8b_1.,0,0.1,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
1473,COQA,llama3_8b_1.,0,0.2,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
1474,COQA,llama3_8b_1.,0,0.30000000000000004,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
1475,COQA,llama3_8b_1.,0,0.4,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
1476,COQA,llama3_8b_1.,0,0.5,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
1477,COQA,llama3_8b_1.,0,0.6,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
1478,COQA,llama3_8b_1.,0,0.7000000000000001,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
1479,COQA,llama3_8b_1.,0,0.8,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
1480,COQA,llama3_8b_1.,0,0.9,0.21783790554929225,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
1481,COQA,llama3_8b_1.,0,0.1,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.1
1482,COQA,llama3_8b_1.,0,0.2,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.2
1483,COQA,llama3_8b_1.,0,0.30000000000000004,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.30000000000000004
1484,COQA,llama3_8b_1.,0,0.4,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.4
1485,COQA,llama3_8b_1.,0,0.5,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.5
1486,COQA,llama3_8b_1.,0,0.6,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.6
1487,COQA,llama3_8b_1.,0,0.7000000000000001,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.7000000000000001
1488,COQA,llama3_8b_1.,0,0.8,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.8
1489,COQA,llama3_8b_1.,0,0.9,0.46085431542026806,j_llama70b_gen_16_0.5,ok,j_llama70b_gen_16_0.5_0.9
1490,COQA,llama3_8b_1.,0,0.1,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.1
1491,COQA,llama3_8b_1.,0,0.2,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.2
1492,COQA,llama3_8b_1.,0,0.30000000000000004,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.30000000000000004
1493,COQA,llama3_8b_1.,0,0.4,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.4
1494,COQA,llama3_8b_1.,0,0.5,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.5
1495,COQA,llama3_8b_1.,0,0.6,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.6
1496,COQA,llama3_8b_1.,0,0.7000000000000001,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.7000000000000001
1497,COQA,llama3_8b_1.,0,0.8,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.8
1498,COQA,llama3_8b_1.,0,0.9,0.42189653012651884,j_llama8b_gen_16_0.5,ok,j_llama8b_gen_16_0.5_0.9
1499,COQA,llama3_8b_1.,0,0.1,0.67844168858825,bma_judge_w8,ok,bma_judge_w8_0.1
1500,COQA,llama3_8b_1.,0,0.2,0.5371414255292497,bma_judge_w8,ok,bma_judge_w8_0.2
1501,COQA,llama3_8b_1.,0,0.30000000000000004,0.43279468871351623,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1502,COQA,llama3_8b_1.,0,0.4,0.3543780533633972,bma_judge_w8,ok,bma_judge_w8_0.4
1503,COQA,llama3_8b_1.,0,0.5,0.28648377802831015,bma_judge_w8,ok,bma_judge_w8_0.5
1504,COQA,llama3_8b_1.,0,0.6,0.2222222222222222,bma_judge_w8,ok,bma_judge_w8_0.6
1505,COQA,llama3_8b_1.,0,0.7000000000000001,0.16710509833395967,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1506,COQA,llama3_8b_1.,0,0.8,0.11950394588500564,bma_judge_w8,ok,bma_judge_w8_0.8
1507,COQA,llama3_8b_1.,0,0.9,0.07541024677439559,bma_judge_w8,ok,bma_judge_w8_0.9
1508,COQA,llama3_8b_1.,0,0.1,0.6536389828385318,bma_judge,ok,bma_judge_0.1
1509,COQA,llama3_8b_1.,0,0.2,0.5777276713015157,bma_judge,ok,bma_judge_0.2
1510,COQA,llama3_8b_1.,0,0.30000000000000004,0.5777276713015157,bma_judge,ok,bma_judge_0.30000000000000004
1511,COQA,llama3_8b_1.,0,0.4,0.4116247024927972,bma_judge,ok,bma_judge_0.4
1512,COQA,llama3_8b_1.,0,0.5,0.2501565827383189,bma_judge,ok,bma_judge_0.5
1513,COQA,llama3_8b_1.,0,0.6,0.2501565827383189,bma_judge,ok,bma_judge_0.6
1514,COQA,llama3_8b_1.,0,0.7000000000000001,0.17374420643868219,bma_judge,ok,bma_judge_0.7000000000000001
1515,COQA,llama3_8b_1.,0,0.8,0.17374420643868219,bma_judge,ok,bma_judge_0.8
1516,COQA,llama3_8b_1.,0,0.9,0.10998371539521483,bma_judge,ok,bma_judge_0.9
1517,SQUAD,llama3_8b_i_1.,5947,0.1,0.181235234559568,bleu,ok,bleu_0.1
1518,SQUAD,llama3_8b_i_1.,5947,0.2,0.14394195072561594,bleu,ok,bleu_0.2
1519,SQUAD,llama3_8b_i_1.,5947,0.30000000000000004,0.109011137360783,bleu,ok,bleu_0.30000000000000004
1520,SQUAD,llama3_8b_i_1.,5947,0.4,0.08420519743503206,bleu,ok,bleu_0.4
1521,SQUAD,llama3_8b_i_1.,5947,0.5,0.06395545055686803,bleu,ok,bleu_0.5
1522,SQUAD,llama3_8b_i_1.,5947,0.6,0.04708066149173135,bleu,ok,bleu_0.6
1523,SQUAD,llama3_8b_i_1.,5947,0.7000000000000001,0.03509956125548431,bleu,ok,bleu_0.7000000000000001
1524,SQUAD,llama3_8b_i_1.,5947,0.8,0.026155923050961864,bleu,ok,bleu_0.8
1525,SQUAD,llama3_8b_i_1.,5947,0.9,0.021262234222072223,bleu,ok,bleu_0.9
1526,SQUAD,llama3_8b_i_1.,5947,0.1,0.437900776240297,bleu_adapt,ok,bleu_adapt_0.1
1527,SQUAD,llama3_8b_i_1.,5947,0.2,0.34340195747553154,bleu_adapt,ok,bleu_adapt_0.2
1528,SQUAD,llama3_8b_i_1.,5947,0.30000000000000004,0.29294633817077287,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1529,SQUAD,llama3_8b_i_1.,5947,0.4,0.2521093486331421,bleu_adapt,ok,bleu_adapt_0.4
1530,SQUAD,llama3_8b_i_1.,5947,0.5,0.22105973675329058,bleu_adapt,ok,bleu_adapt_0.5
1531,SQUAD,llama3_8b_i_1.,5947,0.6,0.20047249409382384,bleu_adapt,ok,bleu_adapt_0.6
1532,SQUAD,llama3_8b_i_1.,5947,0.7000000000000001,0.1842726965912926,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1533,SQUAD,llama3_8b_i_1.,5947,0.8,0.17482281471481606,bleu_adapt,ok,bleu_adapt_0.8
1534,SQUAD,llama3_8b_i_1.,5947,0.9,0.16976037799527505,bleu_adapt,ok,bleu_adapt_0.9
1535,SQUAD,llama3_8b_i_1.,5945,0.1,0.9402834008097166,rouge1,ok,rouge1_0.1
1536,SQUAD,llama3_8b_i_1.,5945,0.2,0.8437921727395412,rouge1,ok,rouge1_0.2
1537,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.7493252361673415,rouge1,ok,rouge1_0.30000000000000004
1538,SQUAD,llama3_8b_i_1.,5945,0.4,0.6357962213225371,rouge1,ok,rouge1_0.4
1539,SQUAD,llama3_8b_i_1.,5945,0.5,0.5421727395411606,rouge1,ok,rouge1_0.5
1540,SQUAD,llama3_8b_i_1.,5945,0.6,0.4883603238866397,rouge1,ok,rouge1_0.6
1541,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.4104251012145749,rouge1,ok,rouge1_0.7000000000000001
1542,SQUAD,llama3_8b_i_1.,5945,0.8,0.35644399460188936,rouge1,ok,rouge1_0.8
1543,SQUAD,llama3_8b_i_1.,5945,0.9,0.32894736842105265,rouge1,ok,rouge1_0.9
1544,SQUAD,llama3_8b_i_1.,5945,0.1,0.6535087719298246,rouge2,ok,rouge2_0.1
1545,SQUAD,llama3_8b_i_1.,5945,0.2,0.5541497975708503,rouge2,ok,rouge2_0.2
1546,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.4750337381916329,rouge2,ok,rouge2_0.30000000000000004
1547,SQUAD,llama3_8b_i_1.,5945,0.4,0.3991228070175439,rouge2,ok,rouge2_0.4
1548,SQUAD,llama3_8b_i_1.,5945,0.5,0.33771929824561403,rouge2,ok,rouge2_0.5
1549,SQUAD,llama3_8b_i_1.,5945,0.6,0.29622132253711203,rouge2,ok,rouge2_0.6
1550,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.24409581646423753,rouge2,ok,rouge2_0.7000000000000001
1551,SQUAD,llama3_8b_i_1.,5945,0.8,0.2125506072874494,rouge2,ok,rouge2_0.8
1552,SQUAD,llama3_8b_i_1.,5945,0.9,0.19163292847503374,rouge2,ok,rouge2_0.9
1553,SQUAD,llama3_8b_i_1.,5945,0.1,0.9382591093117408,rougeL,ok,rougeL_0.1
1554,SQUAD,llama3_8b_i_1.,5945,0.2,0.8392375168690959,rougeL,ok,rougeL_0.2
1555,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.7451079622132254,rougeL,ok,rougeL_0.30000000000000004
1556,SQUAD,llama3_8b_i_1.,5945,0.4,0.62972334682861,rougeL,ok,rougeL_0.4
1557,SQUAD,llama3_8b_i_1.,5945,0.5,0.5386302294197031,rougeL,ok,rougeL_0.5
1558,SQUAD,llama3_8b_i_1.,5945,0.6,0.48346828609986503,rougeL,ok,rougeL_0.6
1559,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.40789473684210525,rougeL,ok,rougeL_0.7000000000000001
1560,SQUAD,llama3_8b_i_1.,5945,0.8,0.3547570850202429,rougeL,ok,rougeL_0.8
1561,SQUAD,llama3_8b_i_1.,5945,0.9,0.32776653171390013,rougeL,ok,rougeL_0.9
1562,SQUAD,llama3_8b_i_1.,5945,0.1,0.9382591093117408,rougeLsum,ok,rougeLsum_0.1
1563,SQUAD,llama3_8b_i_1.,5945,0.2,0.8392375168690959,rougeLsum,ok,rougeLsum_0.2
1564,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.7451079622132254,rougeLsum,ok,rougeLsum_0.30000000000000004
1565,SQUAD,llama3_8b_i_1.,5945,0.4,0.62972334682861,rougeLsum,ok,rougeLsum_0.4
1566,SQUAD,llama3_8b_i_1.,5945,0.5,0.5386302294197031,rougeLsum,ok,rougeLsum_0.5
1567,SQUAD,llama3_8b_i_1.,5945,0.6,0.48346828609986503,rougeLsum,ok,rougeLsum_0.6
1568,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.40789473684210525,rougeLsum,ok,rougeLsum_0.7000000000000001
1569,SQUAD,llama3_8b_i_1.,5945,0.8,0.3547570850202429,rougeLsum,ok,rougeLsum_0.8
1570,SQUAD,llama3_8b_i_1.,5945,0.9,0.32776653171390013,rougeLsum,ok,rougeLsum_0.9
1571,SQUAD,llama3_8b_i_1.,5945,0.1,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.1
1572,SQUAD,llama3_8b_i_1.,5945,0.2,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.2
1573,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.30000000000000004
1574,SQUAD,llama3_8b_i_1.,5945,0.4,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.4
1575,SQUAD,llama3_8b_i_1.,5945,0.5,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.5
1576,SQUAD,llama3_8b_i_1.,5945,0.6,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.6
1577,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.9996626180836707,bert_score_f1,ok,bert_score_f1_0.7000000000000001
1578,SQUAD,llama3_8b_i_1.,5945,0.8,0.9649122807017544,bert_score_f1,ok,bert_score_f1_0.8
1579,SQUAD,llama3_8b_i_1.,5945,0.9,0.43100539811066124,bert_score_f1,ok,bert_score_f1_0.9
1580,SQUAD,llama3_8b_i_1.,5945,0.1,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
1581,SQUAD,llama3_8b_i_1.,5945,0.2,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
1582,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
1583,SQUAD,llama3_8b_i_1.,5945,0.4,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
1584,SQUAD,llama3_8b_i_1.,5945,0.5,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
1585,SQUAD,llama3_8b_i_1.,5945,0.6,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
1586,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
1587,SQUAD,llama3_8b_i_1.,5945,0.8,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
1588,SQUAD,llama3_8b_i_1.,5945,0.9,0.8943994601889339,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
1589,SQUAD,llama3_8b_i_1.,5945,0.1,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.1
1590,SQUAD,llama3_8b_i_1.,5945,0.2,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.2
1591,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.30000000000000004
1592,SQUAD,llama3_8b_i_1.,5945,0.4,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.4
1593,SQUAD,llama3_8b_i_1.,5945,0.5,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.5
1594,SQUAD,llama3_8b_i_1.,5945,0.6,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.6
1595,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.7000000000000001
1596,SQUAD,llama3_8b_i_1.,5945,0.8,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.8
1597,SQUAD,llama3_8b_i_1.,5945,0.9,0.9382591093117408,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.9
1598,SQUAD,llama3_8b_i_1.,5947,0.1,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.1
1599,SQUAD,llama3_8b_i_1.,5947,0.2,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.2
1600,SQUAD,llama3_8b_i_1.,5947,0.30000000000000004,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.30000000000000004
1601,SQUAD,llama3_8b_i_1.,5947,0.4,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.4
1602,SQUAD,llama3_8b_i_1.,5947,0.5,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.5
1603,SQUAD,llama3_8b_i_1.,5947,0.6,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.6
1604,SQUAD,llama3_8b_i_1.,5947,0.7000000000000001,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.7000000000000001
1605,SQUAD,llama3_8b_i_1.,5947,0.8,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.8
1606,SQUAD,llama3_8b_i_1.,5947,0.9,0.8771515356058049,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.9
1607,SQUAD,llama3_8b_i_1.,5945,0.1,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.1
1608,SQUAD,llama3_8b_i_1.,5945,0.2,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.2
1609,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.30000000000000004
1610,SQUAD,llama3_8b_i_1.,5945,0.4,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.4
1611,SQUAD,llama3_8b_i_1.,5945,0.5,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.5
1612,SQUAD,llama3_8b_i_1.,5945,0.6,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.6
1613,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.7000000000000001
1614,SQUAD,llama3_8b_i_1.,5945,0.8,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.8
1615,SQUAD,llama3_8b_i_1.,5945,0.9,0.9380904183535762,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.9
1616,SQUAD,llama3_8b_i_1.,5945,0.1,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.1
1617,SQUAD,llama3_8b_i_1.,5945,0.2,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.2
1618,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.30000000000000004
1619,SQUAD,llama3_8b_i_1.,5945,0.4,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.4
1620,SQUAD,llama3_8b_i_1.,5945,0.5,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.5
1621,SQUAD,llama3_8b_i_1.,5945,0.6,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.6
1622,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.7000000000000001
1623,SQUAD,llama3_8b_i_1.,5945,0.8,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.8
1624,SQUAD,llama3_8b_i_1.,5945,0.9,0.876855600539811,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.9
1625,SQUAD,llama3_8b_i_1.,6168,0.1,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.1
1626,SQUAD,llama3_8b_i_1.,6168,0.2,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.2
1627,SQUAD,llama3_8b_i_1.,6168,0.30000000000000004,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.30000000000000004
1628,SQUAD,llama3_8b_i_1.,6168,0.4,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.4
1629,SQUAD,llama3_8b_i_1.,6168,0.5,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.5
1630,SQUAD,llama3_8b_i_1.,6168,0.6,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.6
1631,SQUAD,llama3_8b_i_1.,6168,0.7000000000000001,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.7000000000000001
1632,SQUAD,llama3_8b_i_1.,6168,0.8,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.8
1633,SQUAD,llama3_8b_i_1.,6168,0.9,0.898159509202454,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.9
1634,SQUAD,llama3_8b_i_1.,5946,0.1,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.1
1635,SQUAD,llama3_8b_i_1.,5946,0.2,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.2
1636,SQUAD,llama3_8b_i_1.,5946,0.30000000000000004,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.30000000000000004
1637,SQUAD,llama3_8b_i_1.,5946,0.4,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.4
1638,SQUAD,llama3_8b_i_1.,5946,0.5,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.5
1639,SQUAD,llama3_8b_i_1.,5946,0.6,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.6
1640,SQUAD,llama3_8b_i_1.,5946,0.7000000000000001,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.7000000000000001
1641,SQUAD,llama3_8b_i_1.,5946,0.8,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.8
1642,SQUAD,llama3_8b_i_1.,5946,0.9,0.8861143917664923,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.9
1643,SQUAD,llama3_8b_i_1.,5945,0.1,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.1
1644,SQUAD,llama3_8b_i_1.,5945,0.2,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.2
1645,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.30000000000000004
1646,SQUAD,llama3_8b_i_1.,5945,0.4,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.4
1647,SQUAD,llama3_8b_i_1.,5945,0.5,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.5
1648,SQUAD,llama3_8b_i_1.,5945,0.6,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.6
1649,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.7000000000000001
1650,SQUAD,llama3_8b_i_1.,5945,0.8,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.8
1651,SQUAD,llama3_8b_i_1.,5945,0.9,0.9385964912280702,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.9
1652,SQUAD,llama3_8b_i_1.,5945,0.1,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
1653,SQUAD,llama3_8b_i_1.,5945,0.2,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
1654,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
1655,SQUAD,llama3_8b_i_1.,5945,0.4,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
1656,SQUAD,llama3_8b_i_1.,5945,0.5,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
1657,SQUAD,llama3_8b_i_1.,5945,0.6,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
1658,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
1659,SQUAD,llama3_8b_i_1.,5945,0.8,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
1660,SQUAD,llama3_8b_i_1.,5945,0.9,0.939608636977058,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
1661,SQUAD,llama3_8b_i_1.,5946,0.1,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
1662,SQUAD,llama3_8b_i_1.,5946,0.2,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
1663,SQUAD,llama3_8b_i_1.,5946,0.30000000000000004,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
1664,SQUAD,llama3_8b_i_1.,5946,0.4,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
1665,SQUAD,llama3_8b_i_1.,5946,0.5,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
1666,SQUAD,llama3_8b_i_1.,5946,0.6,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
1667,SQUAD,llama3_8b_i_1.,5946,0.7000000000000001,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
1668,SQUAD,llama3_8b_i_1.,5946,0.8,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
1669,SQUAD,llama3_8b_i_1.,5946,0.9,0.8791968955626792,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
1670,SQUAD,llama3_8b_i_1.,5945,0.1,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.1
1671,SQUAD,llama3_8b_i_1.,5945,0.2,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.2
1672,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.30000000000000004
1673,SQUAD,llama3_8b_i_1.,5945,0.4,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.4
1674,SQUAD,llama3_8b_i_1.,5945,0.5,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.5
1675,SQUAD,llama3_8b_i_1.,5945,0.6,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.6
1676,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.7000000000000001
1677,SQUAD,llama3_8b_i_1.,5945,0.8,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.8
1678,SQUAD,llama3_8b_i_1.,5945,0.9,0.8800607287449392,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.9
1679,SQUAD,llama3_8b_i_1.,5945,0.1,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.1
1680,SQUAD,llama3_8b_i_1.,5945,0.2,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.2
1681,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.30000000000000004
1682,SQUAD,llama3_8b_i_1.,5945,0.4,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.4
1683,SQUAD,llama3_8b_i_1.,5945,0.5,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.5
1684,SQUAD,llama3_8b_i_1.,5945,0.6,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.6
1685,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.7000000000000001
1686,SQUAD,llama3_8b_i_1.,5945,0.8,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.8
1687,SQUAD,llama3_8b_i_1.,5945,0.9,0.8776990553306343,j_llama8b_qa_16.0_0.49_2,ok,j_llama8b_qa_16.0_0.49_2_0.9
1688,SQUAD,llama3_8b_i_1.,5947,0.1,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.1
1689,SQUAD,llama3_8b_i_1.,5947,0.2,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.2
1690,SQUAD,llama3_8b_i_1.,5947,0.30000000000000004,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.30000000000000004
1691,SQUAD,llama3_8b_i_1.,5947,0.4,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.4
1692,SQUAD,llama3_8b_i_1.,5947,0.5,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.5
1693,SQUAD,llama3_8b_i_1.,5947,0.6,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.6
1694,SQUAD,llama3_8b_i_1.,5947,0.7000000000000001,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.7000000000000001
1695,SQUAD,llama3_8b_i_1.,5947,0.8,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.8
1696,SQUAD,llama3_8b_i_1.,5947,0.9,0.8773202834964563,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.9
1697,SQUAD,llama3_8b_i_1.,5949,0.1,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.1
1698,SQUAD,llama3_8b_i_1.,5949,0.2,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.2
1699,SQUAD,llama3_8b_i_1.,5949,0.30000000000000004,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.30000000000000004
1700,SQUAD,llama3_8b_i_1.,5949,0.4,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.4
1701,SQUAD,llama3_8b_i_1.,5949,0.5,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.5
1702,SQUAD,llama3_8b_i_1.,5949,0.6,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.6
1703,SQUAD,llama3_8b_i_1.,5949,0.7000000000000001,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.7000000000000001
1704,SQUAD,llama3_8b_i_1.,5949,0.8,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.8
1705,SQUAD,llama3_8b_i_1.,5949,0.9,0.9388926401080351,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.9
1706,SQUAD,llama3_8b_i_1.,0,0.1,0.5007159100480081,ood_label,ok,ood_label_0.1
1707,SQUAD,llama3_8b_i_1.,0,0.2,0.5007159100480081,ood_label,ok,ood_label_0.2
1708,SQUAD,llama3_8b_i_1.,0,0.30000000000000004,0.5007159100480081,ood_label,ok,ood_label_0.30000000000000004
1709,SQUAD,llama3_8b_i_1.,0,0.4,0.5007159100480081,ood_label,ok,ood_label_0.4
1710,SQUAD,llama3_8b_i_1.,0,0.5,0.5007159100480081,ood_label,ok,ood_label_0.5
1711,SQUAD,llama3_8b_i_1.,0,0.6,0.5007159100480081,ood_label,ok,ood_label_0.6
1712,SQUAD,llama3_8b_i_1.,0,0.7000000000000001,0.5007159100480081,ood_label,ok,ood_label_0.7000000000000001
1713,SQUAD,llama3_8b_i_1.,0,0.8,0.5007159100480081,ood_label,ok,ood_label_0.8
1714,SQUAD,llama3_8b_i_1.,0,0.9,0.5007159100480081,ood_label,ok,ood_label_0.9
1715,SQUAD,llama3_8b_i_1.,5945,0.1,0.9708164642375169,bma_judge_w8,ok,bma_judge_w8_0.1
1716,SQUAD,llama3_8b_i_1.,5945,0.2,0.9625506072874493,bma_judge_w8,ok,bma_judge_w8_0.2
1717,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.9505735492577598,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1718,SQUAD,llama3_8b_i_1.,5945,0.4,0.9434885290148448,bma_judge_w8,ok,bma_judge_w8_0.4
1719,SQUAD,llama3_8b_i_1.,5945,0.5,0.9269568151147098,bma_judge_w8,ok,bma_judge_w8_0.5
1720,SQUAD,llama3_8b_i_1.,5945,0.6,0.9185222672064778,bma_judge_w8,ok,bma_judge_w8_0.6
1721,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.9026653171390013,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1722,SQUAD,llama3_8b_i_1.,5945,0.8,0.8496963562753036,bma_judge_w8,ok,bma_judge_w8_0.8
1723,SQUAD,llama3_8b_i_1.,5945,0.9,0.7916666666666666,bma_judge_w8,ok,bma_judge_w8_0.9
1724,SQUAD,llama3_8b_i_1.,5945,0.1,0.956646423751687,bma_judge,ok,bma_judge_0.1
1725,SQUAD,llama3_8b_i_1.,5945,0.2,0.9446693657219973,bma_judge,ok,bma_judge_0.2
1726,SQUAD,llama3_8b_i_1.,5945,0.30000000000000004,0.9399460188933874,bma_judge,ok,bma_judge_0.30000000000000004
1727,SQUAD,llama3_8b_i_1.,5945,0.4,0.9399460188933874,bma_judge,ok,bma_judge_0.4
1728,SQUAD,llama3_8b_i_1.,5945,0.5,0.9379217273954116,bma_judge,ok,bma_judge_0.5
1729,SQUAD,llama3_8b_i_1.,5945,0.6,0.9342105263157895,bma_judge,ok,bma_judge_0.6
1730,SQUAD,llama3_8b_i_1.,5945,0.7000000000000001,0.9342105263157895,bma_judge,ok,bma_judge_0.7000000000000001
1731,SQUAD,llama3_8b_i_1.,5945,0.8,0.9011470985155196,bma_judge,ok,bma_judge_0.8
1732,SQUAD,llama3_8b_i_1.,5945,0.9,0.8380566801619433,bma_judge,ok,bma_judge_0.9
1733,COLLIE,llama3_8b_1.,2,0.1,0.07507218479307026,bleu,ok,bleu_0.1
1734,COLLIE,llama3_8b_1.,2,0.2,0.05582290664100096,bleu,ok,bleu_0.2
1735,COLLIE,llama3_8b_1.,2,0.30000000000000004,0.030798845043310877,bleu,ok,bleu_0.30000000000000004
1736,COLLIE,llama3_8b_1.,2,0.4,0.007218479307025986,bleu,ok,bleu_0.4
1737,COLLIE,llama3_8b_1.,2,0.5,0.0009624639076034649,bleu,ok,bleu_0.5
1738,COLLIE,llama3_8b_1.,2,0.6,0.00048123195380173246,bleu,ok,bleu_0.6
1739,COLLIE,llama3_8b_1.,2,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
1740,COLLIE,llama3_8b_1.,2,0.8,0.0,bleu,ok,bleu_0.8
1741,COLLIE,llama3_8b_1.,2,0.9,0.0,bleu,ok,bleu_0.9
1742,COLLIE,llama3_8b_1.,2,0.1,0.07507218479307026,bleu_adapt,ok,bleu_adapt_0.1
1743,COLLIE,llama3_8b_1.,2,0.2,0.05582290664100096,bleu_adapt,ok,bleu_adapt_0.2
1744,COLLIE,llama3_8b_1.,2,0.30000000000000004,0.030798845043310877,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1745,COLLIE,llama3_8b_1.,2,0.4,0.007218479307025986,bleu_adapt,ok,bleu_adapt_0.4
1746,COLLIE,llama3_8b_1.,2,0.5,0.0009624639076034649,bleu_adapt,ok,bleu_adapt_0.5
1747,COLLIE,llama3_8b_1.,2,0.6,0.00048123195380173246,bleu_adapt,ok,bleu_adapt_0.6
1748,COLLIE,llama3_8b_1.,2,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1749,COLLIE,llama3_8b_1.,2,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
1750,COLLIE,llama3_8b_1.,2,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
1751,COLLIE,llama3_8b_1.,0,0.1,0.25769230769230766,rouge1,ok,rouge1_0.1
1752,COLLIE,llama3_8b_1.,0,0.2,0.14711538461538462,rouge1,ok,rouge1_0.2
1753,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.08701923076923077,rouge1,ok,rouge1_0.30000000000000004
1754,COLLIE,llama3_8b_1.,0,0.4,0.06682692307692308,rouge1,ok,rouge1_0.4
1755,COLLIE,llama3_8b_1.,0,0.5,0.04326923076923077,rouge1,ok,rouge1_0.5
1756,COLLIE,llama3_8b_1.,0,0.6,0.01201923076923077,rouge1,ok,rouge1_0.6
1757,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.0009615384615384616,rouge1,ok,rouge1_0.7000000000000001
1758,COLLIE,llama3_8b_1.,0,0.8,0.0004807692307692308,rouge1,ok,rouge1_0.8
1759,COLLIE,llama3_8b_1.,0,0.9,0.0,rouge1,ok,rouge1_0.9
1760,COLLIE,llama3_8b_1.,0,0.1,0.08942307692307692,rouge2,ok,rouge2_0.1
1761,COLLIE,llama3_8b_1.,0,0.2,0.07692307692307693,rouge2,ok,rouge2_0.2
1762,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.055288461538461536,rouge2,ok,rouge2_0.30000000000000004
1763,COLLIE,llama3_8b_1.,0,0.4,0.02451923076923077,rouge2,ok,rouge2_0.4
1764,COLLIE,llama3_8b_1.,0,0.5,0.006730769230769231,rouge2,ok,rouge2_0.5
1765,COLLIE,llama3_8b_1.,0,0.6,0.0014423076923076924,rouge2,ok,rouge2_0.6
1766,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.0004807692307692308,rouge2,ok,rouge2_0.7000000000000001
1767,COLLIE,llama3_8b_1.,0,0.8,0.0004807692307692308,rouge2,ok,rouge2_0.8
1768,COLLIE,llama3_8b_1.,0,0.9,0.0,rouge2,ok,rouge2_0.9
1769,COLLIE,llama3_8b_1.,0,0.1,0.2125,rougeL,ok,rougeL_0.1
1770,COLLIE,llama3_8b_1.,0,0.2,0.08653846153846154,rougeL,ok,rougeL_0.2
1771,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.07115384615384615,rougeL,ok,rougeL_0.30000000000000004
1772,COLLIE,llama3_8b_1.,0,0.4,0.03653846153846154,rougeL,ok,rougeL_0.4
1773,COLLIE,llama3_8b_1.,0,0.5,0.007211538461538462,rougeL,ok,rougeL_0.5
1774,COLLIE,llama3_8b_1.,0,0.6,0.0019230769230769232,rougeL,ok,rougeL_0.6
1775,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.0004807692307692308,rougeL,ok,rougeL_0.7000000000000001
1776,COLLIE,llama3_8b_1.,0,0.8,0.0004807692307692308,rougeL,ok,rougeL_0.8
1777,COLLIE,llama3_8b_1.,0,0.9,0.0,rougeL,ok,rougeL_0.9
1778,COLLIE,llama3_8b_1.,0,0.1,0.21298076923076922,rougeLsum,ok,rougeLsum_0.1
1779,COLLIE,llama3_8b_1.,0,0.2,0.08942307692307692,rougeLsum,ok,rougeLsum_0.2
1780,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.07596153846153846,rougeLsum,ok,rougeLsum_0.30000000000000004
1781,COLLIE,llama3_8b_1.,0,0.4,0.046634615384615385,rougeLsum,ok,rougeLsum_0.4
1782,COLLIE,llama3_8b_1.,0,0.5,0.017788461538461538,rougeLsum,ok,rougeLsum_0.5
1783,COLLIE,llama3_8b_1.,0,0.6,0.0038461538461538464,rougeLsum,ok,rougeLsum_0.6
1784,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.0009615384615384616,rougeLsum,ok,rougeLsum_0.7000000000000001
1785,COLLIE,llama3_8b_1.,0,0.8,0.0004807692307692308,rougeLsum,ok,rougeLsum_0.8
1786,COLLIE,llama3_8b_1.,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
1787,COLLIE,llama3_8b_1.,11,0.1,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.1
1788,COLLIE,llama3_8b_1.,11,0.2,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.2
1789,COLLIE,llama3_8b_1.,11,0.30000000000000004,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.30000000000000004
1790,COLLIE,llama3_8b_1.,11,0.4,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.4
1791,COLLIE,llama3_8b_1.,11,0.5,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.5
1792,COLLIE,llama3_8b_1.,11,0.6,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.6
1793,COLLIE,llama3_8b_1.,11,0.7000000000000001,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.7000000000000001
1794,COLLIE,llama3_8b_1.,11,0.8,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.8
1795,COLLIE,llama3_8b_1.,11,0.9,0.12131464475592073,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.9
1796,COLLIE,llama3_8b_1.,0,0.1,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
1797,COLLIE,llama3_8b_1.,0,0.2,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
1798,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
1799,COLLIE,llama3_8b_1.,0,0.4,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
1800,COLLIE,llama3_8b_1.,0,0.5,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
1801,COLLIE,llama3_8b_1.,0,0.6,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
1802,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
1803,COLLIE,llama3_8b_1.,0,0.8,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
1804,COLLIE,llama3_8b_1.,0,0.9,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
1805,COLLIE,llama3_8b_1.,0,0.1,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.1
1806,COLLIE,llama3_8b_1.,0,0.2,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.2
1807,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.30000000000000004
1808,COLLIE,llama3_8b_1.,0,0.4,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.4
1809,COLLIE,llama3_8b_1.,0,0.5,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.5
1810,COLLIE,llama3_8b_1.,0,0.6,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.6
1811,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.7000000000000001
1812,COLLIE,llama3_8b_1.,0,0.8,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.8
1813,COLLIE,llama3_8b_1.,0,0.9,0.046153846153846156,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.9
1814,COLLIE,llama3_8b_1.,0,0.1,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
1815,COLLIE,llama3_8b_1.,0,0.2,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
1816,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
1817,COLLIE,llama3_8b_1.,0,0.4,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
1818,COLLIE,llama3_8b_1.,0,0.5,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
1819,COLLIE,llama3_8b_1.,0,0.6,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
1820,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
1821,COLLIE,llama3_8b_1.,0,0.8,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
1822,COLLIE,llama3_8b_1.,0,0.9,0.03653846153846154,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
1823,COLLIE,llama3_8b_1.,0,0.1,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.1
1824,COLLIE,llama3_8b_1.,0,0.2,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.2
1825,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.30000000000000004
1826,COLLIE,llama3_8b_1.,0,0.4,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.4
1827,COLLIE,llama3_8b_1.,0,0.5,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.5
1828,COLLIE,llama3_8b_1.,0,0.6,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.6
1829,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.7000000000000001
1830,COLLIE,llama3_8b_1.,0,0.8,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.8
1831,COLLIE,llama3_8b_1.,0,0.9,0.004807692307692308,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.9
1832,COLLIE,llama3_8b_1.,2,0.1,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.1
1833,COLLIE,llama3_8b_1.,2,0.2,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.2
1834,COLLIE,llama3_8b_1.,2,0.30000000000000004,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.30000000000000004
1835,COLLIE,llama3_8b_1.,2,0.4,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.4
1836,COLLIE,llama3_8b_1.,2,0.5,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.5
1837,COLLIE,llama3_8b_1.,2,0.6,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.6
1838,COLLIE,llama3_8b_1.,2,0.7000000000000001,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.7000000000000001
1839,COLLIE,llama3_8b_1.,2,0.8,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.8
1840,COLLIE,llama3_8b_1.,2,0.9,0.0356111645813282,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.9
1841,COLLIE,llama3_8b_1.,2,0.1,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.1
1842,COLLIE,llama3_8b_1.,2,0.2,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.2
1843,COLLIE,llama3_8b_1.,2,0.30000000000000004,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.30000000000000004
1844,COLLIE,llama3_8b_1.,2,0.4,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.4
1845,COLLIE,llama3_8b_1.,2,0.5,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.5
1846,COLLIE,llama3_8b_1.,2,0.6,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.6
1847,COLLIE,llama3_8b_1.,2,0.7000000000000001,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.7000000000000001
1848,COLLIE,llama3_8b_1.,2,0.8,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.8
1849,COLLIE,llama3_8b_1.,2,0.9,0.0726660250240616,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.9
1850,COLLIE,llama3_8b_1.,7,0.1,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.1
1851,COLLIE,llama3_8b_1.,7,0.2,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.2
1852,COLLIE,llama3_8b_1.,7,0.30000000000000004,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.30000000000000004
1853,COLLIE,llama3_8b_1.,7,0.4,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.4
1854,COLLIE,llama3_8b_1.,7,0.5,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.5
1855,COLLIE,llama3_8b_1.,7,0.6,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.6
1856,COLLIE,llama3_8b_1.,7,0.7000000000000001,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.7000000000000001
1857,COLLIE,llama3_8b_1.,7,0.8,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.8
1858,COLLIE,llama3_8b_1.,7,0.9,0.10998552821997105,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.9
1859,COLLIE,llama3_8b_1.,0,0.5,0.14471153846153847,exact_correctness,ok,exact_correctness_0.5
1860,COLLIE,llama3_8b_1.,0,0.1,0.23798076923076922,bma_judge_w8,ok,bma_judge_w8_0.1
1861,COLLIE,llama3_8b_1.,0,0.2,0.09807692307692308,bma_judge_w8,ok,bma_judge_w8_0.2
1862,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.05144230769230769,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1863,COLLIE,llama3_8b_1.,0,0.4,0.025,bma_judge_w8,ok,bma_judge_w8_0.4
1864,COLLIE,llama3_8b_1.,0,0.5,0.009615384615384616,bma_judge_w8,ok,bma_judge_w8_0.5
1865,COLLIE,llama3_8b_1.,0,0.6,0.009615384615384616,bma_judge_w8,ok,bma_judge_w8_0.6
1866,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.004326923076923077,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1867,COLLIE,llama3_8b_1.,0,0.8,0.0014423076923076924,bma_judge_w8,ok,bma_judge_w8_0.8
1868,COLLIE,llama3_8b_1.,0,0.9,0.0004807692307692308,bma_judge_w8,ok,bma_judge_w8_0.9
1869,COLLIE,llama3_8b_1.,0,0.1,0.09375,bma_judge,ok,bma_judge_0.1
1870,COLLIE,llama3_8b_1.,0,0.2,0.06298076923076923,bma_judge,ok,bma_judge_0.2
1871,COLLIE,llama3_8b_1.,0,0.30000000000000004,0.06298076923076923,bma_judge,ok,bma_judge_0.30000000000000004
1872,COLLIE,llama3_8b_1.,0,0.4,0.028365384615384615,bma_judge,ok,bma_judge_0.4
1873,COLLIE,llama3_8b_1.,0,0.5,0.008653846153846154,bma_judge,ok,bma_judge_0.5
1874,COLLIE,llama3_8b_1.,0,0.6,0.008653846153846154,bma_judge,ok,bma_judge_0.6
1875,COLLIE,llama3_8b_1.,0,0.7000000000000001,0.0028846153846153848,bma_judge,ok,bma_judge_0.7000000000000001
1876,COLLIE,llama3_8b_1.,0,0.8,0.0028846153846153848,bma_judge,ok,bma_judge_0.8
1877,COLLIE,llama3_8b_1.,0,0.9,0.0004807692307692308,bma_judge,ok,bma_judge_0.9
1878,COLLIE,falcon_mamba_i_1.,0,0.1,0.05096153846153846,bleu,ok,bleu_0.1
1879,COLLIE,falcon_mamba_i_1.,0,0.2,0.029326923076923077,bleu,ok,bleu_0.2
1880,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.010576923076923078,bleu,ok,bleu_0.30000000000000004
1881,COLLIE,falcon_mamba_i_1.,0,0.4,0.0009615384615384616,bleu,ok,bleu_0.4
1882,COLLIE,falcon_mamba_i_1.,0,0.5,0.0004807692307692308,bleu,ok,bleu_0.5
1883,COLLIE,falcon_mamba_i_1.,0,0.6,0.0004807692307692308,bleu,ok,bleu_0.6
1884,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
1885,COLLIE,falcon_mamba_i_1.,0,0.8,0.0,bleu,ok,bleu_0.8
1886,COLLIE,falcon_mamba_i_1.,0,0.9,0.0,bleu,ok,bleu_0.9
1887,COLLIE,falcon_mamba_i_1.,0,0.1,0.05432692307692308,bleu_adapt,ok,bleu_adapt_0.1
1888,COLLIE,falcon_mamba_i_1.,0,0.2,0.02980769230769231,bleu_adapt,ok,bleu_adapt_0.2
1889,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.010576923076923078,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1890,COLLIE,falcon_mamba_i_1.,0,0.4,0.0009615384615384616,bleu_adapt,ok,bleu_adapt_0.4
1891,COLLIE,falcon_mamba_i_1.,0,0.5,0.0004807692307692308,bleu_adapt,ok,bleu_adapt_0.5
1892,COLLIE,falcon_mamba_i_1.,0,0.6,0.0004807692307692308,bleu_adapt,ok,bleu_adapt_0.6
1893,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1894,COLLIE,falcon_mamba_i_1.,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
1895,COLLIE,falcon_mamba_i_1.,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
1896,COLLIE,falcon_mamba_i_1.,0,0.1,0.33557692307692305,rouge1,ok,rouge1_0.1
1897,COLLIE,falcon_mamba_i_1.,0,0.2,0.13028846153846155,rouge1,ok,rouge1_0.2
1898,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.06634615384615385,rouge1,ok,rouge1_0.30000000000000004
1899,COLLIE,falcon_mamba_i_1.,0,0.4,0.0375,rouge1,ok,rouge1_0.4
1900,COLLIE,falcon_mamba_i_1.,0,0.5,0.017788461538461538,rouge1,ok,rouge1_0.5
1901,COLLIE,falcon_mamba_i_1.,0,0.6,0.002403846153846154,rouge1,ok,rouge1_0.6
1902,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.0004807692307692308,rouge1,ok,rouge1_0.7000000000000001
1903,COLLIE,falcon_mamba_i_1.,0,0.8,0.0004807692307692308,rouge1,ok,rouge1_0.8
1904,COLLIE,falcon_mamba_i_1.,0,0.9,0.0,rouge1,ok,rouge1_0.9
1905,COLLIE,falcon_mamba_i_1.,0,0.1,0.07836538461538461,rouge2,ok,rouge2_0.1
1906,COLLIE,falcon_mamba_i_1.,0,0.2,0.05096153846153846,rouge2,ok,rouge2_0.2
1907,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.027403846153846154,rouge2,ok,rouge2_0.30000000000000004
1908,COLLIE,falcon_mamba_i_1.,0,0.4,0.010096153846153847,rouge2,ok,rouge2_0.4
1909,COLLIE,falcon_mamba_i_1.,0,0.5,0.0033653846153846156,rouge2,ok,rouge2_0.5
1910,COLLIE,falcon_mamba_i_1.,0,0.6,0.0004807692307692308,rouge2,ok,rouge2_0.6
1911,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.0004807692307692308,rouge2,ok,rouge2_0.7000000000000001
1912,COLLIE,falcon_mamba_i_1.,0,0.8,0.0004807692307692308,rouge2,ok,rouge2_0.8
1913,COLLIE,falcon_mamba_i_1.,0,0.9,0.0,rouge2,ok,rouge2_0.9
1914,COLLIE,falcon_mamba_i_1.,0,0.1,0.2658653846153846,rougeL,ok,rougeL_0.1
1915,COLLIE,falcon_mamba_i_1.,0,0.2,0.08798076923076924,rougeL,ok,rougeL_0.2
1916,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.04567307692307692,rougeL,ok,rougeL_0.30000000000000004
1917,COLLIE,falcon_mamba_i_1.,0,0.4,0.01730769230769231,rougeL,ok,rougeL_0.4
1918,COLLIE,falcon_mamba_i_1.,0,0.5,0.0038461538461538464,rougeL,ok,rougeL_0.5
1919,COLLIE,falcon_mamba_i_1.,0,0.6,0.0014423076923076924,rougeL,ok,rougeL_0.6
1920,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.0004807692307692308,rougeL,ok,rougeL_0.7000000000000001
1921,COLLIE,falcon_mamba_i_1.,0,0.8,0.0004807692307692308,rougeL,ok,rougeL_0.8
1922,COLLIE,falcon_mamba_i_1.,0,0.9,0.0,rougeL,ok,rougeL_0.9
1923,COLLIE,falcon_mamba_i_1.,0,0.1,0.25721153846153844,rougeLsum,ok,rougeLsum_0.1
1924,COLLIE,falcon_mamba_i_1.,0,0.2,0.10096153846153846,rougeLsum,ok,rougeLsum_0.2
1925,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.05625,rougeLsum,ok,rougeLsum_0.30000000000000004
1926,COLLIE,falcon_mamba_i_1.,0,0.4,0.027884615384615386,rougeLsum,ok,rougeLsum_0.4
1927,COLLIE,falcon_mamba_i_1.,0,0.5,0.009615384615384616,rougeLsum,ok,rougeLsum_0.5
1928,COLLIE,falcon_mamba_i_1.,0,0.6,0.0014423076923076924,rougeLsum,ok,rougeLsum_0.6
1929,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.0004807692307692308,rougeLsum,ok,rougeLsum_0.7000000000000001
1930,COLLIE,falcon_mamba_i_1.,0,0.8,0.0004807692307692308,rougeLsum,ok,rougeLsum_0.8
1931,COLLIE,falcon_mamba_i_1.,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
1932,COLLIE,falcon_mamba_i_1.,0,0.1,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
1933,COLLIE,falcon_mamba_i_1.,0,0.2,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
1934,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
1935,COLLIE,falcon_mamba_i_1.,0,0.4,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
1936,COLLIE,falcon_mamba_i_1.,0,0.5,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
1937,COLLIE,falcon_mamba_i_1.,0,0.6,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
1938,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
1939,COLLIE,falcon_mamba_i_1.,0,0.8,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
1940,COLLIE,falcon_mamba_i_1.,0,0.9,0.08605769230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
1941,COLLIE,falcon_mamba_i_1.,34,0.1,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
1942,COLLIE,falcon_mamba_i_1.,34,0.2,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
1943,COLLIE,falcon_mamba_i_1.,34,0.30000000000000004,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
1944,COLLIE,falcon_mamba_i_1.,34,0.4,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
1945,COLLIE,falcon_mamba_i_1.,34,0.5,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
1946,COLLIE,falcon_mamba_i_1.,34,0.6,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
1947,COLLIE,falcon_mamba_i_1.,34,0.7000000000000001,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
1948,COLLIE,falcon_mamba_i_1.,34,0.8,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
1949,COLLIE,falcon_mamba_i_1.,34,0.9,0.13147605083088953,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
1950,COLLIE,falcon_mamba_i_1.,0,0.5,0.1658653846153846,exact_correctness,ok,exact_correctness_0.5
1951,COLLIE,falcon_mamba_i_1.,0,0.1,0.17115384615384616,bma_judge_w8,ok,bma_judge_w8_0.1
1952,COLLIE,falcon_mamba_i_1.,0,0.2,0.17115384615384616,bma_judge_w8,ok,bma_judge_w8_0.2
1953,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.17115384615384616,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
1954,COLLIE,falcon_mamba_i_1.,0,0.4,0.17115384615384616,bma_judge_w8,ok,bma_judge_w8_0.4
1955,COLLIE,falcon_mamba_i_1.,0,0.5,0.04471153846153846,bma_judge_w8,ok,bma_judge_w8_0.5
1956,COLLIE,falcon_mamba_i_1.,0,0.6,0.04471153846153846,bma_judge_w8,ok,bma_judge_w8_0.6
1957,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.04471153846153846,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
1958,COLLIE,falcon_mamba_i_1.,0,0.8,0.04471153846153846,bma_judge_w8,ok,bma_judge_w8_0.8
1959,COLLIE,falcon_mamba_i_1.,0,0.9,0.04471153846153846,bma_judge_w8,ok,bma_judge_w8_0.9
1960,COLLIE,falcon_mamba_i_1.,0,0.1,0.08605769230769231,bma_judge,ok,bma_judge_0.1
1961,COLLIE,falcon_mamba_i_1.,0,0.2,0.08605769230769231,bma_judge,ok,bma_judge_0.2
1962,COLLIE,falcon_mamba_i_1.,0,0.30000000000000004,0.08605769230769231,bma_judge,ok,bma_judge_0.30000000000000004
1963,COLLIE,falcon_mamba_i_1.,0,0.4,0.08605769230769231,bma_judge,ok,bma_judge_0.4
1964,COLLIE,falcon_mamba_i_1.,0,0.5,0.08605769230769231,bma_judge,ok,bma_judge_0.5
1965,COLLIE,falcon_mamba_i_1.,0,0.6,0.08605769230769231,bma_judge,ok,bma_judge_0.6
1966,COLLIE,falcon_mamba_i_1.,0,0.7000000000000001,0.08605769230769231,bma_judge,ok,bma_judge_0.7000000000000001
1967,COLLIE,falcon_mamba_i_1.,0,0.8,0.08605769230769231,bma_judge,ok,bma_judge_0.8
1968,COLLIE,falcon_mamba_i_1.,0,0.9,0.08605769230769231,bma_judge,ok,bma_judge_0.9
1969,COQA,llama3_70b_1.,75,0.1,0.057283763277693474,bleu,ok,bleu_0.1
1970,COQA,llama3_70b_1.,75,0.2,0.04122407688416793,bleu,ok,bleu_0.2
1971,COQA,llama3_70b_1.,75,0.30000000000000004,0.030854830551340414,bleu,ok,bleu_0.30000000000000004
1972,COQA,llama3_70b_1.,75,0.4,0.023899848254931716,bleu,ok,bleu_0.4
1973,COQA,llama3_70b_1.,75,0.5,0.019221041982802226,bleu,ok,bleu_0.5
1974,COQA,llama3_70b_1.,75,0.6,0.015048052604957006,bleu,ok,bleu_0.6
1975,COQA,llama3_70b_1.,75,0.7000000000000001,0.011380880121396054,bleu,ok,bleu_0.7000000000000001
1976,COQA,llama3_70b_1.,75,0.8,0.008219524532119373,bleu,ok,bleu_0.8
1977,COQA,llama3_70b_1.,75,0.9,0.00556398583712696,bleu,ok,bleu_0.9
1978,COQA,llama3_70b_1.,75,0.1,0.10103692463328275,bleu_adapt,ok,bleu_adapt_0.1
1979,COQA,llama3_70b_1.,75,0.2,0.057157309054122404,bleu_adapt,ok,bleu_adapt_0.2
1980,COQA,llama3_70b_1.,75,0.30000000000000004,0.04274152756702074,bleu_adapt,ok,bleu_adapt_0.30000000000000004
1981,COQA,llama3_70b_1.,75,0.4,0.032751643904906425,bleu_adapt,ok,bleu_adapt_0.4
1982,COQA,llama3_70b_1.,75,0.5,0.026302478502781994,bleu_adapt,ok,bleu_adapt_0.5
1983,COQA,llama3_70b_1.,75,0.6,0.021244309559939303,bleu_adapt,ok,bleu_adapt_0.6
1984,COQA,llama3_70b_1.,75,0.7000000000000001,0.01618614061709661,bleu_adapt,ok,bleu_adapt_0.7000000000000001
1985,COQA,llama3_70b_1.,75,0.8,0.012898330804248861,bleu_adapt,ok,bleu_adapt_0.8
1986,COQA,llama3_70b_1.,75,0.9,0.01024279210925645,bleu_adapt,ok,bleu_adapt_0.9
1987,COQA,llama3_70b_1.,0,0.1,0.3598897657522235,rouge1,ok,rouge1_0.1
1988,COQA,llama3_70b_1.,0,0.2,0.2277339346110485,rouge1,ok,rouge1_0.2
1989,COQA,llama3_70b_1.,0,0.30000000000000004,0.1642239759488914,rouge1,ok,rouge1_0.30000000000000004
1990,COQA,llama3_70b_1.,0,0.4,0.12163347112614305,rouge1,ok,rouge1_0.4
1991,COQA,llama3_70b_1.,0,0.5,0.0953275710885632,rouge1,ok,rouge1_0.5
1992,COQA,llama3_70b_1.,0,0.6,0.0777903043968433,rouge1,ok,rouge1_0.6
1993,COQA,llama3_70b_1.,0,0.7000000000000001,0.06300889389953651,rouge1,ok,rouge1_0.7000000000000001
1994,COQA,llama3_70b_1.,0,0.8,0.051484404359263436,rouge1,ok,rouge1_0.8
1995,COQA,llama3_70b_1.,0,0.9,0.04309156958536891,rouge1,ok,rouge1_0.9
1996,COQA,llama3_70b_1.,0,0.1,0.20719027934360515,rouge2,ok,rouge2_0.1
1997,COQA,llama3_70b_1.,0,0.2,0.13879493924589753,rouge2,ok,rouge2_0.2
1998,COQA,llama3_70b_1.,0,0.30000000000000004,0.10622572967556057,rouge2,ok,rouge2_0.30000000000000004
1999,COQA,llama3_70b_1.,0,0.4,0.08305148440435926,rouge2,ok,rouge2_0.4
2000,COQA,llama3_70b_1.,0,0.5,0.0647626205687085,rouge2,ok,rouge2_0.5
2001,COQA,llama3_70b_1.,0,0.6,0.054365526744331705,rouge2,ok,rouge2_0.6
2002,COQA,llama3_70b_1.,0,0.7000000000000001,0.043592634347989476,rouge2,ok,rouge2_0.7000000000000001
2003,COQA,llama3_70b_1.,0,0.8,0.035450331955405236,rouge2,ok,rouge2_0.8
2004,COQA,llama3_70b_1.,0,0.9,0.03156708004509583,rouge2,ok,rouge2_0.9
2005,COQA,llama3_70b_1.,0,0.1,0.3555054490792935,rougeL,ok,rougeL_0.1
2006,COQA,llama3_70b_1.,0,0.2,0.22585494175122134,rougeL,ok,rougeL_0.2
2007,COQA,llama3_70b_1.,0,0.30000000000000004,0.1622197168984091,rougeL,ok,rougeL_0.30000000000000004
2008,COQA,llama3_70b_1.,0,0.4,0.12000501064762621,rougeL,ok,rougeL_0.4
2009,COQA,llama3_70b_1.,0,0.5,0.09457597394463234,rougeL,ok,rougeL_0.5
2010,COQA,llama3_70b_1.,0,0.6,0.07728923963422273,rougeL,ok,rougeL_0.6
2011,COQA,llama3_70b_1.,0,0.7000000000000001,0.06250782913691595,rougeL,ok,rougeL_0.7000000000000001
2012,COQA,llama3_70b_1.,0,0.8,0.051484404359263436,rougeL,ok,rougeL_0.8
2013,COQA,llama3_70b_1.,0,0.9,0.04309156958536891,rougeL,ok,rougeL_0.9
2014,COQA,llama3_70b_1.,0,0.1,0.35600651384191406,rougeLsum,ok,rougeLsum_0.1
2015,COQA,llama3_70b_1.,0,0.2,0.22598020794187648,rougeLsum,ok,rougeLsum_0.2
2016,COQA,llama3_70b_1.,0,0.30000000000000004,0.1622197168984091,rougeLsum,ok,rougeLsum_0.30000000000000004
2017,COQA,llama3_70b_1.,0,0.4,0.12000501064762621,rougeLsum,ok,rougeLsum_0.4
2018,COQA,llama3_70b_1.,0,0.5,0.09457597394463234,rougeLsum,ok,rougeLsum_0.5
2019,COQA,llama3_70b_1.,0,0.6,0.07728923963422273,rougeLsum,ok,rougeLsum_0.6
2020,COQA,llama3_70b_1.,0,0.7000000000000001,0.06250782913691595,rougeLsum,ok,rougeLsum_0.7000000000000001
2021,COQA,llama3_70b_1.,0,0.8,0.051484404359263436,rougeLsum,ok,rougeLsum_0.8
2022,COQA,llama3_70b_1.,0,0.9,0.04309156958536891,rougeLsum,ok,rougeLsum_0.9
2023,COQA,llama3_70b_1.,0,0.1,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.1
2024,COQA,llama3_70b_1.,0,0.2,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.2
2025,COQA,llama3_70b_1.,0,0.30000000000000004,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.30000000000000004
2026,COQA,llama3_70b_1.,0,0.4,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.4
2027,COQA,llama3_70b_1.,0,0.5,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.5
2028,COQA,llama3_70b_1.,0,0.6,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.6
2029,COQA,llama3_70b_1.,0,0.7000000000000001,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.7000000000000001
2030,COQA,llama3_70b_1.,0,0.8,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.8
2031,COQA,llama3_70b_1.,0,0.9,0.4530878115996493,j_llama8b_qa_16_0.5,ok,j_llama8b_qa_16_0.5_0.9
2032,COQA,llama3_70b_1.,2,0.1,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.1
2033,COQA,llama3_70b_1.,2,0.2,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.2
2034,COQA,llama3_70b_1.,2,0.30000000000000004,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.30000000000000004
2035,COQA,llama3_70b_1.,2,0.4,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.4
2036,COQA,llama3_70b_1.,2,0.5,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.5
2037,COQA,llama3_70b_1.,2,0.6,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.6
2038,COQA,llama3_70b_1.,2,0.7000000000000001,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.7000000000000001
2039,COQA,llama3_70b_1.,2,0.8,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.8
2040,COQA,llama3_70b_1.,2,0.9,0.3827841122666333,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.9
2041,COQA,llama3_70b_1.,222,0.1,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
2042,COQA,llama3_70b_1.,222,0.2,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
2043,COQA,llama3_70b_1.,222,0.30000000000000004,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
2044,COQA,llama3_70b_1.,222,0.4,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
2045,COQA,llama3_70b_1.,222,0.5,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
2046,COQA,llama3_70b_1.,222,0.6,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
2047,COQA,llama3_70b_1.,222,0.7000000000000001,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
2048,COQA,llama3_70b_1.,222,0.8,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
2049,COQA,llama3_70b_1.,222,0.9,0.3446720783404201,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
2050,COQA,llama3_70b_1.,0,0.1,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
2051,COQA,llama3_70b_1.,0,0.2,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
2052,COQA,llama3_70b_1.,0,0.30000000000000004,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
2053,COQA,llama3_70b_1.,0,0.4,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
2054,COQA,llama3_70b_1.,0,0.5,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
2055,COQA,llama3_70b_1.,0,0.6,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
2056,COQA,llama3_70b_1.,0,0.7000000000000001,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
2057,COQA,llama3_70b_1.,0,0.8,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
2058,COQA,llama3_70b_1.,0,0.9,0.3664036076662909,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
2059,COQA,llama3_70b_1.,0,0.1,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.1
2060,COQA,llama3_70b_1.,0,0.2,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.2
2061,COQA,llama3_70b_1.,0,0.30000000000000004,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.30000000000000004
2062,COQA,llama3_70b_1.,0,0.4,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.4
2063,COQA,llama3_70b_1.,0,0.5,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.5
2064,COQA,llama3_70b_1.,0,0.6,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.6
2065,COQA,llama3_70b_1.,0,0.7000000000000001,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.7000000000000001
2066,COQA,llama3_70b_1.,0,0.8,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.8
2067,COQA,llama3_70b_1.,0,0.9,0.523362144557184,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.9
2068,COQA,llama3_70b_1.,0,0.1,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
2069,COQA,llama3_70b_1.,0,0.2,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
2070,COQA,llama3_70b_1.,0,0.30000000000000004,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
2071,COQA,llama3_70b_1.,0,0.4,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
2072,COQA,llama3_70b_1.,0,0.5,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
2073,COQA,llama3_70b_1.,0,0.6,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
2074,COQA,llama3_70b_1.,0,0.7000000000000001,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
2075,COQA,llama3_70b_1.,0,0.8,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
2076,COQA,llama3_70b_1.,0,0.9,0.7340598772391331,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
2077,COQA,llama3_70b_1.,0,0.1,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.1
2078,COQA,llama3_70b_1.,0,0.2,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.2
2079,COQA,llama3_70b_1.,0,0.30000000000000004,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.30000000000000004
2080,COQA,llama3_70b_1.,0,0.4,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.4
2081,COQA,llama3_70b_1.,0,0.5,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.5
2082,COQA,llama3_70b_1.,0,0.6,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.6
2083,COQA,llama3_70b_1.,0,0.7000000000000001,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.7000000000000001
2084,COQA,llama3_70b_1.,0,0.8,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.8
2085,COQA,llama3_70b_1.,0,0.9,0.45008142302392584,j_llama8b_qa_16_0.49,ok,j_llama8b_qa_16_0.49_0.9
2086,COQA,llama3_70b_1.,0,0.1,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.1
2087,COQA,llama3_70b_1.,0,0.2,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.2
2088,COQA,llama3_70b_1.,0,0.30000000000000004,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.30000000000000004
2089,COQA,llama3_70b_1.,0,0.4,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.4
2090,COQA,llama3_70b_1.,0,0.5,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.5
2091,COQA,llama3_70b_1.,0,0.6,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.6
2092,COQA,llama3_70b_1.,0,0.7000000000000001,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.7000000000000001
2093,COQA,llama3_70b_1.,0,0.8,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.8
2094,COQA,llama3_70b_1.,0,0.9,0.7349367405737192,j_llama70b_qa_16_0.49,ok,j_llama70b_qa_16_0.49_0.9
2095,COQA,llama3_70b_1.,0,0.1,0.8201177502192158,bma_judge_w8,ok,bma_judge_w8_0.1
2096,COQA,llama3_70b_1.,0,0.2,0.7791557058749844,bma_judge_w8,ok,bma_judge_w8_0.2
2097,COQA,llama3_70b_1.,0,0.30000000000000004,0.6495051985469122,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
2098,COQA,llama3_70b_1.,0,0.4,0.5325065764750094,bma_judge_w8,ok,bma_judge_w8_0.4
2099,COQA,llama3_70b_1.,0,0.5,0.43442314919203306,bma_judge_w8,ok,bma_judge_w8_0.5
2100,COQA,llama3_70b_1.,0,0.6,0.42991356632844796,bma_judge_w8,ok,bma_judge_w8_0.6
2101,COQA,llama3_70b_1.,0,0.7000000000000001,0.3492421395465364,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
2102,COQA,llama3_70b_1.,0,0.8,0.2610547413253163,bma_judge_w8,ok,bma_judge_w8_0.8
2103,COQA,llama3_70b_1.,0,0.9,0.17211574596016535,bma_judge_w8,ok,bma_judge_w8_0.9
2104,COQA,llama3_70b_1.,0,0.1,0.78416635350119,bma_judge,ok,bma_judge_0.1
2105,COQA,llama3_70b_1.,0,0.2,0.7520982086934737,bma_judge,ok,bma_judge_0.2
2106,COQA,llama3_70b_1.,0,0.30000000000000004,0.7520982086934737,bma_judge,ok,bma_judge_0.30000000000000004
2107,COQA,llama3_70b_1.,0,0.4,0.5743454841538269,bma_judge,ok,bma_judge_0.4
2108,COQA,llama3_70b_1.,0,0.5,0.4291619691845171,bma_judge,ok,bma_judge_0.5
2109,COQA,llama3_70b_1.,0,0.6,0.42402605536765625,bma_judge,ok,bma_judge_0.6
2110,COQA,llama3_70b_1.,0,0.7000000000000001,0.32631842665664534,bma_judge,ok,bma_judge_0.7000000000000001
2111,COQA,llama3_70b_1.,0,0.8,0.32118251283978455,bma_judge,ok,bma_judge_0.8
2112,COQA,llama3_70b_1.,0,0.9,0.22648127270449706,bma_judge,ok,bma_judge_0.9
2113,SQUAD,llama3_8b_1.,5948,0.1,0.09586497890295359,bleu,ok,bleu_0.1
2114,SQUAD,llama3_8b_1.,5948,0.2,0.0519831223628692,bleu,ok,bleu_0.2
2115,SQUAD,llama3_8b_1.,5948,0.30000000000000004,0.032067510548523206,bleu,ok,bleu_0.30000000000000004
2116,SQUAD,llama3_8b_1.,5948,0.4,0.02109704641350211,bleu,ok,bleu_0.4
2117,SQUAD,llama3_8b_1.,5948,0.5,0.01350210970464135,bleu,ok,bleu_0.5
2118,SQUAD,llama3_8b_1.,5948,0.6,0.00810126582278481,bleu,ok,bleu_0.6
2119,SQUAD,llama3_8b_1.,5948,0.7000000000000001,0.005063291139240506,bleu,ok,bleu_0.7000000000000001
2120,SQUAD,llama3_8b_1.,5948,0.8,0.0033755274261603376,bleu,ok,bleu_0.8
2121,SQUAD,llama3_8b_1.,5948,0.9,0.0016877637130801688,bleu,ok,bleu_0.9
2122,SQUAD,llama3_8b_1.,5948,0.1,0.11983122362869199,bleu_adapt,ok,bleu_adapt_0.1
2123,SQUAD,llama3_8b_1.,5948,0.2,0.0540084388185654,bleu_adapt,ok,bleu_adapt_0.2
2124,SQUAD,llama3_8b_1.,5948,0.30000000000000004,0.03257383966244726,bleu_adapt,ok,bleu_adapt_0.30000000000000004
2125,SQUAD,llama3_8b_1.,5948,0.4,0.02160337552742616,bleu_adapt,ok,bleu_adapt_0.4
2126,SQUAD,llama3_8b_1.,5948,0.5,0.014008438818565401,bleu_adapt,ok,bleu_adapt_0.5
2127,SQUAD,llama3_8b_1.,5948,0.6,0.008607594936708861,bleu_adapt,ok,bleu_adapt_0.6
2128,SQUAD,llama3_8b_1.,5948,0.7000000000000001,0.00540084388185654,bleu_adapt,ok,bleu_adapt_0.7000000000000001
2129,SQUAD,llama3_8b_1.,5948,0.8,0.0037130801687763715,bleu_adapt,ok,bleu_adapt_0.8
2130,SQUAD,llama3_8b_1.,5948,0.9,0.0020253164556962027,bleu_adapt,ok,bleu_adapt_0.9
2131,SQUAD,llama3_8b_1.,5945,0.1,0.44129554655870445,rouge1,ok,rouge1_0.1
2132,SQUAD,llama3_8b_1.,5945,0.2,0.24578272604588394,rouge1,ok,rouge1_0.2
2133,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.15215924426450742,rouge1,ok,rouge1_0.30000000000000004
2134,SQUAD,llama3_8b_1.,5945,0.4,0.0907557354925776,rouge1,ok,rouge1_0.4
2135,SQUAD,llama3_8b_1.,5945,0.5,0.05819838056680162,rouge1,ok,rouge1_0.5
2136,SQUAD,llama3_8b_1.,5945,0.6,0.038461538461538464,rouge1,ok,rouge1_0.6
2137,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.022941970310391364,rouge1,ok,rouge1_0.7000000000000001
2138,SQUAD,llama3_8b_1.,5945,0.8,0.013663967611336033,rouge1,ok,rouge1_0.8
2139,SQUAD,llama3_8b_1.,5945,0.9,0.008771929824561403,rouge1,ok,rouge1_0.9
2140,SQUAD,llama3_8b_1.,5945,0.1,0.2974021592442645,rouge2,ok,rouge2_0.1
2141,SQUAD,llama3_8b_1.,5945,0.2,0.17223346828609987,rouge2,ok,rouge2_0.2
2142,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.10998650472334683,rouge2,ok,rouge2_0.30000000000000004
2143,SQUAD,llama3_8b_1.,5945,0.4,0.06882591093117409,rouge2,ok,rouge2_0.4
2144,SQUAD,llama3_8b_1.,5945,0.5,0.04706477732793522,rouge2,ok,rouge2_0.5
2145,SQUAD,llama3_8b_1.,5945,0.6,0.031545209176788123,rouge2,ok,rouge2_0.6
2146,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.019230769230769232,rouge2,ok,rouge2_0.7000000000000001
2147,SQUAD,llama3_8b_1.,5945,0.8,0.012483130904183536,rouge2,ok,rouge2_0.8
2148,SQUAD,llama3_8b_1.,5945,0.9,0.008434547908232119,rouge2,ok,rouge2_0.9
2149,SQUAD,llama3_8b_1.,5945,0.1,0.43606612685560053,rougeL,ok,rougeL_0.1
2150,SQUAD,llama3_8b_1.,5945,0.2,0.24055330634278002,rougeL,ok,rougeL_0.2
2151,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.1496288798920378,rougeL,ok,rougeL_0.30000000000000004
2152,SQUAD,llama3_8b_1.,5945,0.4,0.09024966261808368,rougeL,ok,rougeL_0.4
2153,SQUAD,llama3_8b_1.,5945,0.5,0.058029689608636977,rougeL,ok,rougeL_0.5
2154,SQUAD,llama3_8b_1.,5945,0.6,0.03829284750337382,rougeL,ok,rougeL_0.6
2155,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.022941970310391364,rougeL,ok,rougeL_0.7000000000000001
2156,SQUAD,llama3_8b_1.,5945,0.8,0.01349527665317139,rougeL,ok,rougeL_0.8
2157,SQUAD,llama3_8b_1.,5945,0.9,0.008771929824561403,rougeL,ok,rougeL_0.9
2158,SQUAD,llama3_8b_1.,5945,0.1,0.43606612685560053,rougeLsum,ok,rougeLsum_0.1
2159,SQUAD,llama3_8b_1.,5945,0.2,0.24055330634278002,rougeLsum,ok,rougeLsum_0.2
2160,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.14979757085020243,rougeLsum,ok,rougeLsum_0.30000000000000004
2161,SQUAD,llama3_8b_1.,5945,0.4,0.09024966261808368,rougeLsum,ok,rougeLsum_0.4
2162,SQUAD,llama3_8b_1.,5945,0.5,0.058029689608636977,rougeLsum,ok,rougeLsum_0.5
2163,SQUAD,llama3_8b_1.,5945,0.6,0.03829284750337382,rougeLsum,ok,rougeLsum_0.6
2164,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.022941970310391364,rougeLsum,ok,rougeLsum_0.7000000000000001
2165,SQUAD,llama3_8b_1.,5945,0.8,0.01349527665317139,rougeLsum,ok,rougeLsum_0.8
2166,SQUAD,llama3_8b_1.,5945,0.9,0.008771929824561403,rougeLsum,ok,rougeLsum_0.9
2167,SQUAD,llama3_8b_1.,6154,0.1,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.1
2168,SQUAD,llama3_8b_1.,6154,0.2,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.2
2169,SQUAD,llama3_8b_1.,6154,0.30000000000000004,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.30000000000000004
2170,SQUAD,llama3_8b_1.,6154,0.4,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.4
2171,SQUAD,llama3_8b_1.,6154,0.5,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.5
2172,SQUAD,llama3_8b_1.,6154,0.6,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.6
2173,SQUAD,llama3_8b_1.,6154,0.7000000000000001,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.7000000000000001
2174,SQUAD,llama3_8b_1.,6154,0.8,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.8
2175,SQUAD,llama3_8b_1.,6154,0.9,0.6527364923937752,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.9
2176,SQUAD,llama3_8b_1.,5945,0.1,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.1
2177,SQUAD,llama3_8b_1.,5945,0.2,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.2
2178,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.30000000000000004
2179,SQUAD,llama3_8b_1.,5945,0.4,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.4
2180,SQUAD,llama3_8b_1.,5945,0.5,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.5
2181,SQUAD,llama3_8b_1.,5945,0.6,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.6
2182,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.7000000000000001
2183,SQUAD,llama3_8b_1.,5945,0.8,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.8
2184,SQUAD,llama3_8b_1.,5945,0.9,0.6010458839406208,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.9
2185,SQUAD,llama3_8b_1.,5945,0.1,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.1
2186,SQUAD,llama3_8b_1.,5945,0.2,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.2
2187,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.30000000000000004
2188,SQUAD,llama3_8b_1.,5945,0.4,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.4
2189,SQUAD,llama3_8b_1.,5945,0.5,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.5
2190,SQUAD,llama3_8b_1.,5945,0.6,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.6
2191,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.7000000000000001
2192,SQUAD,llama3_8b_1.,5945,0.8,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.8
2193,SQUAD,llama3_8b_1.,5945,0.9,0.7462887989203779,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.9
2194,SQUAD,llama3_8b_1.,5945,0.1,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.1
2195,SQUAD,llama3_8b_1.,5945,0.2,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.2
2196,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.30000000000000004
2197,SQUAD,llama3_8b_1.,5945,0.4,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.4
2198,SQUAD,llama3_8b_1.,5945,0.5,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.5
2199,SQUAD,llama3_8b_1.,5945,0.6,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.6
2200,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.7000000000000001
2201,SQUAD,llama3_8b_1.,5945,0.8,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.8
2202,SQUAD,llama3_8b_1.,5945,0.9,0.5936234817813765,j_llama8b_qa_16.0_0.5_1,ok,j_llama8b_qa_16.0_0.5_1_0.9
2203,SQUAD,llama3_8b_1.,5945,0.1,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.1
2204,SQUAD,llama3_8b_1.,5945,0.2,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.2
2205,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.30000000000000004
2206,SQUAD,llama3_8b_1.,5945,0.4,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.4
2207,SQUAD,llama3_8b_1.,5945,0.5,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.5
2208,SQUAD,llama3_8b_1.,5945,0.6,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.6
2209,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.7000000000000001
2210,SQUAD,llama3_8b_1.,5945,0.8,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.8
2211,SQUAD,llama3_8b_1.,5945,0.9,0.7419028340080972,j_llama70b_qa_16.0_0.5_1,ok,j_llama70b_qa_16.0_0.5_1_0.9
2212,SQUAD,llama3_8b_1.,5945,0.1,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.1
2213,SQUAD,llama3_8b_1.,5945,0.2,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.2
2214,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.30000000000000004
2215,SQUAD,llama3_8b_1.,5945,0.4,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.4
2216,SQUAD,llama3_8b_1.,5945,0.5,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.5
2217,SQUAD,llama3_8b_1.,5945,0.6,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.6
2218,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.7000000000000001
2219,SQUAD,llama3_8b_1.,5945,0.8,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.8
2220,SQUAD,llama3_8b_1.,5945,0.9,0.5976720647773279,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.9
2221,SQUAD,llama3_8b_1.,5945,0.1,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.1
2222,SQUAD,llama3_8b_1.,5945,0.2,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.2
2223,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.30000000000000004
2224,SQUAD,llama3_8b_1.,5945,0.4,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.4
2225,SQUAD,llama3_8b_1.,5945,0.5,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.5
2226,SQUAD,llama3_8b_1.,5945,0.6,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.6
2227,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.7000000000000001
2228,SQUAD,llama3_8b_1.,5945,0.8,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.8
2229,SQUAD,llama3_8b_1.,5945,0.9,0.5711875843454791,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.9
2230,SQUAD,llama3_8b_1.,5945,0.1,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
2231,SQUAD,llama3_8b_1.,5945,0.2,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
2232,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
2233,SQUAD,llama3_8b_1.,5945,0.4,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
2234,SQUAD,llama3_8b_1.,5945,0.5,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
2235,SQUAD,llama3_8b_1.,5945,0.6,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
2236,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
2237,SQUAD,llama3_8b_1.,5945,0.8,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
2238,SQUAD,llama3_8b_1.,5945,0.9,0.5664642375168691,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
2239,SQUAD,llama3_8b_1.,5946,0.1,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.1
2240,SQUAD,llama3_8b_1.,5946,0.2,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.2
2241,SQUAD,llama3_8b_1.,5946,0.30000000000000004,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.30000000000000004
2242,SQUAD,llama3_8b_1.,5946,0.4,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.4
2243,SQUAD,llama3_8b_1.,5946,0.5,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.5
2244,SQUAD,llama3_8b_1.,5946,0.6,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.6
2245,SQUAD,llama3_8b_1.,5946,0.7000000000000001,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.7000000000000001
2246,SQUAD,llama3_8b_1.,5946,0.8,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.8
2247,SQUAD,llama3_8b_1.,5946,0.9,0.745908554074574,j_llama70b_qa_16.0_0.49_1,ok,j_llama70b_qa_16.0_0.49_1_0.9
2248,SQUAD,llama3_8b_1.,5945,0.1,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.1
2249,SQUAD,llama3_8b_1.,5945,0.2,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.2
2250,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.30000000000000004
2251,SQUAD,llama3_8b_1.,5945,0.4,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.4
2252,SQUAD,llama3_8b_1.,5945,0.5,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.5
2253,SQUAD,llama3_8b_1.,5945,0.6,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.6
2254,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.7000000000000001
2255,SQUAD,llama3_8b_1.,5945,0.8,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.8
2256,SQUAD,llama3_8b_1.,5945,0.9,0.7454453441295547,j_llama70b_qa_16.0_0.5_2,ok,j_llama70b_qa_16.0_0.5_2_0.9
2257,SQUAD,llama3_8b_1.,5945,0.1,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.1
2258,SQUAD,llama3_8b_1.,5945,0.2,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.2
2259,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.30000000000000004
2260,SQUAD,llama3_8b_1.,5945,0.4,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.4
2261,SQUAD,llama3_8b_1.,5945,0.5,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.5
2262,SQUAD,llama3_8b_1.,5945,0.6,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.6
2263,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.7000000000000001
2264,SQUAD,llama3_8b_1.,5945,0.8,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.8
2265,SQUAD,llama3_8b_1.,5945,0.9,0.5949730094466936,j_llama8b_qa_16.0_0.49_1,ok,j_llama8b_qa_16.0_0.49_1_0.9
2266,SQUAD,llama3_8b_1.,5945,0.1,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.1
2267,SQUAD,llama3_8b_1.,5945,0.2,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.2
2268,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.30000000000000004
2269,SQUAD,llama3_8b_1.,5945,0.4,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.4
2270,SQUAD,llama3_8b_1.,5945,0.5,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.5
2271,SQUAD,llama3_8b_1.,5945,0.6,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.6
2272,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.7000000000000001
2273,SQUAD,llama3_8b_1.,5945,0.8,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.8
2274,SQUAD,llama3_8b_1.,5945,0.9,0.7442645074224021,j_llama70b_qa_16.0_0.49_2,ok,j_llama70b_qa_16.0_0.49_2_0.9
2275,SQUAD,llama3_8b_1.,5945,0.1,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.1
2276,SQUAD,llama3_8b_1.,5945,0.2,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.2
2277,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.30000000000000004
2278,SQUAD,llama3_8b_1.,5945,0.4,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.4
2279,SQUAD,llama3_8b_1.,5945,0.5,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.5
2280,SQUAD,llama3_8b_1.,5945,0.6,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.6
2281,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.7000000000000001
2282,SQUAD,llama3_8b_1.,5945,0.8,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.8
2283,SQUAD,llama3_8b_1.,5945,0.9,0.5975033738191633,j_llama8b_qa_16.0_0.5_2,ok,j_llama8b_qa_16.0_0.5_2_0.9
2284,SQUAD,llama3_8b_1.,5946,0.1,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.1
2285,SQUAD,llama3_8b_1.,5946,0.2,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.2
2286,SQUAD,llama3_8b_1.,5946,0.30000000000000004,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.30000000000000004
2287,SQUAD,llama3_8b_1.,5946,0.4,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.4
2288,SQUAD,llama3_8b_1.,5946,0.5,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.5
2289,SQUAD,llama3_8b_1.,5946,0.6,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.6
2290,SQUAD,llama3_8b_1.,5946,0.7000000000000001,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.7000000000000001
2291,SQUAD,llama3_8b_1.,5946,0.8,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.8
2292,SQUAD,llama3_8b_1.,5946,0.9,0.6659355491817108,j_llama8b_gen_16.0_0.49,ok,j_llama8b_gen_16.0_0.49_0.9
2293,SQUAD,llama3_8b_1.,5945,0.1,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.1
2294,SQUAD,llama3_8b_1.,5945,0.2,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.2
2295,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.30000000000000004
2296,SQUAD,llama3_8b_1.,5945,0.4,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.4
2297,SQUAD,llama3_8b_1.,5945,0.5,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.5
2298,SQUAD,llama3_8b_1.,5945,0.6,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.6
2299,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.7000000000000001
2300,SQUAD,llama3_8b_1.,5945,0.8,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.8
2301,SQUAD,llama3_8b_1.,5945,0.9,0.7577597840755735,j_llama70b_gen_16.0_0.5,ok,j_llama70b_gen_16.0_0.5_0.9
2302,SQUAD,llama3_8b_1.,5945,0.1,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.1
2303,SQUAD,llama3_8b_1.,5945,0.2,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.2
2304,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.30000000000000004
2305,SQUAD,llama3_8b_1.,5945,0.4,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.4
2306,SQUAD,llama3_8b_1.,5945,0.5,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.5
2307,SQUAD,llama3_8b_1.,5945,0.6,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.6
2308,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.7000000000000001
2309,SQUAD,llama3_8b_1.,5945,0.8,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.8
2310,SQUAD,llama3_8b_1.,5945,0.9,0.7074898785425101,j_llama8b_gen_16.0_0.5,ok,j_llama8b_gen_16.0_0.5_0.9
2311,SQUAD,llama3_8b_1.,0,0.1,0.5007159100480081,ood_label,ok,ood_label_0.1
2312,SQUAD,llama3_8b_1.,0,0.2,0.5007159100480081,ood_label,ok,ood_label_0.2
2313,SQUAD,llama3_8b_1.,0,0.30000000000000004,0.5007159100480081,ood_label,ok,ood_label_0.30000000000000004
2314,SQUAD,llama3_8b_1.,0,0.4,0.5007159100480081,ood_label,ok,ood_label_0.4
2315,SQUAD,llama3_8b_1.,0,0.5,0.5007159100480081,ood_label,ok,ood_label_0.5
2316,SQUAD,llama3_8b_1.,0,0.6,0.5007159100480081,ood_label,ok,ood_label_0.6
2317,SQUAD,llama3_8b_1.,0,0.7000000000000001,0.5007159100480081,ood_label,ok,ood_label_0.7000000000000001
2318,SQUAD,llama3_8b_1.,0,0.8,0.5007159100480081,ood_label,ok,ood_label_0.8
2319,SQUAD,llama3_8b_1.,0,0.9,0.5007159100480081,ood_label,ok,ood_label_0.9
2320,SQUAD,llama3_8b_1.,5945,0.1,0.8343454790823212,bma_judge_w8,ok,bma_judge_w8_0.1
2321,SQUAD,llama3_8b_1.,5945,0.2,0.8024628879892037,bma_judge_w8,ok,bma_judge_w8_0.2
2322,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.789642375168691,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
2323,SQUAD,llama3_8b_1.,5945,0.4,0.7435897435897436,bma_judge_w8,ok,bma_judge_w8_0.4
2324,SQUAD,llama3_8b_1.,5945,0.5,0.6934885290148448,bma_judge_w8,ok,bma_judge_w8_0.5
2325,SQUAD,llama3_8b_1.,5945,0.6,0.6585695006747638,bma_judge_w8,ok,bma_judge_w8_0.6
2326,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.581646423751687,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
2327,SQUAD,llama3_8b_1.,5945,0.8,0.5327260458839406,bma_judge_w8,ok,bma_judge_w8_0.8
2328,SQUAD,llama3_8b_1.,5945,0.9,0.4050269905533063,bma_judge_w8,ok,bma_judge_w8_0.9
2329,SQUAD,llama3_8b_1.,5945,0.1,0.8119095816464238,bma_judge,ok,bma_judge_0.1
2330,SQUAD,llama3_8b_1.,5945,0.2,0.7790148448043185,bma_judge,ok,bma_judge_0.2
2331,SQUAD,llama3_8b_1.,5945,0.30000000000000004,0.7609649122807017,bma_judge,ok,bma_judge_0.30000000000000004
2332,SQUAD,llama3_8b_1.,5945,0.4,0.7520242914979757,bma_judge,ok,bma_judge_0.4
2333,SQUAD,llama3_8b_1.,5945,0.5,0.7437584345479082,bma_judge,ok,bma_judge_0.5
2334,SQUAD,llama3_8b_1.,5945,0.6,0.72165991902834,bma_judge,ok,bma_judge_0.6
2335,SQUAD,llama3_8b_1.,5945,0.7000000000000001,0.6641363022941971,bma_judge,ok,bma_judge_0.7000000000000001
2336,SQUAD,llama3_8b_1.,5945,0.8,0.5732118758434548,bma_judge,ok,bma_judge_0.8
2337,SQUAD,llama3_8b_1.,5945,0.9,0.4699730094466937,bma_judge,ok,bma_judge_0.9
2338,COLLIE,llama3_8b_i_1.,0,0.1,0.09471153846153846,bleu,ok,bleu_0.1
2339,COLLIE,llama3_8b_i_1.,0,0.2,0.07163461538461538,bleu,ok,bleu_0.2
2340,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.041826923076923074,bleu,ok,bleu_0.30000000000000004
2341,COLLIE,llama3_8b_i_1.,0,0.4,0.020673076923076922,bleu,ok,bleu_0.4
2342,COLLIE,llama3_8b_i_1.,0,0.5,0.004326923076923077,bleu,ok,bleu_0.5
2343,COLLIE,llama3_8b_i_1.,0,0.6,0.0009615384615384616,bleu,ok,bleu_0.6
2344,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
2345,COLLIE,llama3_8b_i_1.,0,0.8,0.0,bleu,ok,bleu_0.8
2346,COLLIE,llama3_8b_i_1.,0,0.9,0.0,bleu,ok,bleu_0.9
2347,COLLIE,llama3_8b_i_1.,0,0.1,0.09519230769230769,bleu_adapt,ok,bleu_adapt_0.1
2348,COLLIE,llama3_8b_i_1.,0,0.2,0.07211538461538461,bleu_adapt,ok,bleu_adapt_0.2
2349,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.04230769230769231,bleu_adapt,ok,bleu_adapt_0.30000000000000004
2350,COLLIE,llama3_8b_i_1.,0,0.4,0.021153846153846155,bleu_adapt,ok,bleu_adapt_0.4
2351,COLLIE,llama3_8b_i_1.,0,0.5,0.004326923076923077,bleu_adapt,ok,bleu_adapt_0.5
2352,COLLIE,llama3_8b_i_1.,0,0.6,0.0009615384615384616,bleu_adapt,ok,bleu_adapt_0.6
2353,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
2354,COLLIE,llama3_8b_i_1.,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
2355,COLLIE,llama3_8b_i_1.,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
2356,COLLIE,llama3_8b_i_1.,0,0.1,0.76875,rouge1,ok,rouge1_0.1
2357,COLLIE,llama3_8b_i_1.,0,0.2,0.5067307692307692,rouge1,ok,rouge1_0.2
2358,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.2298076923076923,rouge1,ok,rouge1_0.30000000000000004
2359,COLLIE,llama3_8b_i_1.,0,0.4,0.11634615384615385,rouge1,ok,rouge1_0.4
2360,COLLIE,llama3_8b_i_1.,0,0.5,0.061057692307692306,rouge1,ok,rouge1_0.5
2361,COLLIE,llama3_8b_i_1.,0,0.6,0.021634615384615384,rouge1,ok,rouge1_0.6
2362,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.005288461538461539,rouge1,ok,rouge1_0.7000000000000001
2363,COLLIE,llama3_8b_i_1.,0,0.8,0.0004807692307692308,rouge1,ok,rouge1_0.8
2364,COLLIE,llama3_8b_i_1.,0,0.9,0.0,rouge1,ok,rouge1_0.9
2365,COLLIE,llama3_8b_i_1.,0,0.1,0.14375,rouge2,ok,rouge2_0.1
2366,COLLIE,llama3_8b_i_1.,0,0.2,0.0985576923076923,rouge2,ok,rouge2_0.2
2367,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.07019230769230769,rouge2,ok,rouge2_0.30000000000000004
2368,COLLIE,llama3_8b_i_1.,0,0.4,0.035096153846153846,rouge2,ok,rouge2_0.4
2369,COLLIE,llama3_8b_i_1.,0,0.5,0.012980769230769231,rouge2,ok,rouge2_0.5
2370,COLLIE,llama3_8b_i_1.,0,0.6,0.0038461538461538464,rouge2,ok,rouge2_0.6
2371,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.0009615384615384616,rouge2,ok,rouge2_0.7000000000000001
2372,COLLIE,llama3_8b_i_1.,0,0.8,0.0004807692307692308,rouge2,ok,rouge2_0.8
2373,COLLIE,llama3_8b_i_1.,0,0.9,0.0,rouge2,ok,rouge2_0.9
2374,COLLIE,llama3_8b_i_1.,0,0.1,0.7197115384615385,rougeL,ok,rougeL_0.1
2375,COLLIE,llama3_8b_i_1.,0,0.2,0.29951923076923076,rougeL,ok,rougeL_0.2
2376,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.13028846153846155,rougeL,ok,rougeL_0.30000000000000004
2377,COLLIE,llama3_8b_i_1.,0,0.4,0.06442307692307692,rougeL,ok,rougeL_0.4
2378,COLLIE,llama3_8b_i_1.,0,0.5,0.01826923076923077,rougeL,ok,rougeL_0.5
2379,COLLIE,llama3_8b_i_1.,0,0.6,0.00625,rougeL,ok,rougeL_0.6
2380,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.0014423076923076924,rougeL,ok,rougeL_0.7000000000000001
2381,COLLIE,llama3_8b_i_1.,0,0.8,0.0004807692307692308,rougeL,ok,rougeL_0.8
2382,COLLIE,llama3_8b_i_1.,0,0.9,0.0,rougeL,ok,rougeL_0.9
2383,COLLIE,llama3_8b_i_1.,0,0.1,0.7192307692307692,rougeLsum,ok,rougeLsum_0.1
2384,COLLIE,llama3_8b_i_1.,0,0.2,0.3033653846153846,rougeLsum,ok,rougeLsum_0.2
2385,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.14471153846153847,rougeLsum,ok,rougeLsum_0.30000000000000004
2386,COLLIE,llama3_8b_i_1.,0,0.4,0.08701923076923077,rougeLsum,ok,rougeLsum_0.4
2387,COLLIE,llama3_8b_i_1.,0,0.5,0.04038461538461539,rougeLsum,ok,rougeLsum_0.5
2388,COLLIE,llama3_8b_i_1.,0,0.6,0.012980769230769231,rougeLsum,ok,rougeLsum_0.6
2389,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.0028846153846153848,rougeLsum,ok,rougeLsum_0.7000000000000001
2390,COLLIE,llama3_8b_i_1.,0,0.8,0.0004807692307692308,rougeLsum,ok,rougeLsum_0.8
2391,COLLIE,llama3_8b_i_1.,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
2392,COLLIE,llama3_8b_i_1.,1,0.1,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
2393,COLLIE,llama3_8b_i_1.,1,0.2,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
2394,COLLIE,llama3_8b_i_1.,1,0.30000000000000004,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
2395,COLLIE,llama3_8b_i_1.,1,0.4,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
2396,COLLIE,llama3_8b_i_1.,1,0.5,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
2397,COLLIE,llama3_8b_i_1.,1,0.6,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
2398,COLLIE,llama3_8b_i_1.,1,0.7000000000000001,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
2399,COLLIE,llama3_8b_i_1.,1,0.8,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
2400,COLLIE,llama3_8b_i_1.,1,0.9,0.022126022126022125,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
2401,COLLIE,llama3_8b_i_1.,0,0.1,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.1
2402,COLLIE,llama3_8b_i_1.,0,0.2,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.2
2403,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.30000000000000004
2404,COLLIE,llama3_8b_i_1.,0,0.4,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.4
2405,COLLIE,llama3_8b_i_1.,0,0.5,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.5
2406,COLLIE,llama3_8b_i_1.,0,0.6,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.6
2407,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.7000000000000001
2408,COLLIE,llama3_8b_i_1.,0,0.8,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.8
2409,COLLIE,llama3_8b_i_1.,0,0.9,0.051923076923076926,j_llama405b_qa_1_1.,ok,j_llama405b_qa_1_1._0.9
2410,COLLIE,llama3_8b_i_1.,2,0.1,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.1
2411,COLLIE,llama3_8b_i_1.,2,0.2,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.2
2412,COLLIE,llama3_8b_i_1.,2,0.30000000000000004,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.30000000000000004
2413,COLLIE,llama3_8b_i_1.,2,0.4,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.4
2414,COLLIE,llama3_8b_i_1.,2,0.5,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.5
2415,COLLIE,llama3_8b_i_1.,2,0.6,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.6
2416,COLLIE,llama3_8b_i_1.,2,0.7000000000000001,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.7000000000000001
2417,COLLIE,llama3_8b_i_1.,2,0.8,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.8
2418,COLLIE,llama3_8b_i_1.,2,0.9,0.49037536092396533,j_llama405b_gen_1_1.,ok,j_llama405b_gen_1_1._0.9
2419,COLLIE,llama3_8b_i_1.,1,0.1,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.1
2420,COLLIE,llama3_8b_i_1.,1,0.2,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.2
2421,COLLIE,llama3_8b_i_1.,1,0.30000000000000004,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.30000000000000004
2422,COLLIE,llama3_8b_i_1.,1,0.4,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.4
2423,COLLIE,llama3_8b_i_1.,1,0.5,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.5
2424,COLLIE,llama3_8b_i_1.,1,0.6,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.6
2425,COLLIE,llama3_8b_i_1.,1,0.7000000000000001,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.7000000000000001
2426,COLLIE,llama3_8b_i_1.,1,0.8,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.8
2427,COLLIE,llama3_8b_i_1.,1,0.9,0.017797017797017797,j_llama405b_qa_16.0_0.49,ok,j_llama405b_qa_16.0_0.49_0.9
2428,COLLIE,llama3_8b_i_1.,0,0.1,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.1
2429,COLLIE,llama3_8b_i_1.,0,0.2,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.2
2430,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.30000000000000004
2431,COLLIE,llama3_8b_i_1.,0,0.4,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.4
2432,COLLIE,llama3_8b_i_1.,0,0.5,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.5
2433,COLLIE,llama3_8b_i_1.,0,0.6,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.6
2434,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.7000000000000001
2435,COLLIE,llama3_8b_i_1.,0,0.8,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.8
2436,COLLIE,llama3_8b_i_1.,0,0.9,0.5125,j_qwen32b_gen_16_0.5,ok,j_qwen32b_gen_16_0.5_0.9
2437,COLLIE,llama3_8b_i_1.,0,0.1,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
2438,COLLIE,llama3_8b_i_1.,0,0.2,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
2439,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
2440,COLLIE,llama3_8b_i_1.,0,0.4,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
2441,COLLIE,llama3_8b_i_1.,0,0.5,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
2442,COLLIE,llama3_8b_i_1.,0,0.6,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
2443,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
2444,COLLIE,llama3_8b_i_1.,0,0.8,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
2445,COLLIE,llama3_8b_i_1.,0,0.9,0.0004807692307692308,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
2446,COLLIE,llama3_8b_i_1.,0,0.1,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
2447,COLLIE,llama3_8b_i_1.,0,0.2,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
2448,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
2449,COLLIE,llama3_8b_i_1.,0,0.4,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
2450,COLLIE,llama3_8b_i_1.,0,0.5,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
2451,COLLIE,llama3_8b_i_1.,0,0.6,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
2452,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
2453,COLLIE,llama3_8b_i_1.,0,0.8,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
2454,COLLIE,llama3_8b_i_1.,0,0.9,0.3918269230769231,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
2455,COLLIE,llama3_8b_i_1.,0,0.1,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
2456,COLLIE,llama3_8b_i_1.,0,0.2,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
2457,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
2458,COLLIE,llama3_8b_i_1.,0,0.4,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
2459,COLLIE,llama3_8b_i_1.,0,0.5,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
2460,COLLIE,llama3_8b_i_1.,0,0.6,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
2461,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
2462,COLLIE,llama3_8b_i_1.,0,0.8,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
2463,COLLIE,llama3_8b_i_1.,0,0.9,0.3625,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
2464,COLLIE,llama3_8b_i_1.,0,0.1,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
2465,COLLIE,llama3_8b_i_1.,0,0.2,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
2466,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
2467,COLLIE,llama3_8b_i_1.,0,0.4,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
2468,COLLIE,llama3_8b_i_1.,0,0.5,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
2469,COLLIE,llama3_8b_i_1.,0,0.6,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
2470,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
2471,COLLIE,llama3_8b_i_1.,0,0.8,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
2472,COLLIE,llama3_8b_i_1.,0,0.9,0.4423076923076923,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
2473,COLLIE,llama3_8b_i_1.,0,0.1,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.1
2474,COLLIE,llama3_8b_i_1.,0,0.2,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.2
2475,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.30000000000000004
2476,COLLIE,llama3_8b_i_1.,0,0.4,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.4
2477,COLLIE,llama3_8b_i_1.,0,0.5,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.5
2478,COLLIE,llama3_8b_i_1.,0,0.6,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.6
2479,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.7000000000000001
2480,COLLIE,llama3_8b_i_1.,0,0.8,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.8
2481,COLLIE,llama3_8b_i_1.,0,0.9,0.08605769230769231,j_llama8b_qa_1_1.,ok,j_llama8b_qa_1_1._0.9
2482,COLLIE,llama3_8b_i_1.,0,0.5,0.41923076923076924,exact_correctness,ok,exact_correctness_0.5
2483,COLLIE,llama3_8b_i_1.,0,0.1,0.6389423076923076,bma_judge_w8,ok,bma_judge_w8_0.1
2484,COLLIE,llama3_8b_i_1.,0,0.2,0.44567307692307695,bma_judge_w8,ok,bma_judge_w8_0.2
2485,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.2668269230769231,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
2486,COLLIE,llama3_8b_i_1.,0,0.4,0.13221153846153846,bma_judge_w8,ok,bma_judge_w8_0.4
2487,COLLIE,llama3_8b_i_1.,0,0.5,0.047596153846153844,bma_judge_w8,ok,bma_judge_w8_0.5
2488,COLLIE,llama3_8b_i_1.,0,0.6,0.01201923076923077,bma_judge_w8,ok,bma_judge_w8_0.6
2489,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.002403846153846154,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
2490,COLLIE,llama3_8b_i_1.,0,0.8,0.0009615384615384616,bma_judge_w8,ok,bma_judge_w8_0.8
2491,COLLIE,llama3_8b_i_1.,0,0.9,0.0,bma_judge_w8,ok,bma_judge_w8_0.9
2492,COLLIE,llama3_8b_i_1.,0,0.1,0.7403846153846154,bma_judge,ok,bma_judge_0.1
2493,COLLIE,llama3_8b_i_1.,0,0.2,0.5504807692307693,bma_judge,ok,bma_judge_0.2
2494,COLLIE,llama3_8b_i_1.,0,0.30000000000000004,0.33798076923076925,bma_judge,ok,bma_judge_0.30000000000000004
2495,COLLIE,llama3_8b_i_1.,0,0.4,0.1701923076923077,bma_judge,ok,bma_judge_0.4
2496,COLLIE,llama3_8b_i_1.,0,0.5,0.041826923076923074,bma_judge,ok,bma_judge_0.5
2497,COLLIE,llama3_8b_i_1.,0,0.6,0.040865384615384616,bma_judge,ok,bma_judge_0.6
2498,COLLIE,llama3_8b_i_1.,0,0.7000000000000001,0.007211538461538462,bma_judge,ok,bma_judge_0.7000000000000001
2499,COLLIE,llama3_8b_i_1.,0,0.8,0.002403846153846154,bma_judge,ok,bma_judge_0.8
2500,COLLIE,llama3_8b_i_1.,0,0.9,0.0,bma_judge,ok,bma_judge_0.9
2501,COLLIE,phi35_i_1.,0,0.1,0.08605769230769231,bleu,ok,bleu_0.1
2502,COLLIE,phi35_i_1.,0,0.2,0.05576923076923077,bleu,ok,bleu_0.2
2503,COLLIE,phi35_i_1.,0,0.30000000000000004,0.025,bleu,ok,bleu_0.30000000000000004
2504,COLLIE,phi35_i_1.,0,0.4,0.00625,bleu,ok,bleu_0.4
2505,COLLIE,phi35_i_1.,0,0.5,0.0019230769230769232,bleu,ok,bleu_0.5
2506,COLLIE,phi35_i_1.,0,0.6,0.0004807692307692308,bleu,ok,bleu_0.6
2507,COLLIE,phi35_i_1.,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
2508,COLLIE,phi35_i_1.,0,0.8,0.0,bleu,ok,bleu_0.8
2509,COLLIE,phi35_i_1.,0,0.9,0.0,bleu,ok,bleu_0.9
2510,COLLIE,phi35_i_1.,0,0.1,0.08605769230769231,bleu_adapt,ok,bleu_adapt_0.1
2511,COLLIE,phi35_i_1.,0,0.2,0.05576923076923077,bleu_adapt,ok,bleu_adapt_0.2
2512,COLLIE,phi35_i_1.,0,0.30000000000000004,0.025,bleu_adapt,ok,bleu_adapt_0.30000000000000004
2513,COLLIE,phi35_i_1.,0,0.4,0.00625,bleu_adapt,ok,bleu_adapt_0.4
2514,COLLIE,phi35_i_1.,0,0.5,0.0019230769230769232,bleu_adapt,ok,bleu_adapt_0.5
2515,COLLIE,phi35_i_1.,0,0.6,0.0004807692307692308,bleu_adapt,ok,bleu_adapt_0.6
2516,COLLIE,phi35_i_1.,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
2517,COLLIE,phi35_i_1.,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
2518,COLLIE,phi35_i_1.,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
2519,COLLIE,phi35_i_1.,0,0.1,0.7341346153846153,rouge1,ok,rouge1_0.1
2520,COLLIE,phi35_i_1.,0,0.2,0.4269230769230769,rouge1,ok,rouge1_0.2
2521,COLLIE,phi35_i_1.,0,0.30000000000000004,0.1548076923076923,rouge1,ok,rouge1_0.30000000000000004
2522,COLLIE,phi35_i_1.,0,0.4,0.08028846153846154,rouge1,ok,rouge1_0.4
2523,COLLIE,phi35_i_1.,0,0.5,0.03461538461538462,rouge1,ok,rouge1_0.5
2524,COLLIE,phi35_i_1.,0,0.6,0.006730769230769231,rouge1,ok,rouge1_0.6
2525,COLLIE,phi35_i_1.,0,0.7000000000000001,0.0004807692307692308,rouge1,ok,rouge1_0.7000000000000001
2526,COLLIE,phi35_i_1.,0,0.8,0.0,rouge1,ok,rouge1_0.8
2527,COLLIE,phi35_i_1.,0,0.9,0.0,rouge1,ok,rouge1_0.9
2528,COLLIE,phi35_i_1.,0,0.1,0.11442307692307692,rouge2,ok,rouge2_0.1
2529,COLLIE,phi35_i_1.,0,0.2,0.07884615384615384,rouge2,ok,rouge2_0.2
2530,COLLIE,phi35_i_1.,0,0.30000000000000004,0.04375,rouge2,ok,rouge2_0.30000000000000004
2531,COLLIE,phi35_i_1.,0,0.4,0.015384615384615385,rouge2,ok,rouge2_0.4
2532,COLLIE,phi35_i_1.,0,0.5,0.0038461538461538464,rouge2,ok,rouge2_0.5
2533,COLLIE,phi35_i_1.,0,0.6,0.0014423076923076924,rouge2,ok,rouge2_0.6
2534,COLLIE,phi35_i_1.,0,0.7000000000000001,0.0,rouge2,ok,rouge2_0.7000000000000001
2535,COLLIE,phi35_i_1.,0,0.8,0.0,rouge2,ok,rouge2_0.8
2536,COLLIE,phi35_i_1.,0,0.9,0.0,rouge2,ok,rouge2_0.9
2537,COLLIE,phi35_i_1.,0,0.1,0.6552884615384615,rougeL,ok,rougeL_0.1
2538,COLLIE,phi35_i_1.,0,0.2,0.20721153846153847,rougeL,ok,rougeL_0.2
2539,COLLIE,phi35_i_1.,0,0.30000000000000004,0.08076923076923077,rougeL,ok,rougeL_0.30000000000000004
2540,COLLIE,phi35_i_1.,0,0.4,0.027403846153846154,rougeL,ok,rougeL_0.4
2541,COLLIE,phi35_i_1.,0,0.5,0.006730769230769231,rougeL,ok,rougeL_0.5
2542,COLLIE,phi35_i_1.,0,0.6,0.0019230769230769232,rougeL,ok,rougeL_0.6
2543,COLLIE,phi35_i_1.,0,0.7000000000000001,0.0,rougeL,ok,rougeL_0.7000000000000001
2544,COLLIE,phi35_i_1.,0,0.8,0.0,rougeL,ok,rougeL_0.8
2545,COLLIE,phi35_i_1.,0,0.9,0.0,rougeL,ok,rougeL_0.9
2546,COLLIE,phi35_i_1.,0,0.1,0.6567307692307692,rougeLsum,ok,rougeLsum_0.1
2547,COLLIE,phi35_i_1.,0,0.2,0.21634615384615385,rougeLsum,ok,rougeLsum_0.2
2548,COLLIE,phi35_i_1.,0,0.30000000000000004,0.10384615384615385,rougeLsum,ok,rougeLsum_0.30000000000000004
2549,COLLIE,phi35_i_1.,0,0.4,0.05721153846153846,rougeLsum,ok,rougeLsum_0.4
2550,COLLIE,phi35_i_1.,0,0.5,0.01730769230769231,rougeLsum,ok,rougeLsum_0.5
2551,COLLIE,phi35_i_1.,0,0.6,0.0028846153846153848,rougeLsum,ok,rougeLsum_0.6
2552,COLLIE,phi35_i_1.,0,0.7000000000000001,0.0004807692307692308,rougeLsum,ok,rougeLsum_0.7000000000000001
2553,COLLIE,phi35_i_1.,0,0.8,0.0,rougeLsum,ok,rougeLsum_0.8
2554,COLLIE,phi35_i_1.,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
2555,COLLIE,phi35_i_1.,0,0.1,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.1
2556,COLLIE,phi35_i_1.,0,0.2,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.2
2557,COLLIE,phi35_i_1.,0,0.30000000000000004,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.30000000000000004
2558,COLLIE,phi35_i_1.,0,0.4,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.4
2559,COLLIE,phi35_i_1.,0,0.5,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.5
2560,COLLIE,phi35_i_1.,0,0.6,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.6
2561,COLLIE,phi35_i_1.,0,0.7000000000000001,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.7000000000000001
2562,COLLIE,phi35_i_1.,0,0.8,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.8
2563,COLLIE,phi35_i_1.,0,0.9,0.021634615384615384,j_llama70b_qa_1_1.,ok,j_llama70b_qa_1_1._0.9
2564,COLLIE,phi35_i_1.,0,0.1,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.1
2565,COLLIE,phi35_i_1.,0,0.2,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.2
2566,COLLIE,phi35_i_1.,0,0.30000000000000004,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.30000000000000004
2567,COLLIE,phi35_i_1.,0,0.4,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.4
2568,COLLIE,phi35_i_1.,0,0.5,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.5
2569,COLLIE,phi35_i_1.,0,0.6,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.6
2570,COLLIE,phi35_i_1.,0,0.7000000000000001,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.7000000000000001
2571,COLLIE,phi35_i_1.,0,0.8,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.8
2572,COLLIE,phi35_i_1.,0,0.9,0.30240384615384613,j_llama70b_gen_1_1.,ok,j_llama70b_gen_1_1._0.9
2573,COLLIE,phi35_i_1.,0,0.1,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.1
2574,COLLIE,phi35_i_1.,0,0.2,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.2
2575,COLLIE,phi35_i_1.,0,0.30000000000000004,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.30000000000000004
2576,COLLIE,phi35_i_1.,0,0.4,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.4
2577,COLLIE,phi35_i_1.,0,0.5,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.5
2578,COLLIE,phi35_i_1.,0,0.6,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.6
2579,COLLIE,phi35_i_1.,0,0.7000000000000001,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.7000000000000001
2580,COLLIE,phi35_i_1.,0,0.8,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.8
2581,COLLIE,phi35_i_1.,0,0.9,0.4634615384615385,j_llama8b_gen_1_1.,ok,j_llama8b_gen_1_1._0.9
2582,COLLIE,phi35_i_1.,0,0.5,0.21201923076923077,exact_correctness,ok,exact_correctness_0.5
2583,COLLIE,phi35_i_1.,0,0.1,0.6182692307692308,bma_judge_w8,ok,bma_judge_w8_0.1
2584,COLLIE,phi35_i_1.,0,0.2,0.6182692307692308,bma_judge_w8,ok,bma_judge_w8_0.2
2585,COLLIE,phi35_i_1.,0,0.30000000000000004,0.6182692307692308,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
2586,COLLIE,phi35_i_1.,0,0.4,0.1639423076923077,bma_judge_w8,ok,bma_judge_w8_0.4
2587,COLLIE,phi35_i_1.,0,0.5,0.1639423076923077,bma_judge_w8,ok,bma_judge_w8_0.5
2588,COLLIE,phi35_i_1.,0,0.6,0.1639423076923077,bma_judge_w8,ok,bma_judge_w8_0.6
2589,COLLIE,phi35_i_1.,0,0.7000000000000001,0.005288461538461539,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
2590,COLLIE,phi35_i_1.,0,0.8,0.005288461538461539,bma_judge_w8,ok,bma_judge_w8_0.8
2591,COLLIE,phi35_i_1.,0,0.9,0.005288461538461539,bma_judge_w8,ok,bma_judge_w8_0.9
2592,COLLIE,phi35_i_1.,0,0.1,0.31298076923076923,bma_judge,ok,bma_judge_0.1
2593,COLLIE,phi35_i_1.,0,0.2,0.31298076923076923,bma_judge,ok,bma_judge_0.2
2594,COLLIE,phi35_i_1.,0,0.30000000000000004,0.31298076923076923,bma_judge,ok,bma_judge_0.30000000000000004
2595,COLLIE,phi35_i_1.,0,0.4,0.31298076923076923,bma_judge,ok,bma_judge_0.4
2596,COLLIE,phi35_i_1.,0,0.5,0.011057692307692308,bma_judge,ok,bma_judge_0.5
2597,COLLIE,phi35_i_1.,0,0.6,0.011057692307692308,bma_judge,ok,bma_judge_0.6
2598,COLLIE,phi35_i_1.,0,0.7000000000000001,0.011057692307692308,bma_judge,ok,bma_judge_0.7000000000000001
2599,COLLIE,phi35_i_1.,0,0.8,0.011057692307692308,bma_judge,ok,bma_judge_0.8
2600,COLLIE,phi35_i_1.,0,0.9,0.011057692307692308,bma_judge,ok,bma_judge_0.9
2601,SQUAD,llama3_70b_i_1.,5945,0.1,0.18556005398110662,bleu,ok,bleu_0.1
2602,SQUAD,llama3_70b_i_1.,5945,0.2,0.14035087719298245,bleu,ok,bleu_0.2
2603,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.09547908232118758,bleu,ok,bleu_0.30000000000000004
2604,SQUAD,llama3_70b_i_1.,5945,0.4,0.0664642375168691,bleu,ok,bleu_0.4
2605,SQUAD,llama3_70b_i_1.,5945,0.5,0.04520917678812416,bleu,ok,bleu_0.5
2606,SQUAD,llama3_70b_i_1.,5945,0.6,0.02699055330634278,bleu,ok,bleu_0.6
2607,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.016194331983805668,bleu,ok,bleu_0.7000000000000001
2608,SQUAD,llama3_70b_i_1.,5945,0.8,0.010458839406207827,bleu,ok,bleu_0.8
2609,SQUAD,llama3_70b_i_1.,5945,0.9,0.00775978407557355,bleu,ok,bleu_0.9
2610,SQUAD,llama3_70b_i_1.,5945,0.1,0.39068825910931176,bleu_adapt,ok,bleu_adapt_0.1
2611,SQUAD,llama3_70b_i_1.,5945,0.2,0.22874493927125505,bleu_adapt,ok,bleu_adapt_0.2
2612,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.16177462887989202,bleu_adapt,ok,bleu_adapt_0.30000000000000004
2613,SQUAD,llama3_70b_i_1.,5945,0.4,0.12550607287449392,bleu_adapt,ok,bleu_adapt_0.4
2614,SQUAD,llama3_70b_i_1.,5945,0.5,0.09851551956815115,bleu_adapt,ok,bleu_adapt_0.5
2615,SQUAD,llama3_70b_i_1.,5945,0.6,0.07759784075573549,bleu_adapt,ok,bleu_adapt_0.6
2616,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.0641025641025641,bleu_adapt,ok,bleu_adapt_0.7000000000000001
2617,SQUAD,llama3_70b_i_1.,5945,0.8,0.05819838056680162,bleu_adapt,ok,bleu_adapt_0.8
2618,SQUAD,llama3_70b_i_1.,5945,0.9,0.055499325236167345,bleu_adapt,ok,bleu_adapt_0.9
2619,SQUAD,llama3_70b_i_1.,5945,0.1,0.9426450742240216,rouge1,ok,rouge1_0.1
2620,SQUAD,llama3_70b_i_1.,5945,0.2,0.7810391363022942,rouge1,ok,rouge1_0.2
2621,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.6401821862348178,rouge1,ok,rouge1_0.30000000000000004
2622,SQUAD,llama3_70b_i_1.,5945,0.4,0.4848178137651822,rouge1,ok,rouge1_0.4
2623,SQUAD,llama3_70b_i_1.,5945,0.5,0.37449392712550605,rouge1,ok,rouge1_0.5
2624,SQUAD,llama3_70b_i_1.,5945,0.6,0.30718623481781376,rouge1,ok,rouge1_0.6
2625,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.24021592442645073,rouge1,ok,rouge1_0.7000000000000001
2626,SQUAD,llama3_70b_i_1.,5945,0.8,0.18943994601889338,rouge1,ok,rouge1_0.8
2627,SQUAD,llama3_70b_i_1.,5945,0.9,0.16177462887989202,rouge1,ok,rouge1_0.9
2628,SQUAD,llama3_70b_i_1.,5945,0.1,0.6734143049932524,rouge2,ok,rouge2_0.1
2629,SQUAD,llama3_70b_i_1.,5945,0.2,0.5391363022941971,rouge2,ok,rouge2_0.2
2630,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.428306342780027,rouge2,ok,rouge2_0.30000000000000004
2631,SQUAD,llama3_70b_i_1.,5945,0.4,0.3281039136302294,rouge2,ok,rouge2_0.4
2632,SQUAD,llama3_70b_i_1.,5945,0.5,0.2619770580296896,rouge2,ok,rouge2_0.5
2633,SQUAD,llama3_70b_i_1.,5945,0.6,0.20951417004048584,rouge2,ok,rouge2_0.6
2634,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.1624493927125506,rouge2,ok,rouge2_0.7000000000000001
2635,SQUAD,llama3_70b_i_1.,5945,0.8,0.12719298245614036,rouge2,ok,rouge2_0.8
2636,SQUAD,llama3_70b_i_1.,5945,0.9,0.10897435897435898,rouge2,ok,rouge2_0.9
2637,SQUAD,llama3_70b_i_1.,5945,0.1,0.9406207827260459,rougeL,ok,rougeL_0.1
2638,SQUAD,llama3_70b_i_1.,5945,0.2,0.7744601889338731,rougeL,ok,rougeL_0.2
2639,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.6325910931174089,rougeL,ok,rougeL_0.30000000000000004
2640,SQUAD,llama3_70b_i_1.,5945,0.4,0.47874493927125505,rougeL,ok,rougeL_0.4
2641,SQUAD,llama3_70b_i_1.,5945,0.5,0.37280701754385964,rougeL,ok,rougeL_0.5
2642,SQUAD,llama3_70b_i_1.,5945,0.6,0.30516194331983804,rougeL,ok,rougeL_0.6
2643,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.23717948717948717,rougeL,ok,rougeL_0.7000000000000001
2644,SQUAD,llama3_70b_i_1.,5945,0.8,0.1887651821862348,rougeL,ok,rougeL_0.8
2645,SQUAD,llama3_70b_i_1.,5945,0.9,0.1607624831309042,rougeL,ok,rougeL_0.9
2646,SQUAD,llama3_70b_i_1.,5945,0.1,0.9409581646423751,rougeLsum,ok,rougeLsum_0.1
2647,SQUAD,llama3_70b_i_1.,5945,0.2,0.7744601889338731,rougeLsum,ok,rougeLsum_0.2
2648,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.6325910931174089,rougeLsum,ok,rougeLsum_0.30000000000000004
2649,SQUAD,llama3_70b_i_1.,5945,0.4,0.47874493927125505,rougeLsum,ok,rougeLsum_0.4
2650,SQUAD,llama3_70b_i_1.,5945,0.5,0.37280701754385964,rougeLsum,ok,rougeLsum_0.5
2651,SQUAD,llama3_70b_i_1.,5945,0.6,0.30516194331983804,rougeLsum,ok,rougeLsum_0.6
2652,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.23717948717948717,rougeLsum,ok,rougeLsum_0.7000000000000001
2653,SQUAD,llama3_70b_i_1.,5945,0.8,0.1887651821862348,rougeLsum,ok,rougeLsum_0.8
2654,SQUAD,llama3_70b_i_1.,5945,0.9,0.1607624831309042,rougeLsum,ok,rougeLsum_0.9
2655,SQUAD,llama3_70b_i_1.,5945,0.1,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.1
2656,SQUAD,llama3_70b_i_1.,5945,0.2,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.2
2657,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.30000000000000004
2658,SQUAD,llama3_70b_i_1.,5945,0.4,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.4
2659,SQUAD,llama3_70b_i_1.,5945,0.5,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.5
2660,SQUAD,llama3_70b_i_1.,5945,0.6,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.6
2661,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.7000000000000001
2662,SQUAD,llama3_70b_i_1.,5945,0.8,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.8
2663,SQUAD,llama3_70b_i_1.,5945,0.9,0.8478407557354926,j_llama8b_qa_16.0_0.5,ok,j_llama8b_qa_16.0_0.5_0.9
2664,SQUAD,llama3_70b_i_1.,5945,0.1,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.1
2665,SQUAD,llama3_70b_i_1.,5945,0.2,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.2
2666,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.30000000000000004
2667,SQUAD,llama3_70b_i_1.,5945,0.4,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.4
2668,SQUAD,llama3_70b_i_1.,5945,0.5,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.5
2669,SQUAD,llama3_70b_i_1.,5945,0.6,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.6
2670,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.7000000000000001
2671,SQUAD,llama3_70b_i_1.,5945,0.8,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.8
2672,SQUAD,llama3_70b_i_1.,5945,0.9,0.8755060728744939,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.9
2673,SQUAD,llama3_70b_i_1.,5945,0.1,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
2674,SQUAD,llama3_70b_i_1.,5945,0.2,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
2675,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
2676,SQUAD,llama3_70b_i_1.,5945,0.4,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
2677,SQUAD,llama3_70b_i_1.,5945,0.5,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
2678,SQUAD,llama3_70b_i_1.,5945,0.6,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
2679,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
2680,SQUAD,llama3_70b_i_1.,5945,0.8,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
2681,SQUAD,llama3_70b_i_1.,5945,0.9,0.8653846153846154,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
2682,SQUAD,llama3_70b_i_1.,5946,0.1,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.1
2683,SQUAD,llama3_70b_i_1.,5946,0.2,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.2
2684,SQUAD,llama3_70b_i_1.,5946,0.30000000000000004,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.30000000000000004
2685,SQUAD,llama3_70b_i_1.,5946,0.4,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.4
2686,SQUAD,llama3_70b_i_1.,5946,0.5,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.5
2687,SQUAD,llama3_70b_i_1.,5946,0.6,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.6
2688,SQUAD,llama3_70b_i_1.,5946,0.7000000000000001,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.7000000000000001
2689,SQUAD,llama3_70b_i_1.,5946,0.8,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.8
2690,SQUAD,llama3_70b_i_1.,5946,0.9,0.8845959169900456,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.9
2691,SQUAD,llama3_70b_i_1.,5945,0.1,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.1
2692,SQUAD,llama3_70b_i_1.,5945,0.2,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.2
2693,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.30000000000000004
2694,SQUAD,llama3_70b_i_1.,5945,0.4,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.4
2695,SQUAD,llama3_70b_i_1.,5945,0.5,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.5
2696,SQUAD,llama3_70b_i_1.,5945,0.6,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.6
2697,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.7000000000000001
2698,SQUAD,llama3_70b_i_1.,5945,0.8,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.8
2699,SQUAD,llama3_70b_i_1.,5945,0.9,0.900472334682861,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.9
2700,SQUAD,llama3_70b_i_1.,5945,0.1,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.1
2701,SQUAD,llama3_70b_i_1.,5945,0.2,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.2
2702,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.30000000000000004
2703,SQUAD,llama3_70b_i_1.,5945,0.4,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.4
2704,SQUAD,llama3_70b_i_1.,5945,0.5,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.5
2705,SQUAD,llama3_70b_i_1.,5945,0.6,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.6
2706,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.7000000000000001
2707,SQUAD,llama3_70b_i_1.,5945,0.8,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.8
2708,SQUAD,llama3_70b_i_1.,5945,0.9,0.9411268556005398,j_llama70b_qa_16.0_0.5,ok,j_llama70b_qa_16.0_0.5_0.9
2709,SQUAD,llama3_70b_i_1.,5945,0.1,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.1
2710,SQUAD,llama3_70b_i_1.,5945,0.2,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.2
2711,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.30000000000000004
2712,SQUAD,llama3_70b_i_1.,5945,0.4,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.4
2713,SQUAD,llama3_70b_i_1.,5945,0.5,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.5
2714,SQUAD,llama3_70b_i_1.,5945,0.6,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.6
2715,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.7000000000000001
2716,SQUAD,llama3_70b_i_1.,5945,0.8,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.8
2717,SQUAD,llama3_70b_i_1.,5945,0.9,0.8442982456140351,j_llama8b_qa_16.0_0.49,ok,j_llama8b_qa_16.0_0.49_0.9
2718,SQUAD,llama3_70b_i_1.,5945,0.1,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.1
2719,SQUAD,llama3_70b_i_1.,5945,0.2,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.2
2720,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.30000000000000004
2721,SQUAD,llama3_70b_i_1.,5945,0.4,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.4
2722,SQUAD,llama3_70b_i_1.,5945,0.5,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.5
2723,SQUAD,llama3_70b_i_1.,5945,0.6,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.6
2724,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.7000000000000001
2725,SQUAD,llama3_70b_i_1.,5945,0.8,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.8
2726,SQUAD,llama3_70b_i_1.,5945,0.9,0.9409581646423751,j_llama70b_qa_16.0_0.49,ok,j_llama70b_qa_16.0_0.49_0.9
2727,SQUAD,llama3_70b_i_1.,0,0.1,0.5007159100480081,ood_label,ok,ood_label_0.1
2728,SQUAD,llama3_70b_i_1.,0,0.2,0.5007159100480081,ood_label,ok,ood_label_0.2
2729,SQUAD,llama3_70b_i_1.,0,0.30000000000000004,0.5007159100480081,ood_label,ok,ood_label_0.30000000000000004
2730,SQUAD,llama3_70b_i_1.,0,0.4,0.5007159100480081,ood_label,ok,ood_label_0.4
2731,SQUAD,llama3_70b_i_1.,0,0.5,0.5007159100480081,ood_label,ok,ood_label_0.5
2732,SQUAD,llama3_70b_i_1.,0,0.6,0.5007159100480081,ood_label,ok,ood_label_0.6
2733,SQUAD,llama3_70b_i_1.,0,0.7000000000000001,0.5007159100480081,ood_label,ok,ood_label_0.7000000000000001
2734,SQUAD,llama3_70b_i_1.,0,0.8,0.5007159100480081,ood_label,ok,ood_label_0.8
2735,SQUAD,llama3_70b_i_1.,0,0.9,0.5007159100480081,ood_label,ok,ood_label_0.9
2736,SQUAD,llama3_70b_i_1.,5945,0.1,0.9694669365721997,bma_judge_w8,ok,bma_judge_w8_0.1
2737,SQUAD,llama3_70b_i_1.,5945,0.2,0.9595141700404858,bma_judge_w8,ok,bma_judge_w8_0.2
2738,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.9421390013495277,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
2739,SQUAD,llama3_70b_i_1.,5945,0.4,0.9272941970310391,bma_judge_w8,ok,bma_judge_w8_0.4
2740,SQUAD,llama3_70b_i_1.,5945,0.5,0.9077260458839406,bma_judge_w8,ok,bma_judge_w8_0.5
2741,SQUAD,llama3_70b_i_1.,5945,0.6,0.9077260458839406,bma_judge_w8,ok,bma_judge_w8_0.6
2742,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.8822537112010796,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
2743,SQUAD,llama3_70b_i_1.,5945,0.8,0.8152834008097166,bma_judge_w8,ok,bma_judge_w8_0.8
2744,SQUAD,llama3_70b_i_1.,5945,0.9,0.6965249662618084,bma_judge_w8,ok,bma_judge_w8_0.9
2745,SQUAD,llama3_70b_i_1.,5945,0.1,0.9588394062078273,bma_judge,ok,bma_judge_0.1
2746,SQUAD,llama3_70b_i_1.,5945,0.2,0.9512483130904184,bma_judge,ok,bma_judge_0.2
2747,SQUAD,llama3_70b_i_1.,5945,0.30000000000000004,0.9512483130904184,bma_judge,ok,bma_judge_0.30000000000000004
2748,SQUAD,llama3_70b_i_1.,5945,0.4,0.9267881241565452,bma_judge,ok,bma_judge_0.4
2749,SQUAD,llama3_70b_i_1.,5945,0.5,0.9100877192982456,bma_judge,ok,bma_judge_0.5
2750,SQUAD,llama3_70b_i_1.,5945,0.6,0.9100877192982456,bma_judge,ok,bma_judge_0.6
2751,SQUAD,llama3_70b_i_1.,5945,0.7000000000000001,0.86555330634278,bma_judge,ok,bma_judge_0.7000000000000001
2752,SQUAD,llama3_70b_i_1.,5945,0.8,0.86555330634278,bma_judge,ok,bma_judge_0.8
2753,SQUAD,llama3_70b_i_1.,5945,0.9,0.7955465587044535,bma_judge,ok,bma_judge_0.9
