,dataset,model,nan_values_in_cor,threshold,accuracy,correctness,status,cor_thr
0,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.4245614035087719,bleu,ok,bleu_0.1
1,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.043859649122807015,bleu,ok,bleu_0.2
2,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.007894736842105263,bleu,ok,bleu_0.30000000000000004
3,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.0,bleu,ok,bleu_0.4
4,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.0,bleu,ok,bleu_0.5
5,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.0,bleu,ok,bleu_0.6
6,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
7,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.0,bleu,ok,bleu_0.8
8,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.0,bleu,ok,bleu_0.9
9,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.4245614035087719,bleu_adapt,ok,bleu_adapt_0.1
10,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.043859649122807015,bleu_adapt,ok,bleu_adapt_0.2
11,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.007894736842105263,bleu_adapt,ok,bleu_adapt_0.30000000000000004
12,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.0,bleu_adapt,ok,bleu_adapt_0.4
13,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.0,bleu_adapt,ok,bleu_adapt_0.5
14,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.0,bleu_adapt,ok,bleu_adapt_0.6
15,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
16,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
17,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
18,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.9368421052631579,rouge1,ok,rouge1_0.1
19,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.5403508771929825,rouge1,ok,rouge1_0.2
20,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.2,rouge1,ok,rouge1_0.30000000000000004
21,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.034210526315789476,rouge1,ok,rouge1_0.4
22,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.0061403508771929825,rouge1,ok,rouge1_0.5
23,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.0,rouge1,ok,rouge1_0.6
24,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,rouge1,ok,rouge1_0.7000000000000001
25,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.0,rouge1,ok,rouge1_0.8
26,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.0,rouge1,ok,rouge1_0.9
27,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.6912280701754386,rouge2,ok,rouge2_0.1
28,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.18947368421052632,rouge2,ok,rouge2_0.2
29,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.03245614035087719,rouge2,ok,rouge2_0.30000000000000004
30,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.0035087719298245615,rouge2,ok,rouge2_0.4
31,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.0,rouge2,ok,rouge2_0.5
32,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.0,rouge2,ok,rouge2_0.6
33,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,rouge2,ok,rouge2_0.7000000000000001
34,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.0,rouge2,ok,rouge2_0.8
35,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.0,rouge2,ok,rouge2_0.9
36,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.8929824561403509,rougeL,ok,rougeL_0.1
37,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.34035087719298246,rougeL,ok,rougeL_0.2
38,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.04649122807017544,rougeL,ok,rougeL_0.30000000000000004
39,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.009649122807017544,rougeL,ok,rougeL_0.4
40,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.0,rougeL,ok,rougeL_0.5
41,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.0,rougeL,ok,rougeL_0.6
42,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,rougeL,ok,rougeL_0.7000000000000001
43,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.0,rougeL,ok,rougeL_0.8
44,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.0,rougeL,ok,rougeL_0.9
45,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.9333333333333333,rougeLsum,ok,rougeLsum_0.1
46,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.531578947368421,rougeLsum,ok,rougeLsum_0.2
47,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.18596491228070175,rougeLsum,ok,rougeLsum_0.30000000000000004
48,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.03245614035087719,rougeLsum,ok,rougeLsum_0.4
49,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.005263157894736842,rougeLsum,ok,rougeLsum_0.5
50,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.0,rougeLsum,ok,rougeLsum_0.6
51,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,rougeLsum,ok,rougeLsum_0.7000000000000001
52,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.0,rougeLsum,ok,rougeLsum_0.8
53,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
54,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.1,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
55,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.2,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
56,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.30000000000000004,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
57,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.4,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
58,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.5,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
59,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.6,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
60,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.7000000000000001,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
61,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.8,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
62,BCB,rerun/BCB_qwen2_32b_i_0.6,223,0.9,0.6946564885496184,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
63,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
64,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
65,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
66,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
67,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
68,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
69,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
70,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
71,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.7342105263157894,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
72,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
73,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
74,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
75,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
76,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
77,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
78,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
79,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
80,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.606140350877193,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
81,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.1
82,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.2
83,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.30000000000000004
84,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.4
85,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.5
86,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.6
87,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.7000000000000001
88,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.8
89,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.6070175438596491,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.9
90,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.1
91,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.2
92,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.30000000000000004
93,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.4
94,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.5
95,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.6
96,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.7000000000000001
97,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.8
98,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.7333333333333333,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.9
99,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
100,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
101,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
102,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
103,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
104,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
105,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
106,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
107,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.4789473684210526,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
108,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.1,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.1
109,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.2,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.2
110,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.30000000000000004,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.30000000000000004
111,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.4,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.4
112,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.5,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.5
113,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.6,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.6
114,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.7000000000000001,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.7000000000000001
115,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.8,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.8
116,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.9,0.8718173836698858,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.9
117,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.1,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.1
118,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.2,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.2
119,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.30000000000000004,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.30000000000000004
120,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.4,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.4
121,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.5,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.5
122,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.6,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.6
123,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.7000000000000001,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.7000000000000001
124,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.8,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.8
125,BCB,rerun/BCB_qwen2_32b_i_0.6,1,0.9,0.8217734855136084,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.9
126,BCB,rerun/BCB_qwen2_32b_i_0.6,2,0.5,0.17398945518453426,exact_correctness,ok,exact_correctness_0.5
127,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.9175438596491228,bma_judge_w8,ok,bma_judge_w8_0.1
128,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.8736842105263158,bma_judge_w8,ok,bma_judge_w8_0.2
129,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.7991228070175439,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
130,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.75,bma_judge_w8,ok,bma_judge_w8_0.4
131,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.6780701754385965,bma_judge_w8,ok,bma_judge_w8_0.5
132,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.6570175438596492,bma_judge_w8,ok,bma_judge_w8_0.6
133,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.5982456140350877,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
134,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.5342105263157895,bma_judge_w8,ok,bma_judge_w8_0.8
135,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.4131578947368421,bma_judge_w8,ok,bma_judge_w8_0.9
136,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.1,0.9175438596491228,bma_judge,ok,bma_judge_0.1
137,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.2,0.8736842105263158,bma_judge,ok,bma_judge_0.2
138,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.30000000000000004,0.7991228070175439,bma_judge,ok,bma_judge_0.30000000000000004
139,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.4,0.75,bma_judge,ok,bma_judge_0.4
140,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.5,0.6780701754385965,bma_judge,ok,bma_judge_0.5
141,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.6,0.6570175438596492,bma_judge,ok,bma_judge_0.6
142,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.7000000000000001,0.5982456140350877,bma_judge,ok,bma_judge_0.7000000000000001
143,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.8,0.5342105263157895,bma_judge,ok,bma_judge_0.8
144,BCB,rerun/BCB_qwen2_32b_i_0.6,0,0.9,0.4131578947368421,bma_judge,ok,bma_judge_0.9
145,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.6103515625,bleu,ok,bleu_0.1
146,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.421875,bleu,ok,bleu_0.2
147,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.306640625,bleu,ok,bleu_0.30000000000000004
148,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.1806640625,bleu,ok,bleu_0.4
149,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.1044921875,bleu,ok,bleu_0.5
150,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.052734375,bleu,ok,bleu_0.6
151,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.0205078125,bleu,ok,bleu_0.7000000000000001
152,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.0078125,bleu,ok,bleu_0.8
153,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.0,bleu,ok,bleu_0.9
154,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.6103515625,bleu_adapt,ok,bleu_adapt_0.1
155,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.421875,bleu_adapt,ok,bleu_adapt_0.2
156,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.306640625,bleu_adapt,ok,bleu_adapt_0.30000000000000004
157,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.1806640625,bleu_adapt,ok,bleu_adapt_0.4
158,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.1044921875,bleu_adapt,ok,bleu_adapt_0.5
159,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.052734375,bleu_adapt,ok,bleu_adapt_0.6
160,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.0205078125,bleu_adapt,ok,bleu_adapt_0.7000000000000001
161,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.0078125,bleu_adapt,ok,bleu_adapt_0.8
162,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
163,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.9560546875,rouge1,ok,rouge1_0.1
164,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.69140625,rouge1,ok,rouge1_0.2
165,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.5126953125,rouge1,ok,rouge1_0.30000000000000004
166,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.4267578125,rouge1,ok,rouge1_0.4
167,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.330078125,rouge1,ok,rouge1_0.5
168,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.2099609375,rouge1,ok,rouge1_0.6
169,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.109375,rouge1,ok,rouge1_0.7000000000000001
170,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.041015625,rouge1,ok,rouge1_0.8
171,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.009765625,rouge1,ok,rouge1_0.9
172,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.7900390625,rouge2,ok,rouge2_0.1
173,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.494140625,rouge2,ok,rouge2_0.2
174,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.353515625,rouge2,ok,rouge2_0.30000000000000004
175,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.2333984375,rouge2,ok,rouge2_0.4
176,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.1318359375,rouge2,ok,rouge2_0.5
177,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.0595703125,rouge2,ok,rouge2_0.6
178,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.025390625,rouge2,ok,rouge2_0.7000000000000001
179,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.0087890625,rouge2,ok,rouge2_0.8
180,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.0,rouge2,ok,rouge2_0.9
181,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.9033203125,rougeL,ok,rougeL_0.1
182,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.5869140625,rougeL,ok,rougeL_0.2
183,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.4443359375,rougeL,ok,rougeL_0.30000000000000004
184,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.3349609375,rougeL,ok,rougeL_0.4
185,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.21484375,rougeL,ok,rougeL_0.5
186,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.1103515625,rougeL,ok,rougeL_0.6
187,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.0478515625,rougeL,ok,rougeL_0.7000000000000001
188,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.0185546875,rougeL,ok,rougeL_0.8
189,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.0048828125,rougeL,ok,rougeL_0.9
190,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.9541015625,rougeLsum,ok,rougeLsum_0.1
191,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.6865234375,rougeLsum,ok,rougeLsum_0.2
192,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.5107421875,rougeLsum,ok,rougeLsum_0.30000000000000004
193,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.4228515625,rougeLsum,ok,rougeLsum_0.4
194,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.326171875,rougeLsum,ok,rougeLsum_0.5
195,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.201171875,rougeLsum,ok,rougeLsum_0.6
196,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.1025390625,rougeLsum,ok,rougeLsum_0.7000000000000001
197,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.041015625,rougeLsum,ok,rougeLsum_0.8
198,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.0087890625,rougeLsum,ok,rougeLsum_0.9
199,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.1,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
200,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.2,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
201,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.30000000000000004,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
202,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.4,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
203,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.5,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
204,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.6,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
205,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.7000000000000001,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
206,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.8,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
207,BCB,rerun/BCB_llama3_70b_i_0.6,339,0.9,0.5942571785268415,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
208,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
209,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
210,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
211,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
212,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
213,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
214,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
215,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
216,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.400390625,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
217,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
218,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
219,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
220,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
221,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
222,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
223,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
224,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
225,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.8173828125,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
226,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.1
227,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.2
228,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.30000000000000004
229,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.4
230,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.5
231,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.6
232,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.7000000000000001
233,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.8
234,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.71875,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.9
235,BCB,rerun/BCB_llama3_70b_i_0.6,1,0.5,0.3520632133450395,exact_correctness,ok,exact_correctness_0.5
236,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.8525390625,bma_judge_w8,ok,bma_judge_w8_0.1
237,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.8525390625,bma_judge_w8,ok,bma_judge_w8_0.2
238,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.751953125,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
239,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.732421875,bma_judge_w8,ok,bma_judge_w8_0.4
240,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.59765625,bma_judge_w8,ok,bma_judge_w8_0.5
241,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.59765625,bma_judge_w8,ok,bma_judge_w8_0.6
242,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.513671875,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
243,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.390625,bma_judge_w8,ok,bma_judge_w8_0.8
244,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.390625,bma_judge_w8,ok,bma_judge_w8_0.9
245,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.1,0.8525390625,bma_judge,ok,bma_judge_0.1
246,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.2,0.8525390625,bma_judge,ok,bma_judge_0.2
247,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.30000000000000004,0.751953125,bma_judge,ok,bma_judge_0.30000000000000004
248,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.4,0.732421875,bma_judge,ok,bma_judge_0.4
249,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.5,0.59765625,bma_judge,ok,bma_judge_0.5
250,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.6,0.59765625,bma_judge,ok,bma_judge_0.6
251,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.7000000000000001,0.513671875,bma_judge,ok,bma_judge_0.7000000000000001
252,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.8,0.390625,bma_judge,ok,bma_judge_0.8
253,BCB,rerun/BCB_llama3_70b_i_0.6,116,0.9,0.390625,bma_judge,ok,bma_judge_0.9
254,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.3929824561403509,bleu,ok,bleu_0.1
255,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.028070175438596492,bleu,ok,bleu_0.2
256,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.0017543859649122807,bleu,ok,bleu_0.30000000000000004
257,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.0,bleu,ok,bleu_0.4
258,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.0,bleu,ok,bleu_0.5
259,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.0,bleu,ok,bleu_0.6
260,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
261,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.0,bleu,ok,bleu_0.8
262,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.0,bleu,ok,bleu_0.9
263,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.3929824561403509,bleu_adapt,ok,bleu_adapt_0.1
264,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.028070175438596492,bleu_adapt,ok,bleu_adapt_0.2
265,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.0017543859649122807,bleu_adapt,ok,bleu_adapt_0.30000000000000004
266,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.0,bleu_adapt,ok,bleu_adapt_0.4
267,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.0,bleu_adapt,ok,bleu_adapt_0.5
268,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.0,bleu_adapt,ok,bleu_adapt_0.6
269,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
270,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
271,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
272,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.9456140350877194,rouge1,ok,rouge1_0.1
273,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.5149122807017544,rouge1,ok,rouge1_0.2
274,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.1692982456140351,rouge1,ok,rouge1_0.30000000000000004
275,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.02543859649122807,rouge1,ok,rouge1_0.4
276,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.002631578947368421,rouge1,ok,rouge1_0.5
277,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.0,rouge1,ok,rouge1_0.6
278,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.0,rouge1,ok,rouge1_0.7000000000000001
279,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.0,rouge1,ok,rouge1_0.8
280,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.0,rouge1,ok,rouge1_0.9
281,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.6535087719298246,rouge2,ok,rouge2_0.1
282,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.15,rouge2,ok,rouge2_0.2
283,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.015789473684210527,rouge2,ok,rouge2_0.30000000000000004
284,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.0017543859649122807,rouge2,ok,rouge2_0.4
285,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.0,rouge2,ok,rouge2_0.5
286,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.0,rouge2,ok,rouge2_0.6
287,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.0,rouge2,ok,rouge2_0.7000000000000001
288,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.0,rouge2,ok,rouge2_0.8
289,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.0,rouge2,ok,rouge2_0.9
290,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.9096491228070176,rougeL,ok,rougeL_0.1
291,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.32719298245614037,rougeL,ok,rougeL_0.2
292,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.03508771929824561,rougeL,ok,rougeL_0.30000000000000004
293,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.002631578947368421,rougeL,ok,rougeL_0.4
294,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.0,rougeL,ok,rougeL_0.5
295,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.0,rougeL,ok,rougeL_0.6
296,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.0,rougeL,ok,rougeL_0.7000000000000001
297,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.0,rougeL,ok,rougeL_0.8
298,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.0,rougeL,ok,rougeL_0.9
299,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.9368421052631579,rougeLsum,ok,rougeLsum_0.1
300,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.5017543859649123,rougeLsum,ok,rougeLsum_0.2
301,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.15789473684210525,rougeLsum,ok,rougeLsum_0.30000000000000004
302,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.02543859649122807,rougeLsum,ok,rougeLsum_0.4
303,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.002631578947368421,rougeLsum,ok,rougeLsum_0.5
304,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.0,rougeLsum,ok,rougeLsum_0.6
305,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.0,rougeLsum,ok,rougeLsum_0.7000000000000001
306,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.0,rougeLsum,ok,rougeLsum_0.8
307,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
308,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.1,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
309,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.2,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
310,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.30000000000000004,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
311,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.4,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
312,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.5,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
313,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.6,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
314,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.7000000000000001,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
315,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.8,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
316,BCB,rerun/BCB_qwen2_7b_i_0.6,223,0.9,0.579062159214831,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
317,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
318,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
319,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
320,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
321,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
322,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
323,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
324,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
325,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.6517543859649123,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
326,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
327,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
328,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
329,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
330,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
331,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
332,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
333,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
334,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.4807017543859649,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
335,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.1
336,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.2
337,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.30000000000000004
338,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.4
339,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.5
340,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.6
341,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.7000000000000001
342,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.8
343,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.4763157894736842,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.9
344,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.1
345,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.2
346,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.30000000000000004
347,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.4
348,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.5
349,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.6
350,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.7000000000000001
351,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.8
352,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.6464912280701754,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.9
353,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
354,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
355,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
356,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
357,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
358,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
359,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
360,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
361,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.37456140350877193,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
362,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
363,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
364,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
365,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
366,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
367,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
368,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
369,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
370,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.7824561403508772,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
371,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.1
372,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.2
373,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.30000000000000004
374,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.4
375,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.5
376,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.6
377,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.7000000000000001
378,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.8
379,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.7403508771929824,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.9
380,BCB,rerun/BCB_qwen2_7b_i_0.6,3,0.5,0.06948109058927,exact_correctness,ok,exact_correctness_0.5
381,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.8403508771929824,bma_judge_w8,ok,bma_judge_w8_0.1
382,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.793859649122807,bma_judge_w8,ok,bma_judge_w8_0.2
383,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.6991228070175438,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
384,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.643859649122807,bma_judge_w8,ok,bma_judge_w8_0.4
385,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.5780701754385965,bma_judge_w8,ok,bma_judge_w8_0.5
386,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.5587719298245614,bma_judge_w8,ok,bma_judge_w8_0.6
387,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.4885964912280702,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
388,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.4070175438596491,bma_judge_w8,ok,bma_judge_w8_0.8
389,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.30350877192982456,bma_judge_w8,ok,bma_judge_w8_0.9
390,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.1,0.8403508771929824,bma_judge,ok,bma_judge_0.1
391,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.2,0.793859649122807,bma_judge,ok,bma_judge_0.2
392,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.30000000000000004,0.6991228070175438,bma_judge,ok,bma_judge_0.30000000000000004
393,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.4,0.643859649122807,bma_judge,ok,bma_judge_0.4
394,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.5,0.5780701754385965,bma_judge,ok,bma_judge_0.5
395,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.6,0.5587719298245614,bma_judge,ok,bma_judge_0.6
396,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.7000000000000001,0.4885964912280702,bma_judge,ok,bma_judge_0.7000000000000001
397,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.8,0.4070175438596491,bma_judge,ok,bma_judge_0.8
398,BCB,rerun/BCB_qwen2_7b_i_0.6,0,0.9,0.30350877192982456,bma_judge,ok,bma_judge_0.9
399,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.17729419703103913,bleu,ok,bleu_0.1
400,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.11656545209176788,bleu,ok,bleu_0.2
401,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.06713900134952766,bleu,ok,bleu_0.30000000000000004
402,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.038461538461538464,bleu,ok,bleu_0.4
403,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.020917678812415654,bleu,ok,bleu_0.5
404,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.009615384615384616,bleu,ok,bleu_0.6
405,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.004892037786774629,bleu,ok,bleu_0.7000000000000001
406,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.0015182186234817814,bleu,ok,bleu_0.8
407,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.00033738191632928474,bleu,ok,bleu_0.9
408,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.35846828609986503,bleu_adapt,ok,bleu_adapt_0.1
409,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.14861673414304993,bleu_adapt,ok,bleu_adapt_0.2
410,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.07574224021592442,bleu_adapt,ok,bleu_adapt_0.30000000000000004
411,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.04234143049932523,bleu_adapt,ok,bleu_adapt_0.4
412,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.023785425101214574,bleu_adapt,ok,bleu_adapt_0.5
413,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.012145748987854251,bleu_adapt,ok,bleu_adapt_0.6
414,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.007253711201079622,bleu_adapt,ok,bleu_adapt_0.7000000000000001
415,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.003879892037786775,bleu_adapt,ok,bleu_adapt_0.8
416,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.002699055330634278,bleu_adapt,ok,bleu_adapt_0.9
417,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.9458502024291497,rouge1,ok,rouge1_0.1
418,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.7309379217273954,rouge1,ok,rouge1_0.2
419,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.5281713900134952,rouge1,ok,rouge1_0.30000000000000004
420,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.324055330634278,rouge1,ok,rouge1_0.4
421,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.19247638326585695,rouge1,ok,rouge1_0.5
422,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.11454116059379217,rouge1,ok,rouge1_0.6
423,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.06039136302294197,rouge1,ok,rouge1_0.7000000000000001
424,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.02850877192982456,rouge1,ok,rouge1_0.8
425,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.015350877192982455,rouge1,ok,rouge1_0.9
426,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.6789811066126855,rouge2,ok,rouge2_0.1
427,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.47790148448043185,rouge2,ok,rouge2_0.2
428,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.3340080971659919,rouge2,ok,rouge2_0.30000000000000004
429,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.19973009446693657,rouge2,ok,rouge2_0.4
430,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.12112010796221323,rouge2,ok,rouge2_0.5
431,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.071693657219973,rouge2,ok,rouge2_0.6
432,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.037112010796221326,rouge2,ok,rouge2_0.7000000000000001
433,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.018049932523616734,rouge2,ok,rouge2_0.8
434,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.010290148448043185,rouge2,ok,rouge2_0.9
435,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.9433198380566802,rougeL,ok,rougeL_0.1
436,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.722165991902834,rougeL,ok,rougeL_0.2
437,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.5210863697705803,rougeL,ok,rougeL_0.30000000000000004
438,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.31511470985155193,rougeL,ok,rougeL_0.4
439,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.18572874493927125,rougeL,ok,rougeL_0.5
440,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.1082995951417004,rougeL,ok,rougeL_0.6
441,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.05819838056680162,rougeL,ok,rougeL_0.7000000000000001
442,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.027159244264507422,rougeL,ok,rougeL_0.8
443,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.015350877192982455,rougeL,ok,rougeL_0.9
444,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.9433198380566802,rougeLsum,ok,rougeLsum_0.1
445,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.722165991902834,rougeLsum,ok,rougeLsum_0.2
446,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.5210863697705803,rougeLsum,ok,rougeLsum_0.30000000000000004
447,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.31511470985155193,rougeLsum,ok,rougeLsum_0.4
448,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.1858974358974359,rougeLsum,ok,rougeLsum_0.5
449,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.1082995951417004,rougeLsum,ok,rougeLsum_0.6
450,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.05819838056680162,rougeLsum,ok,rougeLsum_0.7000000000000001
451,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.027159244264507422,rougeLsum,ok,rougeLsum_0.8
452,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.015350877192982455,rougeLsum,ok,rougeLsum_0.9
453,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.1
454,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.2
455,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.30000000000000004
456,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.4
457,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.5
458,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.6
459,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.7000000000000001
460,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.8
461,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.9308367071524967,j_qwen32b_qa_16.0_0.5,ok,j_qwen32b_qa_16.0_0.5_0.9
462,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.1
463,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.2
464,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.30000000000000004
465,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.4
466,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.5
467,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.6
468,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.7000000000000001
469,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.8
470,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.9423076923076923,j_qwen32b_gen_16.0_0.49,ok,j_qwen32b_gen_16.0_0.49_0.9
471,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.1
472,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.2
473,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.30000000000000004
474,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.4
475,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.5
476,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.6
477,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.7000000000000001
478,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.8
479,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.9308367071524967,j_qwen32b_qa_16.0_0.49,ok,j_qwen32b_qa_16.0_0.49_0.9
480,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.1,0.5007159100480081,ood_label,ok,ood_label_0.1
481,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.2,0.5007159100480081,ood_label,ok,ood_label_0.2
482,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.30000000000000004,0.5007159100480081,ood_label,ok,ood_label_0.30000000000000004
483,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.4,0.5007159100480081,ood_label,ok,ood_label_0.4
484,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.5,0.5007159100480081,ood_label,ok,ood_label_0.5
485,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.6,0.5007159100480081,ood_label,ok,ood_label_0.6
486,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.7000000000000001,0.5007159100480081,ood_label,ok,ood_label_0.7000000000000001
487,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.8,0.5007159100480081,ood_label,ok,ood_label_0.8
488,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,0,0.9,0.5007159100480081,ood_label,ok,ood_label_0.9
489,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.979251012145749,bma_judge_w8,ok,bma_judge_w8_0.1
490,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.979251012145749,bma_judge_w8,ok,bma_judge_w8_0.2
491,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.979251012145749,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
492,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.944838056680162,bma_judge_w8,ok,bma_judge_w8_0.4
493,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.944838056680162,bma_judge_w8,ok,bma_judge_w8_0.5
494,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.944838056680162,bma_judge_w8,ok,bma_judge_w8_0.6
495,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.8798920377867746,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
496,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.8798920377867746,bma_judge_w8,ok,bma_judge_w8_0.8
497,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.8798920377867746,bma_judge_w8,ok,bma_judge_w8_0.9
498,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.1,0.979251012145749,bma_judge,ok,bma_judge_0.1
499,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.2,0.979251012145749,bma_judge,ok,bma_judge_0.2
500,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.30000000000000004,0.979251012145749,bma_judge,ok,bma_judge_0.30000000000000004
501,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.4,0.944838056680162,bma_judge,ok,bma_judge_0.4
502,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.5,0.944838056680162,bma_judge,ok,bma_judge_0.5
503,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.6,0.944838056680162,bma_judge,ok,bma_judge_0.6
504,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.7000000000000001,0.8798920377867746,bma_judge,ok,bma_judge_0.7000000000000001
505,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.8,0.8798920377867746,bma_judge,ok,bma_judge_0.8
506,SQUAD,rerun/SQUAD_qwen2_32b_i_0.6,5945,0.9,0.8798920377867746,bma_judge,ok,bma_judge_0.9
507,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.09632970061380433,bleu,ok,bleu_0.1
508,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.0769134410622573,bleu,ok,bleu_0.2
509,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.057246649129399974,bleu,ok,bleu_0.30000000000000004
510,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.041964173869472626,bleu,ok,bleu_0.4
511,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.03256920957033697,bleu,ok,bleu_0.5
512,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.021169986220719028,bleu,ok,bleu_0.6
513,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.015157209069272204,bleu,ok,bleu_0.7000000000000001
514,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.010522360015031942,bleu,ok,bleu_0.8
515,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.00751597143930853,bleu,ok,bleu_0.9
516,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.2970061380433421,bleu_adapt,ok,bleu_adapt_0.1
517,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.16409870975823626,bleu_adapt,ok,bleu_adapt_0.2
518,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.11712388826255793,bleu_adapt,ok,bleu_adapt_0.30000000000000004
519,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.080170362019291,bleu_adapt,ok,bleu_adapt_0.4
520,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.05975197294250282,bleu_adapt,ok,bleu_adapt_0.5
521,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.04634849054240261,bleu_adapt,ok,bleu_adapt_0.6
522,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.03745459100588751,bleu_adapt,ok,bleu_adapt_0.7000000000000001
523,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.0320681448077164,bleu_adapt,ok,bleu_adapt_0.8
524,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.029061756231992985,bleu_adapt,ok,bleu_adapt_0.9
525,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.8709758236252035,rouge1,ok,rouge1_0.1
526,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.6699235876237004,rouge1,ok,rouge1_0.2
527,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.5227358136039083,rouge1,ok,rouge1_0.30000000000000004
528,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.3902041838907679,rouge1,ok,rouge1_0.4
529,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.296004008518101,rouge1,ok,rouge1_0.5
530,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.24577226606538896,rouge1,ok,rouge1_0.6
531,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.1895277464612301,rouge1,ok,rouge1_0.7000000000000001
532,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.14944256545158463,rouge1,ok,rouge1_0.8
533,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.13178003256920956,rouge1,ok,rouge1_0.9
534,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.4473255668295127,rouge2,ok,rouge2_0.1
535,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.37166478767380684,rouge2,ok,rouge2_0.2
536,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.3020167856695478,rouge2,ok,rouge2_0.30000000000000004
537,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.22623074032318677,rouge2,ok,rouge2_0.4
538,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.17812852311161217,rouge2,ok,rouge2_0.5
539,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.14543404735062007,rouge2,ok,rouge2_0.6
540,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.1185018163597645,rouge2,ok,rouge2_0.7000000000000001
541,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.09557810346987348,rouge2,ok,rouge2_0.8
542,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.0840536139296004,rouge2,ok,rouge2_0.9
543,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.868721032193411,rougeL,ok,rougeL_0.1
544,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.6647876738068396,rougeL,ok,rougeL_0.2
545,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.5173493674057372,rougeL,ok,rougeL_0.30000000000000004
546,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.38419140673932106,rougeL,ok,rougeL_0.4
547,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.2913691594638607,rougeL,ok,rougeL_0.5
548,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.24226481272704498,rougeL,ok,rougeL_0.6
549,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.1882750845546787,rougeL,ok,rougeL_0.7000000000000001
550,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.1488162344983089,rougeL,ok,rougeL_0.8
551,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.13127896780658901,rougeL,ok,rougeL_0.9
552,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.868721032193411,rougeLsum,ok,rougeLsum_0.1
553,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.6647876738068396,rougeLsum,ok,rougeLsum_0.2
554,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.5173493674057372,rougeLsum,ok,rougeLsum_0.30000000000000004
555,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.3843166729299762,rougeLsum,ok,rougeLsum_0.4
556,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.29149442565451583,rougeLsum,ok,rougeLsum_0.5
557,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.24226481272704498,rougeLsum,ok,rougeLsum_0.6
558,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.1882750845546787,rougeLsum,ok,rougeLsum_0.7000000000000001
559,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.1488162344983089,rougeLsum,ok,rougeLsum_0.8
560,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.13115370161593387,rougeLsum,ok,rougeLsum_0.9
561,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
562,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
563,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
564,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
565,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
566,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
567,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
568,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
569,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.9103094074909182,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
570,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
571,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
572,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
573,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
574,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
575,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
576,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
577,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
578,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.8619566578980333,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
579,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.1
580,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.2
581,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.30000000000000004
582,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.4
583,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.5
584,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.6
585,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.7000000000000001
586,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.8
587,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.910935738444194,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.9
588,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.1,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.1
589,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.2,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.2
590,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.30000000000000004,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.30000000000000004
591,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.4,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.4
592,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.5,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.5
593,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.6,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.6
594,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.7000000000000001,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.7000000000000001
595,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.8,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.8
596,COQA,rerun/COQA_qwen2_32b_i_0.6,1,0.9,0.8604359809571536,j_llama70b_gen_16.0_0.49,ok,j_llama70b_gen_16.0_0.49_0.9
597,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.9487661280220469,bma_judge_w8,ok,bma_judge_w8_0.1
598,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.9487661280220469,bma_judge_w8,ok,bma_judge_w8_0.2
599,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.9430038832519103,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
600,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.9430038832519103,bma_judge_w8,ok,bma_judge_w8_0.4
601,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.8347738945258675,bma_judge_w8,ok,bma_judge_w8_0.5
602,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.8347738945258675,bma_judge_w8,ok,bma_judge_w8_0.6
603,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.8347738945258675,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
604,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.8171113616434924,bma_judge_w8,ok,bma_judge_w8_0.8
605,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.8171113616434924,bma_judge_w8,ok,bma_judge_w8_0.9
606,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.1,0.9487661280220469,bma_judge,ok,bma_judge_0.1
607,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.2,0.9487661280220469,bma_judge,ok,bma_judge_0.2
608,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.30000000000000004,0.9430038832519103,bma_judge,ok,bma_judge_0.30000000000000004
609,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.4,0.9430038832519103,bma_judge,ok,bma_judge_0.4
610,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.5,0.8347738945258675,bma_judge,ok,bma_judge_0.5
611,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.6,0.8347738945258675,bma_judge,ok,bma_judge_0.6
612,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.7000000000000001,0.8347738945258675,bma_judge,ok,bma_judge_0.7000000000000001
613,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.8,0.8171113616434924,bma_judge,ok,bma_judge_0.8
614,COQA,rerun/COQA_qwen2_32b_i_0.6,0,0.9,0.8171113616434924,bma_judge,ok,bma_judge_0.9
615,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.10048076923076923,bleu,ok,bleu_0.1
616,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.0764423076923077,bleu,ok,bleu_0.2
617,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.047596153846153844,bleu,ok,bleu_0.30000000000000004
618,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.020673076923076922,bleu,ok,bleu_0.4
619,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.0028846153846153848,bleu,ok,bleu_0.5
620,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.0009615384615384616,bleu,ok,bleu_0.6
621,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,bleu,ok,bleu_0.7000000000000001
622,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0,bleu,ok,bleu_0.8
623,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0,bleu,ok,bleu_0.9
624,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.10144230769230769,bleu_adapt,ok,bleu_adapt_0.1
625,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.07692307692307693,bleu_adapt,ok,bleu_adapt_0.2
626,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.047596153846153844,bleu_adapt,ok,bleu_adapt_0.30000000000000004
627,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.020673076923076922,bleu_adapt,ok,bleu_adapt_0.4
628,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.0028846153846153848,bleu_adapt,ok,bleu_adapt_0.5
629,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.0009615384615384616,bleu_adapt,ok,bleu_adapt_0.6
630,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0,bleu_adapt,ok,bleu_adapt_0.7000000000000001
631,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
632,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
633,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.6807692307692308,rouge1,ok,rouge1_0.1
634,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.4625,rouge1,ok,rouge1_0.2
635,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.23221153846153847,rouge1,ok,rouge1_0.30000000000000004
636,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.11730769230769231,rouge1,ok,rouge1_0.4
637,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.06394230769230769,rouge1,ok,rouge1_0.5
638,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.020192307692307693,rouge1,ok,rouge1_0.6
639,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0014423076923076924,rouge1,ok,rouge1_0.7000000000000001
640,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0,rouge1,ok,rouge1_0.8
641,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0,rouge1,ok,rouge1_0.9
642,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.14086538461538461,rouge2,ok,rouge2_0.1
643,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.10721153846153846,rouge2,ok,rouge2_0.2
644,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.07403846153846154,rouge2,ok,rouge2_0.30000000000000004
645,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.03173076923076923,rouge2,ok,rouge2_0.4
646,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.009134615384615385,rouge2,ok,rouge2_0.5
647,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.0009615384615384616,rouge2,ok,rouge2_0.6
648,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0004807692307692308,rouge2,ok,rouge2_0.7000000000000001
649,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0,rouge2,ok,rouge2_0.8
650,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0,rouge2,ok,rouge2_0.9
651,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.6341346153846154,rougeL,ok,rougeL_0.1
652,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.2860576923076923,rougeL,ok,rougeL_0.2
653,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.14423076923076922,rougeL,ok,rougeL_0.30000000000000004
654,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.07115384615384615,rougeL,ok,rougeL_0.4
655,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.027403846153846154,rougeL,ok,rougeL_0.5
656,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.005288461538461539,rougeL,ok,rougeL_0.6
657,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0004807692307692308,rougeL,ok,rougeL_0.7000000000000001
658,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0,rougeL,ok,rougeL_0.8
659,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0,rougeL,ok,rougeL_0.9
660,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.6336538461538461,rougeLsum,ok,rougeLsum_0.1
661,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.28894230769230766,rougeLsum,ok,rougeLsum_0.2
662,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.15576923076923077,rougeLsum,ok,rougeLsum_0.30000000000000004
663,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.09182692307692308,rougeLsum,ok,rougeLsum_0.4
664,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.04807692307692308,rougeLsum,ok,rougeLsum_0.5
665,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.0125,rougeLsum,ok,rougeLsum_0.6
666,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0004807692307692308,rougeLsum,ok,rougeLsum_0.7000000000000001
667,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0,rougeLsum,ok,rougeLsum_0.8
668,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0,rougeLsum,ok,rougeLsum_0.9
669,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.1,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.1
670,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.2,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.2
671,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.30000000000000004,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.30000000000000004
672,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.4,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.4
673,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.5,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.5
674,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.6,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.6
675,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.7000000000000001,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.7000000000000001
676,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.8,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.8
677,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,223,0.9,0.6558966074313409,j_qwen32b_gen_16.0_0.5,ok,j_qwen32b_gen_16.0_0.5_0.9
678,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
679,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
680,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
681,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
682,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
683,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
684,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
685,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
686,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.5091346153846154,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
687,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
688,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
689,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
690,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
691,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
692,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
693,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
694,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
695,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.041826923076923074,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
696,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.1
697,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.2
698,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.30000000000000004
699,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.4
700,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.5
701,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.6
702,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.7000000000000001
703,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.8
704,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.04326923076923077,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.9
705,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.1
706,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.2
707,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.30000000000000004
708,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.4
709,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.5
710,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.6
711,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.7000000000000001
712,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.8
713,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.5120192307692307,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.9
714,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.1
715,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.2
716,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.30000000000000004
717,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.4
718,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.5
719,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.6
720,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.7000000000000001
721,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.8
722,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0009615384615384616,j_qwen32b_qa_16_0.5,ok,j_qwen32b_qa_16_0.5_0.9
723,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.1
724,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.2
725,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.30000000000000004
726,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.4
727,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.5
728,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.6
729,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.7000000000000001
730,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.8
731,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.5076923076923077,j_qwen32b_gen_16_0.49,ok,j_qwen32b_gen_16_0.49_0.9
732,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.1
733,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.2
734,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.30000000000000004
735,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.4
736,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.5
737,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.6
738,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.7000000000000001
739,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.8
740,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.03942307692307692,j_qwen32b_qa_16_0.49,ok,j_qwen32b_qa_16_0.49_0.9
741,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.4355769230769231,exact_correctness,ok,exact_correctness_0.5
742,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.7942307692307692,bma_judge_w8,ok,bma_judge_w8_0.1
743,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.6235576923076923,bma_judge_w8,ok,bma_judge_w8_0.2
744,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.46634615384615385,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
745,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.3283653846153846,bma_judge_w8,ok,bma_judge_w8_0.4
746,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.052884615384615384,bma_judge_w8,ok,bma_judge_w8_0.5
747,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.04230769230769231,bma_judge_w8,ok,bma_judge_w8_0.6
748,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.03365384615384615,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
749,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.013942307692307693,bma_judge_w8,ok,bma_judge_w8_0.8
750,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0004807692307692308,bma_judge_w8,ok,bma_judge_w8_0.9
751,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.1,0.7942307692307692,bma_judge,ok,bma_judge_0.1
752,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.2,0.6235576923076923,bma_judge,ok,bma_judge_0.2
753,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.30000000000000004,0.46634615384615385,bma_judge,ok,bma_judge_0.30000000000000004
754,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.4,0.3283653846153846,bma_judge,ok,bma_judge_0.4
755,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.5,0.052884615384615384,bma_judge,ok,bma_judge_0.5
756,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.6,0.04230769230769231,bma_judge,ok,bma_judge_0.6
757,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.7000000000000001,0.03365384615384615,bma_judge,ok,bma_judge_0.7000000000000001
758,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.8,0.013942307692307693,bma_judge,ok,bma_judge_0.8
759,COLLIE,rerun/COLLIE_qwen2_32b_i_0.6,0,0.9,0.0004807692307692308,bma_judge,ok,bma_judge_0.9
760,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.09278846153846154,bleu,ok,bleu_0.1
761,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.06442307692307692,bleu,ok,bleu_0.2
762,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.04375,bleu,ok,bleu_0.30000000000000004
763,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.01875,bleu,ok,bleu_0.4
764,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.004326923076923077,bleu,ok,bleu_0.5
765,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.0009615384615384616,bleu,ok,bleu_0.6
766,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.0004807692307692308,bleu,ok,bleu_0.7000000000000001
767,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.0,bleu,ok,bleu_0.8
768,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.0,bleu,ok,bleu_0.9
769,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.09423076923076923,bleu_adapt,ok,bleu_adapt_0.1
770,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.06538461538461539,bleu_adapt,ok,bleu_adapt_0.2
771,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.04471153846153846,bleu_adapt,ok,bleu_adapt_0.30000000000000004
772,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.01971153846153846,bleu_adapt,ok,bleu_adapt_0.4
773,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.005288461538461539,bleu_adapt,ok,bleu_adapt_0.5
774,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.0019230769230769232,bleu_adapt,ok,bleu_adapt_0.6
775,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.0014423076923076924,bleu_adapt,ok,bleu_adapt_0.7000000000000001
776,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.0,bleu_adapt,ok,bleu_adapt_0.8
777,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.0,bleu_adapt,ok,bleu_adapt_0.9
778,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.6716346153846153,rouge1,ok,rouge1_0.1
779,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.40384615384615385,rouge1,ok,rouge1_0.2
780,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.2230769230769231,rouge1,ok,rouge1_0.30000000000000004
781,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.11826923076923077,rouge1,ok,rouge1_0.4
782,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.0625,rouge1,ok,rouge1_0.5
783,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.02451923076923077,rouge1,ok,rouge1_0.6
784,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.005288461538461539,rouge1,ok,rouge1_0.7000000000000001
785,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.0019230769230769232,rouge1,ok,rouge1_0.8
786,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.0009615384615384616,rouge1,ok,rouge1_0.9
787,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.14326923076923076,rouge2,ok,rouge2_0.1
788,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.10192307692307692,rouge2,ok,rouge2_0.2
789,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.07211538461538461,rouge2,ok,rouge2_0.30000000000000004
790,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.03942307692307692,rouge2,ok,rouge2_0.4
791,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.012980769230769231,rouge2,ok,rouge2_0.5
792,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.0038461538461538464,rouge2,ok,rouge2_0.6
793,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.0019230769230769232,rouge2,ok,rouge2_0.7000000000000001
794,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.0014423076923076924,rouge2,ok,rouge2_0.8
795,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.0009615384615384616,rouge2,ok,rouge2_0.9
796,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.5826923076923077,rougeL,ok,rougeL_0.1
797,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.2875,rougeL,ok,rougeL_0.2
798,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.15048076923076922,rougeL,ok,rougeL_0.30000000000000004
799,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.06923076923076923,rougeL,ok,rougeL_0.4
800,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.02403846153846154,rougeL,ok,rougeL_0.5
801,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.007211538461538462,rougeL,ok,rougeL_0.6
802,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.0028846153846153848,rougeL,ok,rougeL_0.7000000000000001
803,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.0014423076923076924,rougeL,ok,rougeL_0.8
804,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.0009615384615384616,rougeL,ok,rougeL_0.9
805,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.5725961538461538,rougeLsum,ok,rougeLsum_0.1
806,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.2899038461538462,rougeLsum,ok,rougeLsum_0.2
807,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.16105769230769232,rougeLsum,ok,rougeLsum_0.30000000000000004
808,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.08798076923076924,rougeLsum,ok,rougeLsum_0.4
809,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.04519230769230769,rougeLsum,ok,rougeLsum_0.5
810,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.013461538461538462,rougeLsum,ok,rougeLsum_0.6
811,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.0033653846153846156,rougeLsum,ok,rougeLsum_0.7000000000000001
812,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.0014423076923076924,rougeLsum,ok,rougeLsum_0.8
813,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.0009615384615384616,rougeLsum,ok,rougeLsum_0.9
814,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.1
815,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.2
816,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.30000000000000004
817,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.4
818,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.5
819,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.6
820,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.7000000000000001
821,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.8
822,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.02451923076923077,j_llama70b_qa_16_0.5,ok,j_llama70b_qa_16_0.5_0.9
823,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.1
824,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.2
825,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.30000000000000004
826,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.4
827,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.5
828,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.6
829,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.7000000000000001
830,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.8
831,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.28076923076923077,j_llama70b_gen_16_0.49,ok,j_llama70b_gen_16_0.49_0.9
832,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.1
833,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.2
834,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.30000000000000004
835,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.4
836,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.5
837,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.6
838,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.7000000000000001
839,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.8
840,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.02548076923076923,j_llama70b_qa_16_0.5_1,ok,j_llama70b_qa_16_0.5_1_0.9
841,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.1
842,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.2
843,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.30000000000000004
844,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.4
845,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.5
846,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.6
847,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.7000000000000001
848,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.8
849,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.2793269230769231,j_llama70b_gen_16_0.49_1,ok,j_llama70b_gen_16_0.49_1_0.9
850,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.2865384615384615,exact_correctness,ok,exact_correctness_0.5
851,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.28942307692307695,bma_judge_w8,ok,bma_judge_w8_0.1
852,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.28942307692307695,bma_judge_w8,ok,bma_judge_w8_0.2
853,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.2725961538461538,bma_judge_w8,ok,bma_judge_w8_0.30000000000000004
854,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.2725961538461538,bma_judge_w8,ok,bma_judge_w8_0.4
855,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.026442307692307692,bma_judge_w8,ok,bma_judge_w8_0.5
856,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.026442307692307692,bma_judge_w8,ok,bma_judge_w8_0.6
857,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.026442307692307692,bma_judge_w8,ok,bma_judge_w8_0.7000000000000001
858,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.021634615384615384,bma_judge_w8,ok,bma_judge_w8_0.8
859,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.021634615384615384,bma_judge_w8,ok,bma_judge_w8_0.9
860,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.1,0.28942307692307695,bma_judge,ok,bma_judge_0.1
861,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.2,0.28942307692307695,bma_judge,ok,bma_judge_0.2
862,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.30000000000000004,0.2725961538461538,bma_judge,ok,bma_judge_0.30000000000000004
863,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.4,0.2725961538461538,bma_judge,ok,bma_judge_0.4
864,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.5,0.026442307692307692,bma_judge,ok,bma_judge_0.5
865,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.6,0.026442307692307692,bma_judge,ok,bma_judge_0.6
866,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.7000000000000001,0.026442307692307692,bma_judge,ok,bma_judge_0.7000000000000001
867,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.8,0.021634615384615384,bma_judge,ok,bma_judge_0.8
868,COLLIE,rerun/COLLIE_qwen2_7b_i_0.6,0,0.9,0.021634615384615384,bma_judge,ok,bma_judge_0.9
