,perturb_larger,dataset,model,uncertainty
0,0.564453125,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,predent
1,0.5693359375,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,len_norm_predent
2,0.4609375,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,seqlen_sample
3,0.7666015625,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,seqlen_correct
4,0.5947265625,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,TOKEN_SAR
5,0.6044921875,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,SENT_SAR
6,0.6220703125,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,SAR
7,0.6494140625,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,log_pplx
8,0.607421875,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,sement
9,0.0,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,min_logprob
10,0.6220703125,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,len_norm_sement
11,0.6025390625,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,ptrue_neg_log_prob
12,0.5009765625,SQUADPERT,rerun/SQUADPERT_llama3_70b_i_0.6,GNLL
13,0.7041015625,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,predent
14,0.6650390625,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,len_norm_predent
15,0.6181640625,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,seqlen_sample
16,0.7021484375,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,seqlen_correct
17,0.5966796875,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,TOKEN_SAR
18,0.6572265625,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,SENT_SAR
19,0.6025390625,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,SAR
20,0.708984375,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,log_pplx
21,0.6474609375,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,sement
22,0.0,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,min_logprob
23,0.6953125,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,len_norm_sement
24,0.423828125,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,ptrue_neg_log_prob
25,0.693359375,COQAPERT,rerun/COQAPERT_qwen2_32b_i_0.6,GNLL
26,0.556640625,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,predent
27,0.5458984375,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,len_norm_predent
28,0.4609375,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,seqlen_sample
29,0.9697265625,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,seqlen_correct
30,0.59765625,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,TOKEN_SAR
31,0.615234375,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,SENT_SAR
32,0.5751953125,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,SAR
33,0.701171875,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,log_pplx
34,0.57421875,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,sement
35,0.0,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,min_logprob
36,0.587890625,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,len_norm_sement
37,0.65625,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,ptrue_neg_log_prob
38,0.580078125,COQAPERT,rerun/COQAPERT_llama3_8b_i_0.6,GNLL
39,0.630859375,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,predent
40,0.61328125,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,len_norm_predent
41,0.619140625,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,seqlen_sample
42,0.685546875,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,seqlen_correct
43,0.6005859375,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,TOKEN_SAR
44,0.5859375,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,SENT_SAR
45,0.578125,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,SAR
46,0.6708984375,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,log_pplx
47,0.634765625,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,sement
48,0.0009765625,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,min_logprob
49,0.6552734375,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,len_norm_sement
50,0.330078125,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,ptrue_neg_log_prob
51,0.6494140625,SQUADPERT,rerun/SQUADPERT_qwen2_7b_i_0.6,GNLL
52,0.6259765625,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,predent
53,0.595703125,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,len_norm_predent
54,0.5986328125,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,seqlen_sample
55,0.6220703125,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,seqlen_correct
56,0.5712890625,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,TOKEN_SAR
57,0.5517578125,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,SENT_SAR
58,0.5615234375,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,SAR
59,0.658203125,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,log_pplx
60,0.630859375,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,sement
61,0.0,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,min_logprob
62,0.6630859375,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,len_norm_sement
63,0.4638671875,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,ptrue_neg_log_prob
64,0.630859375,SQUADPERT,rerun/SQUADPERT_qwen2_32b_i_0.6,GNLL
65,0.646484375,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,predent
66,0.595703125,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,len_norm_predent
67,0.5810546875,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,seqlen_sample
68,0.6630859375,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,seqlen_correct
69,0.595703125,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,TOKEN_SAR
70,0.625,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,SENT_SAR
71,0.5927734375,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,SAR
72,0.712890625,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,log_pplx
73,0.609375,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,sement
74,0.0,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,min_logprob
75,0.6640625,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,len_norm_sement
76,0.546875,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,ptrue_neg_log_prob
77,0.6845703125,COQAPERT,rerun/COQAPERT_qwen2_7b_i_0.6,GNLL
78,0.62890625,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,predent
79,0.6181640625,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,len_norm_predent
80,0.650390625,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,seqlen_sample
81,0.7822265625,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,seqlen_correct
82,0.58203125,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,TOKEN_SAR
83,0.6171875,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,SENT_SAR
84,0.6044921875,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,SAR
85,0.65625,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,log_pplx
86,0.669921875,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,sement
87,0.0,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,min_logprob
88,0.6845703125,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,len_norm_sement
89,0.5615234375,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,ptrue_neg_log_prob
90,0.6337890625,COQAPERT,rerun/COQAPERT_llama3_70b_i_0.6,GNLL
91,0.603515625,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,predent
92,0.5712890625,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,len_norm_predent
93,0.5654296875,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,seqlen_sample
94,0.8115234375,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,seqlen_correct
95,0.57421875,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,TOKEN_SAR
96,0.6201171875,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,SENT_SAR
97,0.5625,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,SAR
98,0.70703125,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,log_pplx
99,0.607421875,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,sement
100,0.0,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,min_logprob
101,0.6396484375,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,len_norm_sement
102,0.84375,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,ptrue_neg_log_prob
103,0.6455078125,SQUADPERT,rerun/SQUADPERT_llama3_8b_i_0.6,GNLL
