anli_GPT_3_style_r1_score_eval
anli_GPT_3_style_r2_score_eval
anli_GPT_3_style_r3_score_eval
anli_MNLI_crowdsource_r1_score_eval
anli_MNLI_crowdsource_r2_score_eval
anli_MNLI_crowdsource_r3_score_eval
anli_always_sometimes_never_r1_score_eval
anli_always_sometimes_never_r2_score_eval
anli_always_sometimes_never_r3_score_eval
anli_based_on_the_previous_passage_r1_score_eval
anli_based_on_the_previous_passage_r2_score_eval
anli_based_on_the_previous_passage_r3_score_eval
anli_can_we_infer_r1_score_eval
anli_can_we_infer_r2_score_eval
anli_can_we_infer_r3_score_eval
anli_claim_true_false_inconclusive_r1_score_eval
anli_claim_true_false_inconclusive_r2_score_eval
anli_claim_true_false_inconclusive_r3_score_eval
anli_consider_always_sometimes_never_r1_score_eval
anli_consider_always_sometimes_never_r2_score_eval
anli_consider_always_sometimes_never_r3_score_eval
anli_does_it_follow_that_r1_score_eval
anli_does_it_follow_that_r2_score_eval
anli_does_it_follow_that_r3_score_eval
anli_does_this_imply_r1_score_eval
anli_does_this_imply_r2_score_eval
anli_does_this_imply_r3_score_eval
anli_guaranteed_possible_impossible_r1_score_eval
anli_guaranteed_possible_impossible_r2_score_eval
anli_guaranteed_possible_impossible_r3_score_eval
anli_guaranteed_true_r1_score_eval
anli_guaranteed_true_r2_score_eval
anli_guaranteed_true_r3_score_eval
anli_justified_in_saying_r1_score_eval
anli_justified_in_saying_r2_score_eval
anli_justified_in_saying_r3_score_eval
anli_must_be_true_r1_score_eval
anli_must_be_true_r2_score_eval
anli_must_be_true_r3_score_eval
anli_should_assume_r1_score_eval
anli_should_assume_r2_score_eval
anli_should_assume_r3_score_eval
anli_take_the_following_as_truth_r1_score_eval
anli_take_the_following_as_truth_r2_score_eval
anli_take_the_following_as_truth_r3_score_eval
hellaswag_Predict_ending_with_hint_score_eval
hellaswag_Randomized_prompts_template_score_eval
hellaswag_complete_first_then_score_eval
hellaswag_if_begins_how_continues_score_eval
super_glue_cb_GPT_3_style_score_eval
super_glue_cb_MNLI_crowdsource_score_eval
super_glue_cb_always_sometimes_never_score_eval
super_glue_cb_based_on_the_previous_passage_score_eval
super_glue_cb_can_we_infer_score_eval
super_glue_cb_claim_true_false_inconclusive_score_eval
super_glue_cb_consider_always_sometimes_never_score_eval
super_glue_cb_does_it_follow_that_score_eval
super_glue_cb_does_this_imply_score_eval
super_glue_cb_guaranteed_possible_impossible_score_eval
super_glue_cb_guaranteed_true_score_eval
super_glue_cb_justified_in_saying_score_eval
super_glue_cb_must_be_true_score_eval
super_glue_cb_should_assume_score_eval
super_glue_cb_take_the_following_as_truth_score_eval
super_glue_copa_C1_or_C2_premise_so_because__score_eval
super_glue_copa__As_a_result_C1_or_C2__score_eval
super_glue_copa__What_could_happen_next_C1_or_C2__score_eval
super_glue_copa__which_may_be_caused_by_score_eval
super_glue_copa__why_C1_or_C2_score_eval
super_glue_copa_best_option_score_eval
super_glue_copa_cause_effect_score_eval
super_glue_copa_choose_score_eval
super_glue_copa_exercise_score_eval
super_glue_copa_i_am_hesitating_score_eval
super_glue_copa_more_likely_score_eval
super_glue_copa_plausible_alternatives_score_eval
super_glue_rte_GPT_3_style_score_eval
super_glue_rte_MNLI_crowdsource_score_eval
super_glue_rte_based_on_the_previous_passage_score_eval
super_glue_rte_can_we_infer_score_eval
super_glue_rte_does_it_follow_that_score_eval
super_glue_rte_does_this_imply_score_eval
super_glue_rte_guaranteed_true_score_eval
super_glue_rte_justified_in_saying_score_eval
super_glue_rte_must_be_true_score_eval
super_glue_rte_should_assume_score_eval
super_glue_wic_GPT_3_prompt_score_eval
super_glue_wic_GPT_3_prompt_with_label_score_eval
super_glue_wic_affirmation_true_or_false_score_eval
super_glue_wic_grammar_homework_score_eval
super_glue_wic_polysemous_score_eval
super_glue_wic_question_context_meaning_score_eval
super_glue_wic_question_context_meaning_with_label_score_eval
super_glue_wic_question_context_score_eval
super_glue_wic_same_sense_score_eval
super_glue_wic_similar_sense_score_eval
super_glue_wsc.fixed_GPT_3_Style_score_eval
super_glue_wsc.fixed_I_think_they_mean_score_eval
super_glue_wsc.fixed_Who_or_what_is_are_score_eval
super_glue_wsc.fixed_by_p_they_mean_score_eval
super_glue_wsc.fixed_does_p_stand_for_score_eval
super_glue_wsc.fixed_does_the_pronoun_refer_to_score_eval
super_glue_wsc.fixed_in_other_words_score_eval
super_glue_wsc.fixed_p_is_are_r_score_eval
super_glue_wsc.fixed_replaced_with_score_eval
super_glue_wsc.fixed_the_pronoun_refers_to_score_eval
winogrande_winogrande_xl_Replace_score_eval
winogrande_winogrande_xl_does_underscore_refer_to_score_eval
winogrande_winogrande_xl_fill_in_the_blank_score_eval
winogrande_winogrande_xl_stand_for_score_eval
winogrande_winogrande_xl_underscore_refer_to_score_eval
