# --- Selector / backend identifiers -----------------------------------------
# NOTE(review): presumably ALL is a "run everything" selector and LM_EVAL names
# the evaluation backend; neither is used in the visible part of the file,
# so confirm against the callers.
ALL = "all"
LM_EVAL = "lm_eval"

# --- Task name strings ------------------------------------------------------
WIKITEXT2 = "wikitext2"
WINOGRANDE = "winogrande"
HELLASWAG = "hellaswag"
MMLU_COT = "mmlu_cot"
PIQA = "piqa"
OPENBOOKQA = "openbookqa"
ARC_EASY = "arc_easy"
ARC_CHALLENGE = "arc_challenge"
GSM8K = "gsm8k"
TRUTHFULQA_MC2 = "truthfulqa_mc2"
BOOLQ = "boolq"
COMMONSENSE_QA = "commonsense_qa"
COPA = "copa"
SOCIAL_IQA = "social_iqa"
SCIQ = "sciq"
LAMBADA = "lambada"
LOGIQA = "logiqa"
GPQA = "gpqa"

# --- Metric key strings -----------------------------------------------------
# NOTE(review): these look like "<metric>,<filter>" result keys — confirm
# against whatever produces the results being indexed.
ACC_NORM_NONE = "acc_norm,none"
ACC_NONE = "acc,none"
PPL_NONE = "perplexity,none"
EXACT_MATCH_FLEXIBLE = "exact_match,flexible-extract"
EXACT_MATCH_STRICT = "exact_match,strict-match"


# Maps each task name to the list of metric keys associated with that task.
# Built from ordered (task, metrics) pairs, so iteration order follows the
# order written here; every task gets its own list object.
#
# NOTE(review): ARC_CHALLENGE is the only entry carrying an exact-match metric,
# and TRUTHFULQA_MC2 uses the normalised-accuracy key — confirm both keys
# are actually present in the results this table is used to index.
main_benchmark = dict(
    (
        (WINOGRANDE, [ACC_NONE]),
        (HELLASWAG, [ACC_NORM_NONE]),
        (ARC_EASY, [ACC_NORM_NONE]),
        (ARC_CHALLENGE, [ACC_NORM_NONE, EXACT_MATCH_STRICT]),
        (PIQA, [ACC_NORM_NONE]),
        (OPENBOOKQA, [ACC_NORM_NONE]),
        (TRUTHFULQA_MC2, [ACC_NORM_NONE]),
        (BOOLQ, [ACC_NONE]),
        (COMMONSENSE_QA, [ACC_NONE]),
        (COPA, [ACC_NONE]),
        (SOCIAL_IQA, [ACC_NORM_NONE]),
        (SCIQ, [ACC_NORM_NONE]),
        (LAMBADA, [PPL_NONE]),
        (LOGIQA, [ACC_NORM_NONE]),
        (GPQA, [ACC_NORM_NONE]),
    )
)

# Ordered list of task names collected under the "all lm_eval tasks" name.
#
# NOTE(review): SOCIAL_IQA, LOGIQA and GPQA have entries in main_benchmark but
# are not listed here, while WIKITEXT2, MMLU_COT and GSM8K are listed here
# without a main_benchmark entry — confirm this asymmetry is intentional.
ALL_LM_EVAL_TASKS = [
    WIKITEXT2, WINOGRANDE, HELLASWAG, MMLU_COT,
    PIQA, OPENBOOKQA, ARC_EASY, ARC_CHALLENGE,
    GSM8K, TRUTHFULQA_MC2, BOOLQ, COMMONSENSE_QA,
    COPA, SCIQ, LAMBADA,
]
