# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import lighteval.tasks.default_prompts as prompt
from lighteval.metrics.metrics import Metrics
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.templates.qa import get_qa_prompt_function
from lighteval.utils.language import Language


# bigbench "abstract_narrative_understanding" subset, evaluated on the
# "default" split with loglikelihood accuracy.
abstract_narrative_understanding_bigbench = LightevalTaskConfig(
    # Identity
    name="abstract_narrative_understanding",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="abstract_narrative_understanding",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
def _agieval_task(subset: str) -> LightevalTaskConfig:
    """Build the task config for one agieval subset.

    Every agieval task in this module shares the same settings and differs
    only in ``subset``, the slug that appears both in the task name
    (``agieval:<subset>``) and in the dataset repo
    (``dmayhem93/agieval-<subset>``).

    Args:
        subset: Subset slug, e.g. ``"aqua-rat"`` or ``"gaokao-biology"``.

    Returns:
        A new ``LightevalTaskConfig``. List arguments are created afresh on
        every call, so the returned configs never share mutable objects.
    """
    return LightevalTaskConfig(
        name=f"agieval:{subset}",
        suite=["lighteval"],
        prompt_function=prompt.agieval,
        hf_repo=f"dmayhem93/agieval-{subset}",
        hf_subset="default",
        hf_avail_splits=["test"],
        evaluation_splits=["test"],
        # NOTE(review): few_shots_select is set while few_shots_split is None
        # and only a "test" split is listed; confirm where few-shot examples
        # are drawn from.
        few_shots_split=None,
        few_shots_select="random_sampling",
        generation_size=1,
        metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
        stop_sequence=None,
        trust_dataset=True,
        version=0,
    )


agieval_aqua_rat_lighteval = _agieval_task("aqua-rat")
agieval_gaokao_biology_lighteval = _agieval_task("gaokao-biology")
agieval_gaokao_chemistry_lighteval = _agieval_task("gaokao-chemistry")
agieval_gaokao_chinese_lighteval = _agieval_task("gaokao-chinese")
agieval_gaokao_english_lighteval = _agieval_task("gaokao-english")
agieval_gaokao_geography_lighteval = _agieval_task("gaokao-geography")
agieval_gaokao_history_lighteval = _agieval_task("gaokao-history")
agieval_gaokao_mathqa_lighteval = _agieval_task("gaokao-mathqa")
agieval_gaokao_physics_lighteval = _agieval_task("gaokao-physics")
agieval_logiqa_en_lighteval = _agieval_task("logiqa-en")
agieval_logiqa_zh_lighteval = _agieval_task("logiqa-zh")
agieval_lsat_ar_lighteval = _agieval_task("lsat-ar")
agieval_lsat_lr_lighteval = _agieval_task("lsat-lr")
agieval_lsat_rc_lighteval = _agieval_task("lsat-rc")
agieval_sat_en_lighteval = _agieval_task("sat-en")
agieval_sat_en_without_passage_lighteval = _agieval_task("sat-en-without-passage")
agieval_sat_math_lighteval = _agieval_task("sat-math")
# "aime24" task: evaluated on the "train" split of HuggingFaceH4/aime_2024,
# with no few-shot split or selection configured.
aime24 = LightevalTaskConfig(
    # Identity
    name="aime24",
    suite=["lighteval"],
    version=2,
    # Dataset and splits
    hf_repo="HuggingFaceH4/aime_2024",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    # Prompting and few-shot selection
    prompt_function=prompt.aime_prompt_fn,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=32768,
    metric=[
        Metrics.expr_gold_metric,
        # Currently disabled metric variants:
        # Metrics.math_pass_at_1_1n,
        # Metrics.math_pass_at_1_4n,
        # Metrics.math_pass_at_1_8n,
        # Metrics.math_pass_at_1_16n,
        # Metrics.math_pass_at_1_32n,
        # Metrics.math_pass_at_1_64n,
    ],
)
# "aime24_gpassk" task: same dataset and prompt as "aime24", scored with
# Metrics.g_pass_at_16_expr_gold and a generation size of 8192.
aime24_gpassk = LightevalTaskConfig(
    # Identity
    name="aime24_gpassk",
    suite=["lighteval"],
    version=1,
    # Dataset and splits
    hf_repo="HuggingFaceH4/aime_2024",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    # Prompting and few-shot selection
    prompt_function=prompt.aime_prompt_fn,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=8192,
    metric=[Metrics.g_pass_at_16_expr_gold],
)
# "aime25" task: evaluated on the "train" split of yentinglin/aime_2025,
# with no few-shot split or selection configured.
aime25 = LightevalTaskConfig(
    # Identity
    name="aime25",
    suite=["lighteval"],
    version=2,
    # Dataset and splits
    hf_repo="yentinglin/aime_2025",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    # Prompting and few-shot selection
    prompt_function=prompt.aime_prompt_fn,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=10000,
    metric=[
        Metrics.expr_gold_metric,
        # Currently disabled metric variants:
        # Metrics.math_pass_at_1_1n,
        # Metrics.math_pass_at_1_4n,
        # Metrics.math_pass_at_1_8n,
        # Metrics.math_pass_at_1_16n,
        # Metrics.math_pass_at_1_32n,
        # Metrics.math_pass_at_1_64n,
    ],
)
# "aime25_gpassk" task: same dataset and prompt as "aime25", scored with
# Metrics.g_pass_at_16_expr_gold and a generation size of 8192.
aime25_gpassk = LightevalTaskConfig(
    # Identity
    name="aime25_gpassk",
    suite=["lighteval"],
    version=1,
    # Dataset and splits
    hf_repo="yentinglin/aime_2025",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    # Prompting and few-shot selection
    prompt_function=prompt.aime_prompt_fn,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=8192,
    metric=[Metrics.g_pass_at_16_expr_gold],
)
# bigbench "anachronisms" subset, evaluated on the "default" split with
# loglikelihood accuracy.
anachronisms_bigbench = LightevalTaskConfig(
    # Identity
    name="anachronisms",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="anachronisms",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# bigbench "analogical_similarity" subset, evaluated on the "default" split
# with loglikelihood accuracy.
analogical_similarity_bigbench = LightevalTaskConfig(
    # Identity
    name="analogical_similarity",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="analogical_similarity",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# bigbench "analytic_entailment" subset, evaluated on the "default" split
# with loglikelihood accuracy.
analytic_entailment_bigbench = LightevalTaskConfig(
    # Identity
    name="analytic_entailment",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="analytic_entailment",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "anli:r1": evaluated on "test_r1", with few-shot examples taken from
# "train_r1".
anli_r1_lighteval = LightevalTaskConfig(
    # Identity
    name="anli:r1",
    suite=["lighteval", "anli"],
    version=0,
    # Dataset and splits
    hf_repo="anli",
    hf_subset="plain_text",
    hf_avail_splits=["train_r1", "dev_r1", "test_r1"],
    evaluation_splits=["test_r1"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.anli,
    few_shots_split="train_r1",
    few_shots_select="random_sampling_from_train",
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "anli:r2": evaluated on "test_r2", with few-shot examples taken from
# "train_r2".
anli_r2_lighteval = LightevalTaskConfig(
    # Identity
    name="anli:r2",
    suite=["lighteval", "anli"],
    version=0,
    # Dataset and splits
    hf_repo="anli",
    hf_subset="plain_text",
    hf_avail_splits=["train_r2", "dev_r2", "test_r2"],
    evaluation_splits=["test_r2"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.anli,
    few_shots_split="train_r2",
    few_shots_select="random_sampling_from_train",
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "anli:r3": evaluated on "test_r3", with few-shot examples taken from
# "train_r3".
anli_r3_lighteval = LightevalTaskConfig(
    # Identity
    name="anli:r3",
    suite=["lighteval", "anli"],
    version=0,
    # Dataset and splits
    hf_repo="anli",
    hf_subset="plain_text",
    hf_avail_splits=["train_r3", "dev_r3", "test_r3"],
    evaluation_splits=["test_r3"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.anli,
    few_shots_split="train_r3",
    few_shots_select="random_sampling_from_train",
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "arc_agi_2": exact-match scoring on the "test" split of
# arc-agi-community/arc-agi-2. Unlike most configs visible in this module,
# trust_dataset is False here.
arc_agi_2 = LightevalTaskConfig(
    # Identity
    name="arc_agi_2",
    suite=["lighteval"],
    version=0,
    # Dataset and splits
    hf_repo="arc-agi-community/arc-agi-2",
    hf_subset="default",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=False,
    # Prompting and few-shot selection
    prompt_function=prompt.arc_agi_2,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=2048,
    stop_sequence=None,
    metric=[Metrics.exact_match],
)
# "arc:c:letters": ARC-Challenge "test" split, prompted through
# prompt.arc_with_options_letters_predict.
arc_c_letters_original = LightevalTaskConfig(
    # Identity
    name="arc:c:letters",
    suite=["original", "arc"],
    version=0,
    # Dataset and splits
    hf_repo="ai2_arc",
    hf_subset="ARC-Challenge",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.arc_with_options_letters_predict,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.exact_match],
)
# "arc:c:options": ARC-Challenge "test" split, prompted through
# prompt.arc_with_options.
arc_c_options_original = LightevalTaskConfig(
    # Identity
    name="arc:c:options",
    suite=["original", "arc"],
    version=0,
    # Dataset and splits
    hf_repo="ai2_arc",
    hf_subset="ARC-Challenge",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.arc_with_options,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# "arc:c:simple": ARC-Challenge "test" split, prompted through prompt.arc.
arc_c_simple_original = LightevalTaskConfig(
    # Identity
    name="arc:c:simple",
    suite=["original", "arc"],
    version=0,
    # Dataset and splits
    hf_repo="ai2_arc",
    hf_subset="ARC-Challenge",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.arc,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# "arc:challenge" (leaderboard suite): ARC-Challenge "test" split, prompted
# through prompt.arc.
arc_challenge_leaderboard = LightevalTaskConfig(
    # Identity
    name="arc:challenge",
    suite=["leaderboard", "arc"],
    version=0,
    # Dataset and splits
    hf_repo="ai2_arc",
    hf_subset="ARC-Challenge",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting and few-shot selection
    # NOTE(review): few_shots_select is set while few_shots_split is None;
    # confirm which split the few-shot examples are drawn from.
    prompt_function=prompt.arc,
    few_shots_split=None,
    few_shots_select="random_sampling_from_train",
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
)
# "arc:easy": ARC-Easy "test" split, prompted through prompt.arc.
arc_easy_lighteval = LightevalTaskConfig(
    # Identity
    name="arc:easy",
    suite=["lighteval", "arc"],
    version=0,
    # Dataset and splits
    hf_repo="ai2_arc",
    hf_subset="ARC-Easy",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting and few-shot selection
    # NOTE(review): few_shots_select is set while few_shots_split is None;
    # confirm which split the few-shot examples are drawn from.
    prompt_function=prompt.arc,
    few_shots_split=None,
    few_shots_select="random_sampling_from_train",
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
)
def _arithmetic_task(variant: str) -> LightevalTaskConfig:
    """Build the task config for one EleutherAI/arithmetic subset.

    All arithmetic tasks in this module share the same settings and differ
    only in ``variant``, which is used in both the task name
    (``arithmetic:<variant>``) and the dataset subset
    (``arithmetic_<variant>``).

    Args:
        variant: Subset suffix, e.g. ``"1dc"`` or ``"2da"``.

    Returns:
        A new ``LightevalTaskConfig``. List arguments are created afresh on
        every call, so the returned configs never share mutable objects.
    """
    return LightevalTaskConfig(
        name=f"arithmetic:{variant}",
        suite=["lighteval", "arithmetic"],
        prompt_function=prompt.arithmetic,
        hf_repo="EleutherAI/arithmetic",
        hf_subset=f"arithmetic_{variant}",
        hf_avail_splits=["validation"],
        evaluation_splits=["validation"],
        few_shots_split=None,
        few_shots_select=None,
        generation_size=-1,
        metric=[Metrics.acc_golds_likelihood],
        stop_sequence=["\n"],
        trust_dataset=True,
        version=0,
    )


arithmetic_1dc_lighteval = _arithmetic_task("1dc")
arithmetic_2da_lighteval = _arithmetic_task("2da")
arithmetic_2dm_lighteval = _arithmetic_task("2dm")
arithmetic_2ds_lighteval = _arithmetic_task("2ds")
arithmetic_3da_lighteval = _arithmetic_task("3da")
arithmetic_3ds_lighteval = _arithmetic_task("3ds")
arithmetic_4da_lighteval = _arithmetic_task("4da")
arithmetic_4ds_lighteval = _arithmetic_task("4ds")
arithmetic_5da_lighteval = _arithmetic_task("5da")
arithmetic_5ds_lighteval = _arithmetic_task("5ds")
# bigbench "arithmetic" subset, registered under the task name
# "arithmetic_bb".
arithmetic_bb_bigbench = LightevalTaskConfig(
    # Identity
    name="arithmetic_bb",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="arithmetic",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.perfect_exact_match],
)
# bigbench "ascii_word_recognition" subset, scored with
# Metrics.perfect_exact_match on the "default" split.
ascii_word_recognition_bigbench = LightevalTaskConfig(
    # Identity
    name="ascii_word_recognition",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="ascii_word_recognition",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# "asdiv": EleutherAI/asdiv "validation" split, scored with
# Metrics.acc_golds_likelihood.
asdiv_lighteval = LightevalTaskConfig(
    # Identity
    name="asdiv",
    suite=["lighteval"],
    version=0,
    # Dataset and splits
    hf_repo="EleutherAI/asdiv",
    hf_subset="asdiv",
    hf_avail_splits=["validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.asdiv,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.acc_golds_likelihood],
)
# bigbench "authorship_verification" subset, evaluated on the "default"
# split with loglikelihood accuracy.
authorship_verification_bigbench = LightevalTaskConfig(
    # Identity
    name="authorship_verification",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="authorship_verification",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# bigbench "auto_categorization" subset, scored with Metrics.bleu on the
# "default" split.
auto_categorization_bigbench = LightevalTaskConfig(
    # Identity
    name="auto_categorization",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="auto_categorization",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.bleu],
)
# bigbench "auto_debugging" subset, also listed in the "bigbench_lite"
# suite. Uses prompt.bigbench_linefeed_before_and_after_query, a generation
# size of 100 and no stop sequence.
auto_debugging_bigbench_lite = LightevalTaskConfig(
    # Identity
    name="auto_debugging",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    # Dataset and splits
    hf_repo="bigbench",
    hf_subset="auto_debugging",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.bigbench_linefeed_before_and_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=100,
    stop_sequence=None,
    metric=[Metrics.perfect_exact_match],
)
# "babi_qa" (helm suite): facebook/babi_qa, subset "en-valid-qa1", evaluated
# on both the "validation" and "test" splits with four exact-match variants.
babi_qa_helm = LightevalTaskConfig(
    # Identity
    name="babi_qa",
    suite=["helm"],
    version=0,
    # Dataset and splits
    hf_repo="facebook/babi_qa",
    hf_subset="en-valid-qa1",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting and few-shot selection
    prompt_function=prompt.babi_qa,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
bigbench_causal_judgment_lighteval = LightevalTaskConfig(
    name="bigbench:causal_judgment",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="causal_judgement",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_date_understanding_lighteval = LightevalTaskConfig(
    name="bigbench:date_understanding",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="date_understanding",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_disambiguation_qa_lighteval = LightevalTaskConfig(
    name="bigbench:disambiguation_qa",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="disambiguation_qa",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_geometric_shapes_lighteval = LightevalTaskConfig(
    name="bigbench:geometric_shapes",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="geometric_shapes",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_logical_deduction_five_objects_lighteval = LightevalTaskConfig(
    name="bigbench:logical_deduction_five_objects",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="logical_deduction_five_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_logical_deduction_seven_objects_lighteval = LightevalTaskConfig(
    name="bigbench:logical_deduction_seven_objects",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="logical_deduction_seven_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_logical_deduction_three_objects_lighteval = LightevalTaskConfig(
    name="bigbench:logical_deduction_three_objects",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="logical_deduction_three_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_movie_recommendation_lighteval = LightevalTaskConfig(
    name="bigbench:movie_recommendation",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="movie_recommendation",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
bigbench_navigate_lighteval = LightevalTaskConfig(
    name="bigbench:navigate",
    suite=["lighteval"],
    prompt_function=prompt.bbh_lighteval,
    hf_repo="lighteval/bbh",
    hf_subset="navigate",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["</s>", "Q=", "\n\n"],
    trust_dataset=True,
    version=0,
)
# "lighteval"-suite task over the "reasoning_about_colored_objects" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_reasoning_about_colored_objects_lighteval = LightevalTaskConfig(
    name="bigbench:reasoning_about_colored_objects",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="reasoning_about_colored_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "ruin_names" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_ruin_names_lighteval = LightevalTaskConfig(
    name="bigbench:ruin_names",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="ruin_names",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "salient_translation_error_detection" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_salient_translation_error_detection_lighteval = LightevalTaskConfig(
    name="bigbench:salient_translation_error_detection",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="salient_translation_error_detection",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "snarks" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_snarks_lighteval = LightevalTaskConfig(
    name="bigbench:snarks",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="snarks",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "sports_understanding" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_sports_understanding_lighteval = LightevalTaskConfig(
    name="bigbench:sports_understanding",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="sports_understanding",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "temporal_sequences" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_temporal_sequences_lighteval = LightevalTaskConfig(
    name="bigbench:temporal_sequences",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="temporal_sequences",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "tracking_shuffled_objects_five_objects" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_tracking_shuffled_objects_five_objects_lighteval = LightevalTaskConfig(
    name="bigbench:tracking_shuffled_objects_five_objects",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="tracking_shuffled_objects_five_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "tracking_shuffled_objects_seven_objects" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_tracking_shuffled_objects_seven_objects_lighteval = LightevalTaskConfig(
    name="bigbench:tracking_shuffled_objects_seven_objects",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="tracking_shuffled_objects_seven_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "lighteval"-suite task over the "tracking_shuffled_objects_three_objects" subset of lighteval/bbh,
# evaluated on the "train" split and scored with Metrics.loglikelihood_acc.
bigbench_tracking_shuffled_objects_three_objects_lighteval = LightevalTaskConfig(
    name="bigbench:tracking_shuffled_objects_three_objects",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="tracking_shuffled_objects_three_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_lighteval,
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "causal_judgement" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
# Note: the dataset subset is spelled "causal_judgement" while the task name uses "causal_judgment".
bigbench_causal_judgment_harness = LightevalTaskConfig(
    name="bigbench:causal_judgment",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="causal_judgement",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "date_understanding" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_date_understanding_harness = LightevalTaskConfig(
    name="bigbench:date_understanding",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="date_understanding",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "disambiguation_qa" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_disambiguation_qa_harness = LightevalTaskConfig(
    name="bigbench:disambiguation_qa",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="disambiguation_qa",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "geometric_shapes" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_geometric_shapes_harness = LightevalTaskConfig(
    name="bigbench:geometric_shapes",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="geometric_shapes",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "logical_deduction_five_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_logical_deduction_five_objects_harness = LightevalTaskConfig(
    name="bigbench:logical_deduction_five_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="logical_deduction_five_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "logical_deduction_seven_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_logical_deduction_seven_objects_harness = LightevalTaskConfig(
    name="bigbench:logical_deduction_seven_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="logical_deduction_seven_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "logical_deduction_three_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_logical_deduction_three_objects_harness = LightevalTaskConfig(
    name="bigbench:logical_deduction_three_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="logical_deduction_three_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "movie_recommendation" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_movie_recommendation_harness = LightevalTaskConfig(
    name="bigbench:movie_recommendation",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="movie_recommendation",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "navigate" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_navigate_harness = LightevalTaskConfig(
    name="bigbench:navigate",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="navigate",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "reasoning_about_colored_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_reasoning_about_colored_objects_harness = LightevalTaskConfig(
    name="bigbench:reasoning_about_colored_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="reasoning_about_colored_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "ruin_names" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_ruin_names_harness = LightevalTaskConfig(
    name="bigbench:ruin_names",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="ruin_names",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "salient_translation_error_detection" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_salient_translation_error_detection_harness = LightevalTaskConfig(
    name="bigbench:salient_translation_error_detection",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="salient_translation_error_detection",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "snarks" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_snarks_harness = LightevalTaskConfig(
    name="bigbench:snarks",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="snarks",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "sports_understanding" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_sports_understanding_harness = LightevalTaskConfig(
    name="bigbench:sports_understanding",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="sports_understanding",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "temporal_sequences" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_temporal_sequences_harness = LightevalTaskConfig(
    name="bigbench:temporal_sequences",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="temporal_sequences",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "tracking_shuffled_objects_five_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_tracking_shuffled_objects_five_objects_harness = LightevalTaskConfig(
    name="bigbench:tracking_shuffled_objects_five_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="tracking_shuffled_objects_five_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "tracking_shuffled_objects_seven_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_tracking_shuffled_objects_seven_objects_harness = LightevalTaskConfig(
    name="bigbench:tracking_shuffled_objects_seven_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="tracking_shuffled_objects_seven_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "tracking_shuffled_objects_three_objects" subset of lighteval/bbh,
# evaluated on the "train" split with loglikelihood_acc and loglikelihood_acc_norm.
bigbench_tracking_shuffled_objects_three_objects_harness = LightevalTaskConfig(
    name="bigbench:tracking_shuffled_objects_three_objects",
    suite=["harness"],
    version=0,
    hf_repo="lighteval/bbh",
    hf_subset="tracking_shuffled_objects_three_objects",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    must_remove_duplicate_docs=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_harness,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    generation_size=-1,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "boolean_expressions" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_boolean_expressions_harness = LightevalTaskConfig(
    name="bbh:boolean_expressions",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="boolean_expressions",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_boolean_expressions,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "causal_judgement" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
# Note: the dataset subset is spelled "causal_judgement" while the task name uses "causal_judgment".
bbh_causal_judgment_harness = LightevalTaskConfig(
    name="bbh:causal_judgment",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="causal_judgement",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_causal_judgment,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "date_understanding" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_date_understanding_harness = LightevalTaskConfig(
    name="bbh:date_understanding",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="date_understanding",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_date_understanding,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "disambiguation_qa" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_disambiguation_qa_harness = LightevalTaskConfig(
    name="bbh:disambiguation_qa",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="disambiguation_qa",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_disambiguation_qa,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "dyck_languages" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_dyck_languages_harness = LightevalTaskConfig(
    name="bbh:dyck_languages",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="dyck_languages",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_dyck_languages,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "formal_fallacies" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_formal_fallacies_harness = LightevalTaskConfig(
    name="bbh:formal_fallacies",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="formal_fallacies",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_formal_fallacies,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "geometric_shapes" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_geometric_shapes_harness = LightevalTaskConfig(
    name="bbh:geometric_shapes",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="geometric_shapes",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_geometric_shapes,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "hyperbaton" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_hyperbaton_harness = LightevalTaskConfig(
    name="bbh:hyperbaton",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="hyperbaton",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_hyperbaton,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "logical_deduction_five_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_logical_deduction_five_objects_harness = LightevalTaskConfig(
    name="bbh:logical_deduction_five_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="logical_deduction_five_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_logical_deduction_five_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "logical_deduction_seven_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_logical_deduction_seven_objects_harness = LightevalTaskConfig(
    name="bbh:logical_deduction_seven_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="logical_deduction_seven_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_logical_deduction_seven_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "logical_deduction_three_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_logical_deduction_three_objects_harness = LightevalTaskConfig(
    name="bbh:logical_deduction_three_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="logical_deduction_three_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_logical_deduction_three_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "movie_recommendation" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_movie_recommendation_harness = LightevalTaskConfig(
    name="bbh:movie_recommendation",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="movie_recommendation",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_movie_recommendation,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "multistep_arithmetic_two" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_multistep_arithmetic_two_harness = LightevalTaskConfig(
    name="bbh:multistep_arithmetic_two",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="multistep_arithmetic_two",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_multistep_arithmetic_two,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "navigate" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_navigate_harness = LightevalTaskConfig(
    name="bbh:navigate",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="navigate",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_navigate,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "object_counting" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_object_counting_harness = LightevalTaskConfig(
    name="bbh:object_counting",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="object_counting",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_object_counting,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "penguins_in_a_table" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_penguins_in_a_table_harness = LightevalTaskConfig(
    name="bbh:penguins_in_a_table",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="penguins_in_a_table",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_penguins_in_a_table,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "reasoning_about_colored_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_reasoning_about_colored_objects_harness = LightevalTaskConfig(
    name="bbh:reasoning_about_colored_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="reasoning_about_colored_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_reasoning_about_colored_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "ruin_names" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_ruin_names_harness = LightevalTaskConfig(
    name="bbh:ruin_names",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="ruin_names",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_ruin_names,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "salient_translation_error_detection" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_salient_translation_error_detection_harness = LightevalTaskConfig(
    name="bbh:salient_translation_error_detection",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="salient_translation_error_detection",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_salient_translation_error_detection,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "snarks" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_snarks_harness = LightevalTaskConfig(
    name="bbh:snarks",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="snarks",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_snarks,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "sports_understanding" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_sports_understanding_harness = LightevalTaskConfig(
    name="bbh:sports_understanding",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="sports_understanding",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_sports_understanding,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "temporal_sequences" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_temporal_sequences_harness = LightevalTaskConfig(
    name="bbh:temporal_sequences",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="temporal_sequences",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_temporal_sequences,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "tracking_shuffled_objects_five_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_tracking_shuffled_objects_five_objects_harness = LightevalTaskConfig(
    name="bbh:tracking_shuffled_objects_five_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="tracking_shuffled_objects_five_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_tracking_shuffled_objects_five_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "tracking_shuffled_objects_seven_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_tracking_shuffled_objects_seven_objects_harness = LightevalTaskConfig(
    name="bbh:tracking_shuffled_objects_seven_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="tracking_shuffled_objects_seven_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_tracking_shuffled_objects_seven_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "tracking_shuffled_objects_three_objects" subset of lukaemon/bbh
# (generation_size=20), evaluated on the "test" split with the exact-match metric variants below.
bbh_tracking_shuffled_objects_three_objects_harness = LightevalTaskConfig(
    name="bbh:tracking_shuffled_objects_three_objects",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="tracking_shuffled_objects_three_objects",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_tracking_shuffled_objects_three_objects,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "web_of_lies" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_web_of_lies_harness = LightevalTaskConfig(
    name="bbh:web_of_lies",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="web_of_lies",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_web_of_lies,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# "harness"-suite task over the "word_sorting" subset of lukaemon/bbh (generation_size=20),
# evaluated on the "test" split with the exact-match metric variants listed below.
bbh_word_sorting_harness = LightevalTaskConfig(
    name="bbh:word_sorting",
    suite=["harness"],
    version=0,
    hf_repo="lukaemon/bbh",
    hf_subset="word_sorting",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bbh_word_sorting,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
    generation_size=20,
    stop_sequence=["</s>", "Q=", "\n\n"],
)
# Config for the `bbq` task (subset `all`); evaluation split: `test`.
bbq_helm = LightevalTaskConfig(
    name="bbq",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="all",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Age` task; evaluation split: `test`.
bbq_Age_helm = LightevalTaskConfig(
    name="bbq:Age",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Age",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Disability_status` task; evaluation split: `test`.
bbq_Disability_status_helm = LightevalTaskConfig(
    name="bbq:Disability_status",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Disability_status",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Gender_identity` task; evaluation split: `test`.
bbq_Gender_identity_helm = LightevalTaskConfig(
    name="bbq:Gender_identity",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Gender_identity",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Nationality` task; evaluation split: `test`.
bbq_Nationality_helm = LightevalTaskConfig(
    name="bbq:Nationality",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Nationality",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Physical_appearance` task; evaluation split: `test`.
bbq_Physical_appearance_helm = LightevalTaskConfig(
    name="bbq:Physical_appearance",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Physical_appearance",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Race_ethnicity` task; evaluation split: `test`.
bbq_Race_ethnicity_helm = LightevalTaskConfig(
    name="bbq:Race_ethnicity",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Race_ethnicity",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Race_x_SES` task; evaluation split: `test`.
bbq_Race_x_SES_helm = LightevalTaskConfig(
    name="bbq:Race_x_SES",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Race_x_SES",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Race_x_gender` task; evaluation split: `test`.
bbq_Race_x_gender_helm = LightevalTaskConfig(
    name="bbq:Race_x_gender",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Race_x_gender",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Religion` task; evaluation split: `test`.
bbq_Religion_helm = LightevalTaskConfig(
    name="bbq:Religion",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Religion",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:SES` task; evaluation split: `test`.
bbq_SES_helm = LightevalTaskConfig(
    name="bbq:SES",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="SES",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq:Sexual_orientation` task; evaluation split: `test`.
bbq_Sexual_orientation_helm = LightevalTaskConfig(
    name="bbq:Sexual_orientation",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/bbq_helm",
    hf_subset="Sexual_orientation",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bbq,
    generation_size=-1,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.perfect_exact_match,
    ],
)
# Config for the `bbq_lite_json` task; evaluation split: `default`.
bbq_lite_json_bigbench_lite = LightevalTaskConfig(
    name="bbq_lite_json",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="bbq_lite_json",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    generation_size=1,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.loglikelihood_acc],
)
# Config for the `bigbench:auto_debugging` task; evaluation split: `test`.
bigbench_auto_debugging_helm = LightevalTaskConfig(
    name="bigbench:auto_debugging",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="auto_debugging",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# Config for the `bigbench:bbq_lite_json:age_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_age_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:age_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-age_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:age_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_age_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:age_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-age_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:disability_status_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_disability_status_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:disability_status_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-disability_status_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:disability_status_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_disability_status_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:disability_status_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-disability_status_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:gender_identity_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_gender_identity_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:gender_identity_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-gender_identity_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:gender_identity_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_gender_identity_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:gender_identity_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-gender_identity_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:nationality_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_nationality_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:nationality_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-nationality_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:nationality_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_nationality_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:nationality_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-nationality_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:physical_appearance_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_physical_appearance_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:physical_appearance_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-physical_appearance_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:physical_appearance_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_physical_appearance_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:physical_appearance_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-physical_appearance_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:race_ethnicity_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_race_ethnicity_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:race_ethnicity_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-race_ethnicity_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:race_ethnicity_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_race_ethnicity_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:race_ethnicity_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-race_ethnicity_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:religion_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_religion_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:religion_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-religion_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:religion_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_religion_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:religion_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-religion_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:ses_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_ses_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:ses_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-ses_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:ses_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_ses_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:ses_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-ses_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:sexual_orientation_ambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_sexual_orientation_ambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:sexual_orientation_ambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-sexual_orientation_ambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:bbq_lite_json:sexual_orientation_disambig` task; evaluation split: `test`.
bigbench_bbq_lite_json_sexual_orientation_disambig_helm = LightevalTaskConfig(
    name="bigbench:bbq_lite_json:sexual_orientation_disambig",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="bbq_lite_json-sexual_orientation_disambig",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:code_line_description` task; evaluation split: `test`.
bigbench_code_line_description_helm = LightevalTaskConfig(
    name="bigbench:code_line_description",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="code_line_description",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:conceptual_combinations:contradictions` task; evaluation split: `test`.
bigbench_conceptual_combinations_contradictions_helm = LightevalTaskConfig(
    name="bigbench:conceptual_combinations:contradictions",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conceptual_combinations-contradictions",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:conceptual_combinations:emergent_properties` task; evaluation split: `test`.
# Registered under both `helm` and `bigbench_scenario`, matching the sibling
# `bigbench:conceptual_combinations:*` configs in this file; previously only `helm` was listed.
bigbench_conceptual_combinations_emergent_properties_helm = LightevalTaskConfig(
    name="bigbench:conceptual_combinations:emergent_properties",
    suite=["helm", "bigbench_scenario"],
    prompt_function=prompt.bigbench_helm,
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conceptual_combinations-emergent_properties",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=100,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# Config for the `bigbench:conceptual_combinations:fanciful_fictional_combinations` task; evaluation split: `test`.
bigbench_conceptual_combinations_fanciful_fictional_combinations_helm = LightevalTaskConfig(
    name="bigbench:conceptual_combinations:fanciful_fictional_combinations",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conceptual_combinations-fanciful_fictional_combinations",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:conceptual_combinations:homonyms` task; evaluation split: `test`.
bigbench_conceptual_combinations_homonyms_helm = LightevalTaskConfig(
    name="bigbench:conceptual_combinations:homonyms",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conceptual_combinations-homonyms",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:conceptual_combinations:invented_words` task; evaluation split: `test`.
bigbench_conceptual_combinations_invented_words_helm = LightevalTaskConfig(
    name="bigbench:conceptual_combinations:invented_words",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conceptual_combinations-invented_words",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Config for the `bigbench:conlang_translation:adna_from` task; evaluation split: `test`.
bigbench_conlang_translation_adna_from_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:adna_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-adna_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:adna_to` task; evaluation split: `test`.
bigbench_conlang_translation_adna_to_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:adna_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-adna_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:atikampe_from` task; evaluation split: `test`.
bigbench_conlang_translation_atikampe_from_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:atikampe_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-atikampe_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:atikampe_to` task; evaluation split: `test`.
bigbench_conlang_translation_atikampe_to_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:atikampe_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-atikampe_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:gornam_from` task; evaluation split: `test`.
bigbench_conlang_translation_gornam_from_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:gornam_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-gornam_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:gornam_to` task; evaluation split: `test`.
bigbench_conlang_translation_gornam_to_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:gornam_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-gornam_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:holuan_from` task; evaluation split: `test`.
bigbench_conlang_translation_holuan_from_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:holuan_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-holuan_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:holuan_to` task; evaluation split: `test`.
bigbench_conlang_translation_holuan_to_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:holuan_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-holuan_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:mkafala_from` task; evaluation split: `test`.
bigbench_conlang_translation_mkafala_from_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:mkafala_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-mkafala_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:mkafala_to` task; evaluation split: `test`.
bigbench_conlang_translation_mkafala_to_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:mkafala_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-mkafala_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:postpositive_english_from` task; evaluation split: `test`.
bigbench_conlang_translation_postpositive_english_from_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:postpositive_english_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-postpositive_english_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# Config for the `bigbench:conlang_translation:postpositive_english_to` task; evaluation split: `test`.
bigbench_conlang_translation_postpositive_english_to_helm = LightevalTaskConfig(
    name="bigbench:conlang_translation:postpositive_english_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-postpositive_english_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting and generation
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    # Scoring
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# BIG-bench task in the helm suite: "conlang_translation-unapuri_from" subset,
# evaluated on the test split with ROUGE-1/2/L.
bigbench_conlang_translation_unapuri_from_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:conlang_translation:unapuri_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-unapuri_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# BIG-bench task in the helm suite: "conlang_translation-unapuri_to" subset,
# evaluated on the test split with ROUGE-1/2/L.
bigbench_conlang_translation_unapuri_to_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:conlang_translation:unapuri_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-unapuri_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# BIG-bench task in the helm suite: "conlang_translation-vaomi_from" subset,
# evaluated on the test split with ROUGE-1/2/L.
bigbench_conlang_translation_vaomi_from_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:conlang_translation:vaomi_from",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-vaomi_from",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# BIG-bench task in the helm suite: "conlang_translation-vaomi_to" subset,
# evaluated on the test split with ROUGE-1/2/L.
bigbench_conlang_translation_vaomi_to_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:conlang_translation:vaomi_to",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="conlang_translation-vaomi_to",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL],
)
# BIG-bench task in the helm suite: "emoji_movie" subset, evaluated on the
# test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_emoji_movie_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:emoji_movie",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="emoji_movie",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "formal_fallacies_syllogisms_negation" subset,
# evaluated on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_formal_fallacies_syllogisms_negation_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:formal_fallacies_syllogisms_negation",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="formal_fallacies_syllogisms_negation",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "hindu_knowledge" subset, evaluated on the
# test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_hindu_knowledge_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:hindu_knowledge",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="hindu_knowledge",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "known_unknowns" subset, evaluated on the
# test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_known_unknowns_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:known_unknowns",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="known_unknowns",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "language_identification" subset, evaluated on
# the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_language_identification_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:language_identification",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="language_identification",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "linguistics_puzzles" subset, evaluated on
# the test split with exact and quasi-exact match.
bigbench_linguistics_puzzles_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:linguistics_puzzles",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="linguistics_puzzles",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# BIG-bench task in the helm suite: "logic_grid_puzzle" subset, evaluated on the
# test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_logic_grid_puzzle_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:logic_grid_puzzle",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="logic_grid_puzzle",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "logical_deduction-five_objects" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_logical_deduction_five_objects_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:logical_deduction-five_objects",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="logical_deduction-five_objects",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "logical_deduction-seven_objects" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_logical_deduction_seven_objects_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:logical_deduction-seven_objects",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="logical_deduction-seven_objects",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "logical_deduction-three_objects" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_logical_deduction_three_objects_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:logical_deduction-three_objects",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="logical_deduction-three_objects",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "misconceptions_russian" subset, evaluated on
# the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_misconceptions_russian_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:misconceptions_russian",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="misconceptions_russian",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "novel_concepts" subset, evaluated on the
# test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_novel_concepts_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:novel_concepts",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="novel_concepts",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "operators" subset, evaluated on the test
# split with exact and quasi-exact match.
bigbench_operators_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:operators",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="operators",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# BIG-bench task in the helm suite: "parsinlu_reading_comprehension" subset,
# evaluated on the test split with exact and quasi-exact match.
bigbench_parsinlu_reading_comprehension_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:parsinlu_reading_comprehension",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="parsinlu_reading_comprehension",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# BIG-bench task in the helm suite: "play_dialog_same_or_different" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_play_dialog_same_or_different_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:play_dialog_same_or_different",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="play_dialog_same_or_different",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "repeat_copy_logic" subset, evaluated on the
# test split with exact and quasi-exact match.
bigbench_repeat_copy_logic_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:repeat_copy_logic",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="repeat_copy_logic",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# BIG-bench task in the helm suite: "strange_stories-boolean" subset, evaluated on
# the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_strange_stories_boolean_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:strange_stories-boolean",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="strange_stories-boolean",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "strange_stories-multiple_choice" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_strange_stories_multiple_choice_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:strange_stories-multiple_choice",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="strange_stories-multiple_choice",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "strategyqa" subset, evaluated on the test
# split with exact-match metrics (plain, quasi and prefix variants).
bigbench_strategyqa_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:strategyqa",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="strategyqa",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "symbol_interpretation-adversarial" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_symbol_interpretation_adversarial_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:symbol_interpretation-adversarial",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="symbol_interpretation-adversarial",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "symbol_interpretation-emoji_agnostic" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_symbol_interpretation_emoji_agnostic_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:symbol_interpretation-emoji_agnostic",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="symbol_interpretation-emoji_agnostic",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "symbol_interpretation-name_agnostic" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_symbol_interpretation_name_agnostic_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:symbol_interpretation-name_agnostic",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="symbol_interpretation-name_agnostic",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "symbol_interpretation-plain" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_symbol_interpretation_plain_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:symbol_interpretation-plain",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="symbol_interpretation-plain",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "symbol_interpretation-tricky" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_symbol_interpretation_tricky_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:symbol_interpretation-tricky",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="symbol_interpretation-tricky",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "vitaminc_fact_verification" subset, evaluated
# on the test split with exact-match metrics (plain, quasi and prefix variants).
bigbench_vitaminc_fact_verification_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:vitaminc_fact_verification",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="vitaminc_fact_verification",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench task in the helm suite: "winowhy" subset, evaluated on the test
# split with exact-match metrics (plain, quasi and prefix variants).
bigbench_winowhy_helm = LightevalTaskConfig(
    # Task identity
    name="bigbench:winowhy",
    suite=["helm", "bigbench_scenario"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/bigbench_helm",
    hf_subset="winowhy",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.bigbench_helm,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BLiMP "adjunct_island" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_adjunct_island_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:adjunct_island",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="adjunct_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "adjunct_island" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_adjunct_island_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:adjunct_island",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="adjunct_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "anaphor_gender_agreement" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_anaphor_gender_agreement_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:anaphor_gender_agreement",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="anaphor_gender_agreement",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "anaphor_gender_agreement" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_anaphor_gender_agreement_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:anaphor_gender_agreement",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="anaphor_gender_agreement",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "anaphor_number_agreement" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_anaphor_number_agreement_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:anaphor_number_agreement",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="anaphor_number_agreement",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "anaphor_number_agreement" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_anaphor_number_agreement_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:anaphor_number_agreement",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="anaphor_number_agreement",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "animate_subject_passive" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_animate_subject_passive_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:animate_subject_passive",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="animate_subject_passive",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "animate_subject_passive" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_animate_subject_passive_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:animate_subject_passive",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="animate_subject_passive",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "animate_subject_trans" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_animate_subject_trans_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:animate_subject_trans",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="animate_subject_trans",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "animate_subject_trans" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_animate_subject_trans_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:animate_subject_trans",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="animate_subject_trans",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "causative" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_causative_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:causative",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="causative",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "causative" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_causative_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:causative",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="causative",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "complex_NP_island" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_complex_NP_island_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:complex_NP_island",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="complex_NP_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "complex_NP_island" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_complex_NP_island_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:complex_NP_island",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="complex_NP_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "coordinate_structure_constraint_complex_left_branch" subset, lighteval-suite variant:
# prompt.blimp, scored with loglikelihood_acc on the train split (the only split listed).
blimp_coordinate_structure_constraint_complex_left_branch_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:coordinate_structure_constraint_complex_left_branch",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="coordinate_structure_constraint_complex_left_branch",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "coordinate_structure_constraint_complex_left_branch" subset, helm-suite variant:
# prompt.blimp_helm, scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_coordinate_structure_constraint_complex_left_branch_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:coordinate_structure_constraint_complex_left_branch",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="coordinate_structure_constraint_complex_left_branch",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "coordinate_structure_constraint_object_extraction" subset, lighteval-suite variant:
# prompt.blimp, scored with loglikelihood_acc on the train split (the only split listed).
blimp_coordinate_structure_constraint_object_extraction_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:coordinate_structure_constraint_object_extraction",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="coordinate_structure_constraint_object_extraction",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "coordinate_structure_constraint_object_extraction" subset, helm-suite variant:
# prompt.blimp_helm, scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_coordinate_structure_constraint_object_extraction_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:coordinate_structure_constraint_object_extraction",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="coordinate_structure_constraint_object_extraction",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "determiner_noun_agreement_1" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_determiner_noun_agreement_1_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "determiner_noun_agreement_1" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_determiner_noun_agreement_1_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "determiner_noun_agreement_2" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_determiner_noun_agreement_2_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "determiner_noun_agreement_2" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_determiner_noun_agreement_2_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "determiner_noun_agreement_irregular_1" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_determiner_noun_agreement_irregular_1_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_irregular_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_irregular_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP "determiner_noun_agreement_irregular_1" subset, helm-suite variant: prompt.blimp_helm,
# scored with loglikelihood_acc and loglikelihood_acc_norm on the train split.
blimp_determiner_noun_agreement_irregular_1_helm = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_irregular_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_irregular_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP "determiner_noun_agreement_irregular_2" subset, lighteval-suite variant: prompt.blimp,
# scored with loglikelihood_acc on the train split (the only split listed).
blimp_determiner_noun_agreement_irregular_2_lighteval = LightevalTaskConfig(
    # Task identity
    name="blimp:determiner_noun_agreement_irregular_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location and splits
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_irregular_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Few-shot settings (none configured)
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation and scoring
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "determiner_noun_agreement_irregular_2", registered under the "helm" and "blimp" suites.
blimp_determiner_noun_agreement_irregular_2_helm = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_irregular_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_irregular_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "determiner_noun_agreement_with_adj_2", registered under the "lighteval" and "blimp" suites.
blimp_determiner_noun_agreement_with_adj_2_lighteval = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adj_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adj_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "determiner_noun_agreement_with_adj_2", registered under the "helm" and "blimp" suites.
blimp_determiner_noun_agreement_with_adj_2_helm = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adj_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adj_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "determiner_noun_agreement_with_adj_irregular_1", registered under the "lighteval" and "blimp" suites.
blimp_determiner_noun_agreement_with_adj_irregular_1_lighteval = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adj_irregular_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adj_irregular_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "determiner_noun_agreement_with_adj_irregular_1", registered under the "helm" and "blimp" suites.
blimp_determiner_noun_agreement_with_adj_irregular_1_helm = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adj_irregular_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adj_irregular_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "determiner_noun_agreement_with_adj_irregular_2", registered under the "lighteval" and "blimp" suites.
blimp_determiner_noun_agreement_with_adj_irregular_2_lighteval = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adj_irregular_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adj_irregular_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "determiner_noun_agreement_with_adj_irregular_2", registered under the "helm" and "blimp" suites.
blimp_determiner_noun_agreement_with_adj_irregular_2_helm = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adj_irregular_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adj_irregular_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "determiner_noun_agreement_with_adjective_1", registered under the "lighteval" and "blimp" suites.
blimp_determiner_noun_agreement_with_adjective_1_lighteval = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adjective_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adjective_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "determiner_noun_agreement_with_adjective_1", registered under the "helm" and "blimp" suites.
blimp_determiner_noun_agreement_with_adjective_1_helm = LightevalTaskConfig(
    name="blimp:determiner_noun_agreement_with_adjective_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="determiner_noun_agreement_with_adjective_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "distractor_agreement_relational_noun", registered under the "lighteval" and "blimp" suites.
blimp_distractor_agreement_relational_noun_lighteval = LightevalTaskConfig(
    name="blimp:distractor_agreement_relational_noun",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="distractor_agreement_relational_noun",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "distractor_agreement_relational_noun", registered under the "helm" and "blimp" suites.
blimp_distractor_agreement_relational_noun_helm = LightevalTaskConfig(
    name="blimp:distractor_agreement_relational_noun",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="distractor_agreement_relational_noun",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "distractor_agreement_relative_clause", registered under the "lighteval" and "blimp" suites.
blimp_distractor_agreement_relative_clause_lighteval = LightevalTaskConfig(
    name="blimp:distractor_agreement_relative_clause",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="distractor_agreement_relative_clause",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "distractor_agreement_relative_clause", registered under the "helm" and "blimp" suites.
blimp_distractor_agreement_relative_clause_helm = LightevalTaskConfig(
    name="blimp:distractor_agreement_relative_clause",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="distractor_agreement_relative_clause",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "drop_argument", registered under the "lighteval" and "blimp" suites.
blimp_drop_argument_lighteval = LightevalTaskConfig(
    name="blimp:drop_argument",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="drop_argument",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "drop_argument", registered under the "helm" and "blimp" suites.
blimp_drop_argument_helm = LightevalTaskConfig(
    name="blimp:drop_argument",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="drop_argument",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "ellipsis_n_bar_1", registered under the "lighteval" and "blimp" suites.
blimp_ellipsis_n_bar_1_lighteval = LightevalTaskConfig(
    name="blimp:ellipsis_n_bar_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="ellipsis_n_bar_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "ellipsis_n_bar_1", registered under the "helm" and "blimp" suites.
blimp_ellipsis_n_bar_1_helm = LightevalTaskConfig(
    name="blimp:ellipsis_n_bar_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="ellipsis_n_bar_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "ellipsis_n_bar_2", registered under the "lighteval" and "blimp" suites.
blimp_ellipsis_n_bar_2_lighteval = LightevalTaskConfig(
    name="blimp:ellipsis_n_bar_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="ellipsis_n_bar_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "ellipsis_n_bar_2", registered under the "helm" and "blimp" suites.
blimp_ellipsis_n_bar_2_helm = LightevalTaskConfig(
    name="blimp:ellipsis_n_bar_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="ellipsis_n_bar_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "existential_there_object_raising", registered under the "lighteval" and "blimp" suites.
blimp_existential_there_object_raising_lighteval = LightevalTaskConfig(
    name="blimp:existential_there_object_raising",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_object_raising",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "existential_there_object_raising", registered under the "helm" and "blimp" suites.
blimp_existential_there_object_raising_helm = LightevalTaskConfig(
    name="blimp:existential_there_object_raising",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_object_raising",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "existential_there_quantifiers_1", registered under the "lighteval" and "blimp" suites.
blimp_existential_there_quantifiers_1_lighteval = LightevalTaskConfig(
    name="blimp:existential_there_quantifiers_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_quantifiers_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "existential_there_quantifiers_1", registered under the "helm" and "blimp" suites.
blimp_existential_there_quantifiers_1_helm = LightevalTaskConfig(
    name="blimp:existential_there_quantifiers_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_quantifiers_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "existential_there_quantifiers_2", registered under the "lighteval" and "blimp" suites.
blimp_existential_there_quantifiers_2_lighteval = LightevalTaskConfig(
    name="blimp:existential_there_quantifiers_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_quantifiers_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "existential_there_quantifiers_2", registered under the "helm" and "blimp" suites.
blimp_existential_there_quantifiers_2_helm = LightevalTaskConfig(
    name="blimp:existential_there_quantifiers_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_quantifiers_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "existential_there_subject_raising", registered under the "lighteval" and "blimp" suites.
blimp_existential_there_subject_raising_lighteval = LightevalTaskConfig(
    name="blimp:existential_there_subject_raising",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_subject_raising",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "existential_there_subject_raising", registered under the "helm" and "blimp" suites.
blimp_existential_there_subject_raising_helm = LightevalTaskConfig(
    name="blimp:existential_there_subject_raising",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="existential_there_subject_raising",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "expletive_it_object_raising", registered under the "lighteval" and "blimp" suites.
blimp_expletive_it_object_raising_lighteval = LightevalTaskConfig(
    name="blimp:expletive_it_object_raising",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="expletive_it_object_raising",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "expletive_it_object_raising", registered under the "helm" and "blimp" suites.
blimp_expletive_it_object_raising_helm = LightevalTaskConfig(
    name="blimp:expletive_it_object_raising",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="expletive_it_object_raising",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "inchoative", registered under the "lighteval" and "blimp" suites.
blimp_inchoative_lighteval = LightevalTaskConfig(
    name="blimp:inchoative",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="inchoative",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "inchoative", registered under the "helm" and "blimp" suites.
blimp_inchoative_helm = LightevalTaskConfig(
    name="blimp:inchoative",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="inchoative",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "intransitive", registered under the "lighteval" and "blimp" suites.
blimp_intransitive_lighteval = LightevalTaskConfig(
    name="blimp:intransitive",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="intransitive",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "intransitive", registered under the "helm" and "blimp" suites.
blimp_intransitive_helm = LightevalTaskConfig(
    name="blimp:intransitive",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="intransitive",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "irregular_past_participle_adjectives", registered under the "lighteval" and "blimp" suites.
blimp_irregular_past_participle_adjectives_lighteval = LightevalTaskConfig(
    name="blimp:irregular_past_participle_adjectives",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_past_participle_adjectives",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "irregular_past_participle_adjectives", registered under the "helm" and "blimp" suites.
blimp_irregular_past_participle_adjectives_helm = LightevalTaskConfig(
    name="blimp:irregular_past_participle_adjectives",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_past_participle_adjectives",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "irregular_past_participle_verbs", registered under the "lighteval" and "blimp" suites.
blimp_irregular_past_participle_verbs_lighteval = LightevalTaskConfig(
    name="blimp:irregular_past_participle_verbs",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_past_participle_verbs",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "irregular_past_participle_verbs", registered under the "helm" and "blimp" suites.
blimp_irregular_past_participle_verbs_helm = LightevalTaskConfig(
    name="blimp:irregular_past_participle_verbs",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_past_participle_verbs",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "irregular_plural_subject_verb_agreement_1", registered under the "lighteval" and "blimp" suites.
blimp_irregular_plural_subject_verb_agreement_1_lighteval = LightevalTaskConfig(
    name="blimp:irregular_plural_subject_verb_agreement_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_plural_subject_verb_agreement_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "irregular_plural_subject_verb_agreement_1", registered under the "helm" and "blimp" suites.
blimp_irregular_plural_subject_verb_agreement_1_helm = LightevalTaskConfig(
    name="blimp:irregular_plural_subject_verb_agreement_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_plural_subject_verb_agreement_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "irregular_plural_subject_verb_agreement_2", registered under the "lighteval" and "blimp" suites.
blimp_irregular_plural_subject_verb_agreement_2_lighteval = LightevalTaskConfig(
    name="blimp:irregular_plural_subject_verb_agreement_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_plural_subject_verb_agreement_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "irregular_plural_subject_verb_agreement_2", registered under the "helm" and "blimp" suites.
blimp_irregular_plural_subject_verb_agreement_2_helm = LightevalTaskConfig(
    name="blimp:irregular_plural_subject_verb_agreement_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="irregular_plural_subject_verb_agreement_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "left_branch_island_echo_question", registered under the "lighteval" and "blimp" suites.
blimp_left_branch_island_echo_question_lighteval = LightevalTaskConfig(
    name="blimp:left_branch_island_echo_question",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="left_branch_island_echo_question",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "left_branch_island_echo_question", registered under the "helm" and "blimp" suites.
blimp_left_branch_island_echo_question_helm = LightevalTaskConfig(
    name="blimp:left_branch_island_echo_question",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="left_branch_island_echo_question",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "left_branch_island_simple_question", registered under the "lighteval" and "blimp" suites.
blimp_left_branch_island_simple_question_lighteval = LightevalTaskConfig(
    name="blimp:left_branch_island_simple_question",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="left_branch_island_simple_question",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "left_branch_island_simple_question", registered under the "helm" and "blimp" suites.
blimp_left_branch_island_simple_question_helm = LightevalTaskConfig(
    name="blimp:left_branch_island_simple_question",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="left_branch_island_simple_question",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "matrix_question_npi_licensor_present", registered under the "lighteval" and "blimp" suites.
blimp_matrix_question_npi_licensor_present_lighteval = LightevalTaskConfig(
    name="blimp:matrix_question_npi_licensor_present",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="matrix_question_npi_licensor_present",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "matrix_question_npi_licensor_present", registered under the "helm" and "blimp" suites.
blimp_matrix_question_npi_licensor_present_helm = LightevalTaskConfig(
    name="blimp:matrix_question_npi_licensor_present",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="matrix_question_npi_licensor_present",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "npi_present_1", registered under the "lighteval" and "blimp" suites.
blimp_npi_present_1_lighteval = LightevalTaskConfig(
    name="blimp:npi_present_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="npi_present_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "npi_present_1", registered under the "helm" and "blimp" suites.
blimp_npi_present_1_helm = LightevalTaskConfig(
    name="blimp:npi_present_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="npi_present_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "npi_present_2", registered under the "lighteval" and "blimp" suites.
blimp_npi_present_2_lighteval = LightevalTaskConfig(
    name="blimp:npi_present_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="npi_present_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "npi_present_2", registered under the "helm" and "blimp" suites.
blimp_npi_present_2_helm = LightevalTaskConfig(
    name="blimp:npi_present_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="npi_present_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "only_npi_licensor_present", registered under the "lighteval" and "blimp" suites.
blimp_only_npi_licensor_present_lighteval = LightevalTaskConfig(
    name="blimp:only_npi_licensor_present",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="only_npi_licensor_present",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "only_npi_licensor_present", registered under the "helm" and "blimp" suites.
blimp_only_npi_licensor_present_helm = LightevalTaskConfig(
    name="blimp:only_npi_licensor_present",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="only_npi_licensor_present",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "only_npi_scope", registered under the "lighteval" and "blimp" suites.
blimp_only_npi_scope_lighteval = LightevalTaskConfig(
    name="blimp:only_npi_scope",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="only_npi_scope",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "only_npi_scope", registered under the "helm" and "blimp" suites.
blimp_only_npi_scope_helm = LightevalTaskConfig(
    name="blimp:only_npi_scope",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="only_npi_scope",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "passive_1", registered under the "lighteval" and "blimp" suites.
blimp_passive_1_lighteval = LightevalTaskConfig(
    name="blimp:passive_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="passive_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "passive_1", registered under the "helm" and "blimp" suites.
blimp_passive_1_helm = LightevalTaskConfig(
    name="blimp:passive_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="passive_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "passive_2", registered under the "lighteval" and "blimp" suites.
blimp_passive_2_lighteval = LightevalTaskConfig(
    name="blimp:passive_2",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="passive_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "passive_2", registered under the "helm" and "blimp" suites.
blimp_passive_2_helm = LightevalTaskConfig(
    name="blimp:passive_2",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="passive_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "principle_A_c_command", registered under the "lighteval" and "blimp" suites.
blimp_principle_A_c_command_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_c_command",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="principle_A_c_command",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "principle_A_c_command", registered under the "helm" and "blimp" suites.
blimp_principle_A_c_command_helm = LightevalTaskConfig(
    name="blimp:principle_A_c_command",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="principle_A_c_command",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# BLiMP subset "principle_A_case_1", registered under the "lighteval" and "blimp" suites.
blimp_principle_A_case_1_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_case_1",
    suite=["lighteval", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="principle_A_case_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BLiMP subset "principle_A_case_1", registered under the "helm" and "blimp" suites.
blimp_principle_A_case_1_helm = LightevalTaskConfig(
    name="blimp:principle_A_case_1",
    suite=["helm", "blimp"],
    version=0,
    # Dataset location; "train" is the only declared split and the one evaluated.
    hf_repo="blimp",
    hf_subset="principle_A_case_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation limits and scoring.
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "principle_A_case_2" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_principle_A_case_2_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_case_2",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_case_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "principle_A_case_2" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_principle_A_case_2_helm = LightevalTaskConfig(
    name="blimp:principle_A_case_2",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_case_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "principle_A_domain_1" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_principle_A_domain_1_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_domain_1",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_domain_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "principle_A_domain_1" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_principle_A_domain_1_helm = LightevalTaskConfig(
    name="blimp:principle_A_domain_1",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_domain_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "principle_A_domain_2" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_principle_A_domain_2_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_domain_2",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_domain_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "principle_A_domain_2" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_principle_A_domain_2_helm = LightevalTaskConfig(
    name="blimp:principle_A_domain_2",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_domain_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "principle_A_domain_3" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_principle_A_domain_3_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_domain_3",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_domain_3",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "principle_A_domain_3" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_principle_A_domain_3_helm = LightevalTaskConfig(
    name="blimp:principle_A_domain_3",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_domain_3",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "principle_A_reconstruction" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_principle_A_reconstruction_lighteval = LightevalTaskConfig(
    name="blimp:principle_A_reconstruction",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_reconstruction",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "principle_A_reconstruction" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_principle_A_reconstruction_helm = LightevalTaskConfig(
    name="blimp:principle_A_reconstruction",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="principle_A_reconstruction",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "regular_plural_subject_verb_agreement_1" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_regular_plural_subject_verb_agreement_1_lighteval = LightevalTaskConfig(
    name="blimp:regular_plural_subject_verb_agreement_1",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="regular_plural_subject_verb_agreement_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "regular_plural_subject_verb_agreement_1" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_regular_plural_subject_verb_agreement_1_helm = LightevalTaskConfig(
    name="blimp:regular_plural_subject_verb_agreement_1",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="regular_plural_subject_verb_agreement_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "regular_plural_subject_verb_agreement_2" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_regular_plural_subject_verb_agreement_2_lighteval = LightevalTaskConfig(
    name="blimp:regular_plural_subject_verb_agreement_2",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="regular_plural_subject_verb_agreement_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "regular_plural_subject_verb_agreement_2" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_regular_plural_subject_verb_agreement_2_helm = LightevalTaskConfig(
    name="blimp:regular_plural_subject_verb_agreement_2",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="regular_plural_subject_verb_agreement_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "sentential_negation_npi_licensor_present" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_sentential_negation_npi_licensor_present_lighteval = LightevalTaskConfig(
    name="blimp:sentential_negation_npi_licensor_present",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="sentential_negation_npi_licensor_present",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "sentential_negation_npi_licensor_present" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_sentential_negation_npi_licensor_present_helm = LightevalTaskConfig(
    name="blimp:sentential_negation_npi_licensor_present",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="sentential_negation_npi_licensor_present",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "sentential_negation_npi_scope" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_sentential_negation_npi_scope_lighteval = LightevalTaskConfig(
    name="blimp:sentential_negation_npi_scope",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="sentential_negation_npi_scope",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "sentential_negation_npi_scope" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_sentential_negation_npi_scope_helm = LightevalTaskConfig(
    name="blimp:sentential_negation_npi_scope",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="sentential_negation_npi_scope",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "sentential_subject_island" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_sentential_subject_island_lighteval = LightevalTaskConfig(
    name="blimp:sentential_subject_island",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="sentential_subject_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "sentential_subject_island" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_sentential_subject_island_helm = LightevalTaskConfig(
    name="blimp:sentential_subject_island",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="sentential_subject_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "superlative_quantifiers_1" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_superlative_quantifiers_1_lighteval = LightevalTaskConfig(
    name="blimp:superlative_quantifiers_1",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="superlative_quantifiers_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "superlative_quantifiers_1" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_superlative_quantifiers_1_helm = LightevalTaskConfig(
    name="blimp:superlative_quantifiers_1",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="superlative_quantifiers_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "superlative_quantifiers_2" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_superlative_quantifiers_2_lighteval = LightevalTaskConfig(
    name="blimp:superlative_quantifiers_2",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="superlative_quantifiers_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "superlative_quantifiers_2" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_superlative_quantifiers_2_helm = LightevalTaskConfig(
    name="blimp:superlative_quantifiers_2",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="superlative_quantifiers_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "tough_vs_raising_1" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_tough_vs_raising_1_lighteval = LightevalTaskConfig(
    name="blimp:tough_vs_raising_1",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="tough_vs_raising_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "tough_vs_raising_1" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_tough_vs_raising_1_helm = LightevalTaskConfig(
    name="blimp:tough_vs_raising_1",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="tough_vs_raising_1",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "tough_vs_raising_2" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_tough_vs_raising_2_lighteval = LightevalTaskConfig(
    name="blimp:tough_vs_raising_2",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="tough_vs_raising_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "tough_vs_raising_2" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_tough_vs_raising_2_helm = LightevalTaskConfig(
    name="blimp:tough_vs_raising_2",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="tough_vs_raising_2",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "transitive" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_transitive_lighteval = LightevalTaskConfig(
    name="blimp:transitive",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="transitive",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "transitive" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_transitive_helm = LightevalTaskConfig(
    name="blimp:transitive",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="transitive",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_island" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_island_lighteval = LightevalTaskConfig(
    name="blimp:wh_island",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_island" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_island_helm = LightevalTaskConfig(
    name="blimp:wh_island",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_island",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_questions_object_gap" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_questions_object_gap_lighteval = LightevalTaskConfig(
    name="blimp:wh_questions_object_gap",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_questions_object_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_questions_object_gap" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_questions_object_gap_helm = LightevalTaskConfig(
    name="blimp:wh_questions_object_gap",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_questions_object_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_questions_subject_gap" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_questions_subject_gap_lighteval = LightevalTaskConfig(
    name="blimp:wh_questions_subject_gap",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_questions_subject_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_questions_subject_gap" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_questions_subject_gap_helm = LightevalTaskConfig(
    name="blimp:wh_questions_subject_gap",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_questions_subject_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_questions_subject_gap_long_distance" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_questions_subject_gap_long_distance_lighteval = LightevalTaskConfig(
    name="blimp:wh_questions_subject_gap_long_distance",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_questions_subject_gap_long_distance",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_questions_subject_gap_long_distance" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_questions_subject_gap_long_distance_helm = LightevalTaskConfig(
    name="blimp:wh_questions_subject_gap_long_distance",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_questions_subject_gap_long_distance",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_vs_that_no_gap" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_vs_that_no_gap_lighteval = LightevalTaskConfig(
    name="blimp:wh_vs_that_no_gap",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_no_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_vs_that_no_gap" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_vs_that_no_gap_helm = LightevalTaskConfig(
    name="blimp:wh_vs_that_no_gap",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_no_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_vs_that_no_gap_long_distance" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_vs_that_no_gap_long_distance_lighteval = LightevalTaskConfig(
    name="blimp:wh_vs_that_no_gap_long_distance",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_no_gap_long_distance",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_vs_that_no_gap_long_distance" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_vs_that_no_gap_long_distance_helm = LightevalTaskConfig(
    name="blimp:wh_vs_that_no_gap_long_distance",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_no_gap_long_distance",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_vs_that_with_gap" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_vs_that_with_gap_lighteval = LightevalTaskConfig(
    name="blimp:wh_vs_that_with_gap",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_with_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_vs_that_with_gap" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_vs_that_with_gap_helm = LightevalTaskConfig(
    name="blimp:wh_vs_that_with_gap",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_with_gap",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# lighteval-suite variant of the blimp "wh_vs_that_with_gap_long_distance" subset:
# prompt.blimp prompts, evaluated on the train split with loglikelihood_acc.
blimp_wh_vs_that_with_gap_long_distance_lighteval = LightevalTaskConfig(
    name="blimp:wh_vs_that_with_gap_long_distance",
    suite=["lighteval", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_with_gap_long_distance",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm-suite variant of the blimp "wh_vs_that_with_gap_long_distance" subset:
# prompt.blimp_helm prompts, train split, loglikelihood_acc plus loglikelihood_acc_norm.
blimp_wh_vs_that_with_gap_long_distance_helm = LightevalTaskConfig(
    name="blimp:wh_vs_that_with_gap_long_distance",
    suite=["helm", "blimp"],
    version=0,
    hf_repo="blimp",
    hf_subset="wh_vs_that_with_gap_long_distance",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.blimp_helm,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
)
# helm "bold" task over the "all" subset of lighteval/bold_helm:
# prompt.bold prompts, test split, generation_size 100, prediction_perplexity metric.
bold_helm = LightevalTaskConfig(
    name="bold",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/bold_helm",
    hf_subset="all",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bold,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.prediction_perplexity],
)
# helm "bold:gender" task over the "gender" subset of lighteval/bold_helm:
# prompt.bold prompts, test split, generation_size 100, prediction_perplexity metric.
bold_gender_helm = LightevalTaskConfig(
    name="bold:gender",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/bold_helm",
    hf_subset="gender",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bold,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.prediction_perplexity],
)
# helm "bold:political_ideology" task over the "political_ideology" subset of lighteval/bold_helm:
# prompt.bold prompts, test split, generation_size 100, prediction_perplexity metric.
bold_political_ideology_helm = LightevalTaskConfig(
    name="bold:political_ideology",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/bold_helm",
    hf_subset="political_ideology",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bold,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.prediction_perplexity],
)
# helm "bold:profession" task over the "profession" subset of lighteval/bold_helm:
# prompt.bold prompts, test split, generation_size 100, prediction_perplexity metric.
bold_profession_helm = LightevalTaskConfig(
    name="bold:profession",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/bold_helm",
    hf_subset="profession",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bold,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.prediction_perplexity],
)
# helm "bold:race" task over the "race" subset of lighteval/bold_helm:
# prompt.bold prompts, test split, generation_size 100, prediction_perplexity metric.
bold_race_helm = LightevalTaskConfig(
    name="bold:race",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/bold_helm",
    hf_subset="race",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bold,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.prediction_perplexity],
)
# helm "bold:religious_ideology" task over the "religious_ideology" subset of lighteval/bold_helm:
# prompt.bold prompts, test split, generation_size 100, prediction_perplexity metric.
bold_religious_ideology_helm = LightevalTaskConfig(
    name="bold:religious_ideology",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/bold_helm",
    hf_subset="religious_ideology",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bold,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.prediction_perplexity],
)
# helm / helm_general "boolq" task on the "default" subset of lighteval/boolq_helm:
# prompt.boolq_helm prompts, validation split, generation_size 5, four exact-match variants.
boolq_helm = LightevalTaskConfig(
    name="boolq",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/boolq_helm",
    hf_subset="default",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.boolq_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# helm "boolq:contrastset" task on the "default" subset of lighteval/boolq_helm:
# prompt.boolq_helm_contrastset prompts, validation split only, four exact-match variants.
boolq_contrastset_helm = LightevalTaskConfig(
    name="boolq:contrastset",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/boolq_helm",
    hf_subset="default",
    hf_avail_splits=["validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.boolq_helm_contrastset,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# bigbench "bridging_anaphora_resolution_barqa" task: prompt.bigbench prompts,
# evaluated on the "default" split with perfect_exact_match.
bridging_anaphora_resolution_barqa_bigbench = LightevalTaskConfig(
    name="bridging_anaphora_resolution_barqa",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="bridging_anaphora_resolution_barqa",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# bigbench "causal_judgment" task: prompt.bigbench prompts,
# evaluated on the "default" split with loglikelihood_acc.
causal_judgment_bigbench = LightevalTaskConfig(
    name="causal_judgment",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="causal_judgment",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# bigbench "cause_and_effect" task: prompt.bigbench prompts,
# evaluated on the "default" split with loglikelihood_acc.
cause_and_effect_bigbench = LightevalTaskConfig(
    name="cause_and_effect",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="cause_and_effect",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# bigbench "checkmate_in_one" task: prompt.bigbench prompts, evaluated on the
# "default" split with loglikelihood_acc and perfect_exact_match.
checkmate_in_one_bigbench = LightevalTaskConfig(
    name="checkmate_in_one",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="checkmate_in_one",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.perfect_exact_match],
)
# bigbench "chess_state_tracking" task: prompt.bigbench prompts,
# evaluated on the "default" split with perfect_exact_match.
chess_state_tracking_bigbench = LightevalTaskConfig(
    name="chess_state_tracking",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="chess_state_tracking",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# bigbench "chinese_remainder_theorem" task: prompt.bigbench prompts,
# evaluated on the "default" split with perfect_exact_match.
chinese_remainder_theorem_bigbench = LightevalTaskConfig(
    name="chinese_remainder_theorem",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="chinese_remainder_theorem",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# bigbench "cifar10_classification" task: prompt.bigbench prompts,
# evaluated on the "default" split with loglikelihood_acc.
cifar10_classification_bigbench = LightevalTaskConfig(
    name="cifar10_classification",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="cifar10_classification",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# helm / helm_general "civil_comments" task on the "all" subset of lighteval/civil_comments_helm:
# prompt.civil_comments prompts, test split, exact-match variants plus macro/micro F1.
civil_comments_helm = LightevalTaskConfig(
    name="civil_comments",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="all",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# helm "civil_comments:LGBTQ" task on the "LGBTQ" subset of lighteval/civil_comments_helm:
# prompt.civil_comments prompts, test split, exact-match variants plus macro/micro F1.
civil_comments_LGBTQ_helm = LightevalTaskConfig(
    name="civil_comments:LGBTQ",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="LGBTQ",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# helm "civil_comments:black" task on the "black" subset of lighteval/civil_comments_helm:
# prompt.civil_comments prompts, test split, exact-match variants plus macro/micro F1.
civil_comments_black_helm = LightevalTaskConfig(
    name="civil_comments:black",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="black",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# HELM civil_comments, "christian" subset: exact-match variants plus macro/micro F1 on "test".
civil_comments_christian_helm = LightevalTaskConfig(
    name="civil_comments:christian",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="christian",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# HELM civil_comments, "female" subset: exact-match variants plus macro/micro F1 on "test".
civil_comments_female_helm = LightevalTaskConfig(
    name="civil_comments:female",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="female",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# HELM civil_comments, "male" subset: exact-match variants plus macro/micro F1 on "test".
civil_comments_male_helm = LightevalTaskConfig(
    name="civil_comments:male",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="male",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# HELM civil_comments, "muslim" subset: exact-match variants plus macro/micro F1 on "test".
civil_comments_muslim_helm = LightevalTaskConfig(
    name="civil_comments:muslim",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="muslim",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# HELM civil_comments, "other_religions" subset: exact-match variants plus macro/micro F1 on "test".
civil_comments_other_religions_helm = LightevalTaskConfig(
    name="civil_comments:other_religions",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="other_religions",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# HELM civil_comments, "white" subset: exact-match variants plus macro/micro F1 on "test".
civil_comments_white_helm = LightevalTaskConfig(
    name="civil_comments:white",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/civil_comments_helm",
    hf_subset="white",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.civil_comments,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
)
# BIG-bench (also in the lite suite) "code_line_description": log-likelihood accuracy on "default".
code_line_description_bigbench_lite = LightevalTaskConfig(
    name="code_line_description",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="code_line_description",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench_linefeed_before_and_after_query,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "codenames": ROUGE (t5) and BLEU on the "default" split.
codenames_bigbench = LightevalTaskConfig(
    name="codenames",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="codenames",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.rouge_t5, Metrics.bleu],
)
# BIG-bench "color": ROUGE (t5), BLEU, log-likelihood accuracy and perfect exact match on "default".
color_bigbench = LightevalTaskConfig(
    name="color",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="color",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.loglikelihood_acc, Metrics.perfect_exact_match],
)
# BIG-bench "common_morpheme": log-likelihood accuracy on the "default" split.
common_morpheme_bigbench = LightevalTaskConfig(
    name="common_morpheme",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="common_morpheme",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# HELM commonsenseqa: exact-match variants on the "validation" split.
commonsenseqa_helm = LightevalTaskConfig(
    name="commonsenseqa",
    suite=["helm", "commonsense_scenario"],
    version=0,
    hf_repo="commonsense_qa",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.commonsense_qa,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench (also in the lite suite) "conceptual_combinations": log-likelihood accuracy on "default".
conceptual_combinations_bigbench_lite = LightevalTaskConfig(
    name="conceptual_combinations",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="conceptual_combinations",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench (also in the lite suite) "conlang_translation": ROUGE (t5), BLEU and perfect exact
# match on "default"; generation stops at sentence-ending punctuation rather than a newline.
conlang_translation_bigbench_lite = LightevalTaskConfig(
    name="conlang_translation",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="conlang_translation",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench_whitespace_after_query,
    generation_size=100,
    stop_sequence=[".", ";", "!", "?"],
    metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.perfect_exact_match],
)
# BIG-bench "contextual_parametric_knowledge_conflicts": ROUGE (t5), log-likelihood accuracy and
# perfect exact match on the "default" split.
contextual_parametric_knowledge_conflicts_bigbench = LightevalTaskConfig(
    name="contextual_parametric_knowledge_conflicts",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="contextual_parametric_knowledge_conflicts",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.rouge_t5, Metrics.loglikelihood_acc, Metrics.perfect_exact_match],
)
# HELM copyright scenario, subset "n_books_1000-extractions_per_book_1-prefix_length_125";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_n_books_1000_extractions_per_book_1_prefix_length_125_helm = LightevalTaskConfig(
    name="copyright:n_books_1000-extractions_per_book_1-prefix_length_125",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="n_books_1000-extractions_per_book_1-prefix_length_125",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "n_books_1000-extractions_per_book_1-prefix_length_25";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_n_books_1000_extractions_per_book_1_prefix_length_25_helm = LightevalTaskConfig(
    name="copyright:n_books_1000-extractions_per_book_1-prefix_length_25",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="n_books_1000-extractions_per_book_1-prefix_length_25",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "n_books_1000-extractions_per_book_1-prefix_length_5";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_n_books_1000_extractions_per_book_1_prefix_length_5_helm = LightevalTaskConfig(
    name="copyright:n_books_1000-extractions_per_book_1-prefix_length_5",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="n_books_1000-extractions_per_book_1-prefix_length_5",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "n_books_1000-extractions_per_book_3-prefix_length_125";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_n_books_1000_extractions_per_book_3_prefix_length_125_helm = LightevalTaskConfig(
    name="copyright:n_books_1000-extractions_per_book_3-prefix_length_125",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="n_books_1000-extractions_per_book_3-prefix_length_125",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "n_books_1000-extractions_per_book_3-prefix_length_25";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_n_books_1000_extractions_per_book_3_prefix_length_25_helm = LightevalTaskConfig(
    name="copyright:n_books_1000-extractions_per_book_3-prefix_length_25",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="n_books_1000-extractions_per_book_3-prefix_length_25",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "n_books_1000-extractions_per_book_3-prefix_length_5";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_n_books_1000_extractions_per_book_3_prefix_length_5_helm = LightevalTaskConfig(
    name="copyright:n_books_1000-extractions_per_book_3-prefix_length_5",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="n_books_1000-extractions_per_book_3-prefix_length_5",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "oh_the_places";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_oh_the_places_helm = LightevalTaskConfig(
    name="copyright:oh_the_places",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="oh_the_places",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "pilot";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_pilot_helm = LightevalTaskConfig(
    name="copyright:pilot",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="pilot",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "popular_books-prefix_length_10";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_popular_books_prefix_length_10_helm = LightevalTaskConfig(
    name="copyright:popular_books-prefix_length_10",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="popular_books-prefix_length_10",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "popular_books-prefix_length_125";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_popular_books_prefix_length_125_helm = LightevalTaskConfig(
    name="copyright:popular_books-prefix_length_125",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="popular_books-prefix_length_125",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "popular_books-prefix_length_25";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_popular_books_prefix_length_25_helm = LightevalTaskConfig(
    name="copyright:popular_books-prefix_length_25",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="popular_books-prefix_length_25",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "popular_books-prefix_length_250";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_popular_books_prefix_length_250_helm = LightevalTaskConfig(
    name="copyright:popular_books-prefix_length_250",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="popular_books-prefix_length_250",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "popular_books-prefix_length_5";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_popular_books_prefix_length_5_helm = LightevalTaskConfig(
    name="copyright:popular_books-prefix_length_5",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="popular_books-prefix_length_5",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "popular_books-prefix_length_50";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_popular_books_prefix_length_50_helm = LightevalTaskConfig(
    name="copyright:popular_books-prefix_length_50",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="popular_books-prefix_length_50",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "prompt_num_line_1-min_lines_20";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_prompt_num_line_1_min_lines_20_helm = LightevalTaskConfig(
    name="copyright:prompt_num_line_1-min_lines_20",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="prompt_num_line_1-min_lines_20",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "prompt_num_line_10-min_lines_20";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_prompt_num_line_10_min_lines_20_helm = LightevalTaskConfig(
    name="copyright:prompt_num_line_10-min_lines_20",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="prompt_num_line_10-min_lines_20",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# HELM copyright scenario, subset "prompt_num_line_5-min_lines_20";
# only a "train" split is available, so it doubles as the evaluation split.
copyright_prompt_num_line_5_min_lines_20_helm = LightevalTaskConfig(
    name="copyright:prompt_num_line_5-min_lines_20",
    suite=["helm", "copyright_scenario"],
    version=0,
    hf_repo="lighteval/copyright_helm",
    hf_subset="prompt_num_line_5-min_lines_20",
    trust_dataset=True,
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.copyright,
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.copyright],
)
# CoQA restricted to the first turn of each conversation: the adapter reads only
# `questions[0]` and the matching `answers["input_text"][0]`, with the story as context.
coqa_first_question = LightevalTaskConfig(
    name="coqa",
    suite=("lighteval",),
    version=1,
    hf_repo="stanfordnlp/coqa",
    hf_subset="default",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    prompt_function=get_qa_prompt_function(
        Language.ENGLISH,
        lambda line: {
            "question": line["questions"][0],
            "context": line["story"],
            "choices": [line["answers"]["input_text"][0]],
        },
    ),
    generation_size=100,
    stop_sequence=["\n", "Question:", "question:"],
    metric=(
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_quasi,
    ),
)
# CoQA registered under the lighteval and BIG-bench (programmatic) suites:
# perfect exact match and F1 on the "validation" split.
coqa_bb_lighteval = LightevalTaskConfig(
    name="coqa_bb",
    suite=["lighteval", "bigbench_programmatic", "bigbench"],
    version=0,
    hf_repo="coqa",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.coqa,
    generation_size=10,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match, Metrics.f1_score],
)
# HELM covid_dialogue: exact match, F1, ROUGE-L and BLEU-1/BLEU-4 on the "validation" and
# "test" splits.
covid_dialogue_helm = LightevalTaskConfig(
    name="covid_dialogue",
    suite=["helm"],
    prompt_function=prompt.covid_dialogue,
    hf_repo="lighteval/covid_dialogue",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=128,
    # Every entry is a `Metrics` enum member, matching the neighbouring task configs;
    # the BLEU variants were previously passed as bare strings ("bleu_1", "bleu_4").
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.rougeL,
        Metrics.bleu_1,
        Metrics.bleu_4,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# BIG-bench "crash_blossom": log-likelihood accuracy on the "default" split.
crash_blossom_bigbench = LightevalTaskConfig(
    name="crash_blossom",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="crash_blossom",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "crass_ai": log-likelihood accuracy on the "default" split.
crass_ai_bigbench = LightevalTaskConfig(
    name="crass_ai",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="crass_ai",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "cryobiology_spanish": log-likelihood accuracy on the "default" split.
cryobiology_spanish_bigbench = LightevalTaskConfig(
    name="cryobiology_spanish",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="cryobiology_spanish",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "cryptonite": perfect exact match on the "default" split.
cryptonite_bigbench = LightevalTaskConfig(
    name="cryptonite",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="cryptonite",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# BIG-bench "cs_algorithms": log-likelihood accuracy on the "default" split.
cs_algorithms_bigbench = LightevalTaskConfig(
    name="cs_algorithms",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="cs_algorithms",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "dark_humor_detection": log-likelihood accuracy on the "default" split.
dark_humor_detection_bigbench = LightevalTaskConfig(
    name="dark_humor_detection",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="dark_humor_detection",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "date_understanding": log-likelihood accuracy on the "default" split.
date_understanding_bigbench = LightevalTaskConfig(
    name="date_understanding",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="date_understanding",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "disambiguation_qa": log-likelihood accuracy on the "default" split.
disambiguation_qa_bigbench = LightevalTaskConfig(
    name="disambiguation_qa",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="disambiguation_qa",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "discourse_marker_prediction": log-likelihood accuracy on the "default" split.
discourse_marker_prediction_bigbench = LightevalTaskConfig(
    name="discourse_marker_prediction",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="discourse_marker_prediction",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "disfl_qa": perfect exact match on the "default" split.
disfl_qa_bigbench = LightevalTaskConfig(
    name="disfl_qa",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="disfl_qa",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# DROP as a generative QA task. The gold choices are the truthy entries among the answer's
# `number`, its `spans`, and the date produced by `prompt.get_drop_date`; `hf_filter` builds
# the same list so that rows where it comes out empty are falsy for the filter.
drop_qa = LightevalTaskConfig(
    name="drop",
    suite=("lighteval",),
    version=1,
    hf_repo="lighteval/drop_harness",
    hf_subset="default",
    hf_filter=lambda line: list(
        filter(
            None,
            [line["answer"].get("number")]
            + line["answer"]["spans"]
            + [prompt.get_drop_date(line["answer"].get("date"))],
        )
    ),
    evaluation_splits=("validation",),
    few_shots_split="train",
    prompt_function=get_qa_prompt_function(
        Language.ENGLISH,
        lambda line: {
            "context": line["passage"],
            "question": line["question"],
            "choices": list(
                filter(
                    None,
                    [line["answer"].get("number")]
                    + line["answer"]["spans"]
                    + [prompt.get_drop_date(line["answer"].get("date"))],
                )
            ),
        },
    ),
    generation_size=250,
    stop_sequence=["Question:", "question:", "\n"],
    metric=(
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_quasi,
    ),
)
# HELM dyck_language, subset "2": exact match on the "test" split.
dyck_language_2_helm = LightevalTaskConfig(
    name="dyck_language:2",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/DyckLanguage",
    hf_subset="2",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.dyck_language,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match],
)
# HELM dyck_language, subset "3": exact match on the "test" split.
dyck_language_3_helm = LightevalTaskConfig(
    name="dyck_language:3",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/DyckLanguage",
    hf_subset="3",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.dyck_language,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match],
)
# HELM dyck_language, subset "4": exact match on the "test" split.
dyck_language_4_helm = LightevalTaskConfig(
    name="dyck_language:4",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/DyckLanguage",
    hf_subset="4",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.dyck_language,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.exact_match],
)
# BIG-bench "dyck_languages": log-likelihood accuracy on the "default" split.
dyck_languages_bigbench = LightevalTaskConfig(
    name="dyck_languages",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="dyck_languages",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "elementary_math_qa": log-likelihood accuracy on the "default" split.
elementary_math_qa_bigbench = LightevalTaskConfig(
    name="elementary_math_qa",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="elementary_math_qa",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench (also in the lite suite) "emoji_movie": ROUGE (t5), BLEU, log-likelihood accuracy
# and perfect exact match on the "default" split.
emoji_movie_bigbench_lite = LightevalTaskConfig(
    name="emoji_movie",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="emoji_movie",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.loglikelihood_acc, Metrics.perfect_exact_match],
)
# BIG-bench "emojis_emotion_prediction": log-likelihood accuracy on the "default" split.
emojis_emotion_prediction_bigbench = LightevalTaskConfig(
    name="emojis_emotion_prediction",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="emojis_emotion_prediction",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "empirical_judgments": log-likelihood accuracy on the "default" split.
empirical_judgments_bigbench = LightevalTaskConfig(
    name="empirical_judgments",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="empirical_judgments",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "english_proverbs": log-likelihood accuracy on the "default" split.
english_proverbs_bigbench = LightevalTaskConfig(
    name="english_proverbs",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="english_proverbs",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "english_russian_proverbs": log-likelihood accuracy on the "default" split.
english_russian_proverbs_bigbench = LightevalTaskConfig(
    name="english_russian_proverbs",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="english_russian_proverbs",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "entailed_polarity": log-likelihood accuracy on the "default" split.
entailed_polarity_bigbench = LightevalTaskConfig(
    name="entailed_polarity",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="entailed_polarity",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "entailed_polarity_hindi": log-likelihood accuracy on the "default" split.
entailed_polarity_hindi_bigbench = LightevalTaskConfig(
    name="entailed_polarity_hindi",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="entailed_polarity_hindi",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# HELM entity_data_imputation on "lighteval/Buy": exact-match variants on "valid" and "test"
# (this dataset names its validation split "valid").
entity_data_imputation_Buy_helm = LightevalTaskConfig(
    name="entity_data_imputation:Buy",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/Buy",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "valid"],
    evaluation_splits=["valid", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.entity_data_imputation,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# HELM entity data imputation on `lighteval/Restaurant`; only a "train" split is
# declared, and it is also the evaluation split.
entity_data_imputation_Restaurant_helm = LightevalTaskConfig(
    name="entity_data_imputation:Restaurant",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/Restaurant",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_data_imputation,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Abt_Buy` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Abt_Buy_helm = LightevalTaskConfig(
    name="entity_matching:Abt_Buy",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Abt_Buy",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Amazon_Google` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Amazon_Google_helm = LightevalTaskConfig(
    name="entity_matching:Amazon_Google",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Amazon_Google",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Beer` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Beer_helm = LightevalTaskConfig(
    name="entity_matching:Beer",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Beer",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Company` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Company_helm = LightevalTaskConfig(
    name="entity_matching:Company",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Company",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `DBLP_ACM` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_DBLP_ACM_helm = LightevalTaskConfig(
    name="entity_matching:DBLP_ACM",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="DBLP_ACM",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `DBLP_GoogleScholar` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_DBLP_GoogleScholar_helm = LightevalTaskConfig(
    name="entity_matching:DBLP_GoogleScholar",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="DBLP_GoogleScholar",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Dirty_DBLP_ACM` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Dirty_DBLP_ACM_helm = LightevalTaskConfig(
    name="entity_matching:Dirty_DBLP_ACM",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Dirty_DBLP_ACM",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Dirty_DBLP_GoogleScholar` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Dirty_DBLP_GoogleScholar_helm = LightevalTaskConfig(
    name="entity_matching:Dirty_DBLP_GoogleScholar",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Dirty_DBLP_GoogleScholar",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Dirty_Walmart_Amazon` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Dirty_Walmart_Amazon_helm = LightevalTaskConfig(
    name="entity_matching:Dirty_Walmart_Amazon",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Dirty_Walmart_Amazon",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Dirty_iTunes_Amazon` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Dirty_iTunes_Amazon_helm = LightevalTaskConfig(
    name="entity_matching:Dirty_iTunes_Amazon",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Dirty_iTunes_Amazon",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Fodors_Zagats` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Fodors_Zagats_helm = LightevalTaskConfig(
    # The task/subset separator is ":" (as in every other `entity_matching:*`
    # task name); the previous "=" made this task unreachable under that pattern.
    name="entity_matching:Fodors_Zagats",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Fodors_Zagats",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `Walmart_Amazon` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_Walmart_Amazon_helm = LightevalTaskConfig(
    name="entity_matching:Walmart_Amazon",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="Walmart_Amazon",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# HELM entity matching, `iTunes_Amazon` subset of `lighteval/EntityMatching`;
# evaluated on "validation" and "test" with the exact-match family of metrics.
entity_matching_iTunes_Amazon_helm = LightevalTaskConfig(
    name="entity_matching:iTunes_Amazon",
    suite=["helm"],
    # Dataset
    hf_repo="lighteval/EntityMatching",
    hf_subset="iTunes_Amazon",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.entity_matching,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# BIG-bench `epistemic_reasoning`: log-likelihood accuracy on the "default" split.
epistemic_reasoning_bigbench = LightevalTaskConfig(
    name="epistemic_reasoning",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="epistemic_reasoning",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# Hendrycks ETHICS, `commonsense` subset: log-likelihood accuracy on "test".
ethics_commonsense_lighteval = LightevalTaskConfig(
    name="ethics:commonsense",
    suite=["lighteval", "ethics"],
    # Dataset
    hf_repo="lighteval/hendrycks_ethics",
    hf_subset="commonsense",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.ethics_commonsense,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# Hendrycks ETHICS, `deontology` subset: log-likelihood accuracy on "test".
ethics_deontology_lighteval = LightevalTaskConfig(
    name="ethics:deontology",
    suite=["lighteval", "ethics"],
    # Dataset
    hf_repo="lighteval/hendrycks_ethics",
    hf_subset="deontology",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.ethics_deontology,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# Hendrycks ETHICS, `justice` subset: log-likelihood accuracy on "test".
ethics_justice_lighteval = LightevalTaskConfig(
    name="ethics:justice",
    suite=["lighteval", "ethics"],
    # Dataset
    hf_repo="lighteval/hendrycks_ethics",
    hf_subset="justice",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.ethics_justice,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# Hendrycks ETHICS, `utilitarianism` subset: log-likelihood accuracy on "test".
ethics_utilitarianism_lighteval = LightevalTaskConfig(
    name="ethics:utilitarianism",
    suite=["lighteval", "ethics"],
    # Dataset
    hf_repo="lighteval/hendrycks_ethics",
    hf_subset="utilitarianism",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.ethics_utilitarianism,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# Hendrycks ETHICS, `virtue` subset: log-likelihood accuracy on "test".
ethics_virtue_lighteval = LightevalTaskConfig(
    name="ethics:virtue",
    suite=["lighteval", "ethics"],
    # Dataset
    hf_repo="lighteval/hendrycks_ethics",
    hf_subset="virtue",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.ethics_virtue,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `evaluating_information_essentiality`: log-likelihood accuracy on the "default" split.
evaluating_information_essentiality_bigbench = LightevalTaskConfig(
    name="evaluating_information_essentiality",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="evaluating_information_essentiality",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `fact_checker`: log-likelihood accuracy on the "default" split.
fact_checker_bigbench = LightevalTaskConfig(
    name="fact_checker",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="fact_checker",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `fantasy_reasoning`: log-likelihood accuracy on the "default" split.
fantasy_reasoning_bigbench = LightevalTaskConfig(
    name="fantasy_reasoning",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="fantasy_reasoning",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `few_shot_nlg`: scored with BLEU and BLEURT on the "default" split.
few_shot_nlg_bigbench = LightevalTaskConfig(
    name="few_shot_nlg",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="few_shot_nlg",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.bleurt],
    version=0,
)
# BIG-bench `figure_of_speech_detection`: log-likelihood accuracy on the "default" split.
figure_of_speech_detection_bigbench = LightevalTaskConfig(
    name="figure_of_speech_detection",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="figure_of_speech_detection",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench (also in the "bigbench_lite" suite) `formal_fallacies_syllogisms_negation`:
# log-likelihood accuracy on the "default" split.
formal_fallacies_syllogisms_negation_bigbench_lite = LightevalTaskConfig(
    name="formal_fallacies_syllogisms_negation",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="formal_fallacies_syllogisms_negation",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `gem`: scored with BLEU and ROUGE (t5 variant) on the "default" split.
gem_bigbench = LightevalTaskConfig(
    name="gem",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="gem",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.rouge_t5],
    version=0,
)
# BIG-bench `gender_inclusive_sentences_german`: perfect exact match on the "default" split.
gender_inclusive_sentences_german_bigbench = LightevalTaskConfig(
    name="gender_inclusive_sentences_german",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="gender_inclusive_sentences_german",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
    version=0,
)
# BIG-bench `general_knowledge`: log-likelihood accuracy on the "default" split.
general_knowledge_bigbench = LightevalTaskConfig(
    name="general_knowledge",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="general_knowledge",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `geometric_shapes`: scored with ROUGE (t5 variant), BLEU,
# log-likelihood accuracy and perfect exact match on the "default" split.
geometric_shapes_bigbench = LightevalTaskConfig(
    name="geometric_shapes",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="geometric_shapes",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[
        Metrics.rouge_t5,
        Metrics.bleu,
        Metrics.loglikelihood_acc,
        Metrics.perfect_exact_match,
    ],
    version=0,
)
# GLUE `cola`: log-likelihood accuracy and single-token MCC on "validation".
glue_cola_lighteval = LightevalTaskConfig(
    name="glue:cola",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="cola",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.cola,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.mcc_single_token],
    version=0,
)
# GLUE MNLI (`mnli_matched` subset): log-likelihood accuracy on "validation".
glue_mnli_lighteval = LightevalTaskConfig(
    name="glue:mnli",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="mnli_matched",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.mnli,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GLUE MNLI (`mnli_mismatched` subset): log-likelihood accuracy on "validation".
glue_mnli_mismatched_lighteval = LightevalTaskConfig(
    name="glue:mnli_mismatched",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="mnli_mismatched",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.mnli,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GLUE `mrpc`: log-likelihood accuracy and log-likelihood F1 on "validation".
glue_mrpc_lighteval = LightevalTaskConfig(
    name="glue:mrpc",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="mrpc",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.mrpc,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    # Use the `Metrics` enum member rather than a bare string so every entry
    # in the list has the same type as in the rest of this file.
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_f1],
    version=0,
)
# GLUE `qnli`: log-likelihood accuracy on "validation".
glue_qnli_lighteval = LightevalTaskConfig(
    name="glue:qnli",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="qnli",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.qnli,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GLUE `qqp`: log-likelihood accuracy and log-likelihood F1 on "validation".
glue_qqp_lighteval = LightevalTaskConfig(
    name="glue:qqp",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="qqp",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.qqp,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    # Use the `Metrics` enum member rather than a bare string so every entry
    # in the list has the same type as in the rest of this file.
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_f1],
    version=0,
)
# GLUE `rte`: log-likelihood accuracy on "validation".
glue_rte_lighteval = LightevalTaskConfig(
    name="glue:rte",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="rte",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.rte,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GLUE `sst2`: log-likelihood accuracy on "validation".
glue_sst2_lighteval = LightevalTaskConfig(
    name="glue:sst2",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="sst2",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.sst,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GLUE `stsb`: log-likelihood accuracy on "validation".
glue_stsb_lighteval = LightevalTaskConfig(
    name="glue:stsb",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="stsb",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.stsb,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GLUE `wnli`: log-likelihood accuracy on "validation".
glue_wnli_lighteval = LightevalTaskConfig(
    name="glue:wnli",
    suite=["lighteval", "glue"],
    # Dataset
    hf_repo="glue",
    hf_subset="wnli",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.wnli,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `goal_step_wikihow`: log-likelihood accuracy on the "default" split.
goal_step_wikihow_bigbench = LightevalTaskConfig(
    name="goal_step_wikihow",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="goal_step_wikihow",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GPQA (`gpqa_main` subset): log-likelihood accuracy on "train", the only declared split.
gpqa_lighteval = LightevalTaskConfig(
    name="gpqa",
    suite=["lighteval"],
    # Dataset
    hf_repo="Idavidrein/gpqa",
    hf_subset="gpqa_main",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompt / few-shot
    prompt_function=prompt.gpqa,
    few_shots_split=None,
    few_shots_select="random_sampling",
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GPQA instruct variant, `gpqa_diamond` subset: generative evaluation on "train",
# the only declared split.
gpqa_diamond_instruct_lighteval = LightevalTaskConfig(
    name="gpqa:diamond",
    suite=["lighteval"],
    # Dataset
    hf_repo="Idavidrein/gpqa",
    hf_subset="gpqa_diamond",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.gpqa_instruct,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=32768,  # needed for reasoning models like R1
    stop_sequence=[],  # no stop sequence, will use eos token
    metric=[
        Metrics.gpqa_instruct_metric,
        # Metrics.gpqa_instruct_pass_at_1_1n,
        # Metrics.gpqa_instruct_pass_at_1_4n,
        # Metrics.gpqa_instruct_pass_at_1_8n,
    ],
    version=1,
)
# GPQA instruct variant, `gpqa_extended` subset: generative evaluation on "train",
# the only declared split.
gpqa_extended_instruct_lighteval = LightevalTaskConfig(
    name="gpqa:extended",
    suite=["lighteval"],
    # Dataset
    hf_repo="Idavidrein/gpqa",
    hf_subset="gpqa_extended",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.gpqa_instruct,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=32768,  # needed for reasoning models like R1
    stop_sequence=[],  # no stop sequence, will use eos token
    metric=[Metrics.gpqa_instruct_metric],
    version=0,
)
# GPQA instruct variant, `gpqa_main` subset: generative evaluation on "train",
# the only declared split.
gpqa_main_instruct_lighteval = LightevalTaskConfig(
    name="gpqa:main",
    suite=["lighteval"],
    # Dataset
    hf_repo="Idavidrein/gpqa",
    hf_subset="gpqa_main",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.gpqa_instruct,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=32768,  # needed for reasoning models like R1
    stop_sequence=[],  # no stop sequence, will use eos token
    metric=[Metrics.gpqa_instruct_metric],
    version=0,
)
# BIG-bench `gre_reading_comprehension`: log-likelihood accuracy on the "default" split.
gre_reading_comprehension_bigbench = LightevalTaskConfig(
    name="gre_reading_comprehension",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="gre_reading_comprehension",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# GSM8K for the "leaderboard" suite: GSM8K quasi exact match on "test".
gsm8k_leaderboard = LightevalTaskConfig(
    name="gsm8k",
    suite=["leaderboard"],
    # Dataset
    hf_repo="gsm8k",
    hf_subset="main",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot
    prompt_function=prompt.gsm8k,
    few_shots_split=None,
    few_shots_select="random_sampling_from_train",
    # Generation / scoring
    generation_size=256,
    stop_sequence=["Question:"],
    metric=[Metrics.quasi_exact_match_gsm8k],
    version=0,
)
# GSM8K for the "lighteval" suite: scored with `expr_gold_metric` on "test".
gsm8k_lighteval = LightevalTaskConfig(
    name="gsm8k",
    suite=["lighteval"],
    # Dataset
    hf_repo="gsm8k",
    hf_subset="main",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot
    prompt_function=prompt.gsm8k,
    few_shots_split=None,
    few_shots_select="random_sampling_from_train",
    # Generation / scoring
    generation_size=256,
    stop_sequence=["Question:"],
    metric=[Metrics.expr_gold_metric],
    version=0,
)
# HEAD-QA, `en` subset: log-likelihood accuracy (raw and `norm_nospace`) on "test".
headqa_en_lighteval = LightevalTaskConfig(
    name="headqa:en",
    suite=["lighteval", "headqa"],
    # Dataset
    hf_repo="lighteval/headqa_harness",
    hf_subset="en",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.headqa,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
    version=0,
)
# HEAD-QA, `es` subset: log-likelihood accuracy (raw and `norm_nospace`) on "test".
headqa_es_lighteval = LightevalTaskConfig(
    name="headqa:es",
    suite=["lighteval", "headqa"],
    # Dataset
    hf_repo="lighteval/headqa_harness",
    hf_subset="es",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.headqa,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
    version=0,
)
# HellaSwag for the "leaderboard" suite: log-likelihood accuracy (raw and
# normalized) on "validation".
hellaswag_leaderboard = LightevalTaskConfig(
    name="hellaswag",
    suite=["leaderboard"],
    # Dataset
    hf_repo="hellaswag",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot
    prompt_function=prompt.hellaswag_harness,
    few_shots_split=None,
    few_shots_select="random_sampling_from_train",
    # Generation / scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm],
    version=0,
)
# HellaSwag for the "helm" suites, using the generative prompt: exact-match
# family of metrics on "validation".
hellaswag_generative = LightevalTaskConfig(
    name="hellaswag",
    suite=["helm", "helm_general"],
    # Dataset
    hf_repo="hellaswag",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.hellaswag_generative,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
# BIG-bench `hhh_alignment`: log-likelihood accuracy on the "default" split.
hhh_alignment_bigbench = LightevalTaskConfig(
    name="hhh_alignment",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="hhh_alignment",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `hindi_question_answering`: BLEU, ROUGE (t5 variant) and perfect
# exact match on the "default" split.
hindi_question_answering_bigbench = LightevalTaskConfig(
    name="hindi_question_answering",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="hindi_question_answering",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match],
    version=0,
)
# BIG-bench (also in the "bigbench_lite" suite) `hindu_knowledge`:
# log-likelihood accuracy on the "default" split.
hindu_knowledge_bigbench_lite = LightevalTaskConfig(
    name="hindu_knowledge",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="hindu_knowledge",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `hinglish_toxicity`: log-likelihood accuracy on the "default" split.
hinglish_toxicity_bigbench = LightevalTaskConfig(
    name="hinglish_toxicity",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="hinglish_toxicity",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `human_organs_senses`: log-likelihood accuracy on the "default" split.
human_organs_senses_bigbench = LightevalTaskConfig(
    name="human_organs_senses",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="human_organs_senses",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `hyperbaton`: log-likelihood accuracy on the "default" split.
hyperbaton_bigbench = LightevalTaskConfig(
    name="hyperbaton",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="hyperbaton",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `identify_math_theorems`: log-likelihood accuracy on the "default" split.
identify_math_theorems_bigbench = LightevalTaskConfig(
    name="identify_math_theorems",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="identify_math_theorems",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# BIG-bench `identify_odd_metaphor`: log-likelihood accuracy on the "default" split.
identify_odd_metaphor_bigbench = LightevalTaskConfig(
    name="identify_odd_metaphor",
    suite=["bigbench", "bigbench_json"],
    # Dataset
    hf_repo="bigbench",
    hf_subset="identify_odd_metaphor",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt / few-shot (no few-shot split or selection strategy set)
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation / scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# HELM `imdb` task: evaluated on the `test` split of `lighteval/IMDB_helm`
# with exact-match variants plus macro/micro F1.
imdb_helm = LightevalTaskConfig(
    name="imdb",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/IMDB_helm",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.imdb,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# HELM `imdb:contrastset` task: same repo and metrics as `imdb`, but uses
# `prompt.imdb_contrastset` and only declares the `test` split.
imdb_contrastset_helm = LightevalTaskConfig(
    name="imdb:contrastset",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/IMDB_helm",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.imdb_contrastset,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# BigBench `implicatures` task: log-likelihood accuracy on the `default` split.
implicatures_bigbench = LightevalTaskConfig(
    name="implicatures",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="implicatures",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `implicit_relations` task: log-likelihood accuracy on the `default` split.
implicit_relations_bigbench = LightevalTaskConfig(
    name="implicit_relations",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="implicit_relations",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `intent_recognition` task: log-likelihood accuracy on the `default` split.
intent_recognition_bigbench = LightevalTaskConfig(
    name="intent_recognition",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="intent_recognition",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# HELM interactive-QA MMLU task for the `abstract_algebra` subset:
# exact-match variants on the `test` split.
interactive_qa_mmlu_abstract_algebra_helm = LightevalTaskConfig(
    name="interactive_qa_mmlu:abstract_algebra",
    suite=["helm", "interactive_qa_mmlu_scenario"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="abstract_algebra",
    trust_dataset=True,
    hf_avail_splits=["dev", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mmlu_qa_abstract_algebra,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
)
# HELM interactive-QA MMLU task for the `college_chemistry` subset:
# exact-match variants on the `test` split.
interactive_qa_mmlu_college_chemistry_helm = LightevalTaskConfig(
    name="interactive_qa_mmlu:college_chemistry",
    suite=["helm", "interactive_qa_mmlu_scenario"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_chemistry",
    trust_dataset=True,
    hf_avail_splits=["dev", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mmlu_qa_college_chemistry,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
)
# HELM interactive-QA MMLU task for the `global_facts` subset:
# exact-match variants on the `test` split.
interactive_qa_mmlu_global_facts_helm = LightevalTaskConfig(
    name="interactive_qa_mmlu:global_facts",
    suite=["helm", "interactive_qa_mmlu_scenario"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="global_facts",
    trust_dataset=True,
    hf_avail_splits=["dev", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mmlu_qa_global_facts,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
)
# HELM interactive-QA MMLU task for the `miscellaneous` subset:
# exact-match variants on the `test` split.
interactive_qa_mmlu_miscellaneous_helm = LightevalTaskConfig(
    name="interactive_qa_mmlu:miscellaneous",
    suite=["helm", "interactive_qa_mmlu_scenario"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="miscellaneous",
    trust_dataset=True,
    hf_avail_splits=["dev", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mmlu_qa_miscellaneous,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
)
# HELM interactive-QA MMLU task for the `nutrition` subset:
# exact-match variants on the `test` split.
interactive_qa_mmlu_nutrition_helm = LightevalTaskConfig(
    name="interactive_qa_mmlu:nutrition",
    suite=["helm", "interactive_qa_mmlu_scenario"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="nutrition",
    trust_dataset=True,
    hf_avail_splits=["dev", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mmlu_qa_nutrition,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
)
# HELM interactive-QA MMLU task for the `us_foreign_policy` subset:
# exact-match variants on the `test` split.
interactive_qa_mmlu_us_foreign_policy_helm = LightevalTaskConfig(
    name="interactive_qa_mmlu:us_foreign_policy",
    suite=["helm", "interactive_qa_mmlu_scenario"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="us_foreign_policy",
    trust_dataset=True,
    hf_avail_splits=["dev", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mmlu_qa_us_foreign_policy,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
)
# BigBench `international_phonetic_alphabet_nli` task: log-likelihood accuracy
# on the `default` split.
international_phonetic_alphabet_nli_bigbench = LightevalTaskConfig(
    name="international_phonetic_alphabet_nli",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="international_phonetic_alphabet_nli",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `international_phonetic_alphabet_transliterate` task: scored with
# BLEU, ROUGE (t5) and perfect exact match on the `default` split.
# NOTE(review): generation_size=1 paired with BLEU/ROUGE/exact-match metrics
# looks very short for a generative task — confirm it is intended.
international_phonetic_alphabet_transliterate_bigbench = LightevalTaskConfig(
    name="international_phonetic_alphabet_transliterate",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="international_phonetic_alphabet_transliterate",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `intersect_geometry` task: log-likelihood accuracy on the `default` split.
intersect_geometry_bigbench = LightevalTaskConfig(
    name="intersect_geometry",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="intersect_geometry",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `irony_identification` task: log-likelihood accuracy on the `default` split.
irony_identification_bigbench = LightevalTaskConfig(
    name="irony_identification",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="irony_identification",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# IWSLT17 `ar-en` translation task (harness selection): BLEU, chrF and TER on
# the `iwslt17_ar-en` subset with the alphabetical WMT prompt.
iwslt17_ar_en_lighteval = LightevalTaskConfig(
    name="iwslt17:ar-en",
    suite=["lighteval", "harness_selection"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_ar-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `de-en` translation task: BLEU, chrF and TER on the `iwslt17_de-en`
# subset with the alphabetical WMT prompt.
iwslt17_de_en_lighteval = LightevalTaskConfig(
    name="iwslt17:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_de-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `en-ar` translation task (harness selection): BLEU, chrF and TER.
# It reads the `iwslt17_ar-en` subset and pairs it with
# `prompt.wmt_reverse_alphabetical`.
iwslt17_en_ar_lighteval = LightevalTaskConfig(
    name="iwslt17:en-ar",
    suite=["lighteval", "harness_selection"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_ar-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_reverse_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `en-de` translation task: BLEU, chrF and TER on the `iwslt17_en-de`
# subset with the reverse-alphabetical WMT prompt.
iwslt17_en_de_lighteval = LightevalTaskConfig(
    name="iwslt17:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_en-de",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_reverse_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `en-fr` translation task: BLEU, chrF and TER on the `iwslt17_en-fr`
# subset with the alphabetical WMT prompt.
iwslt17_en_fr_lighteval = LightevalTaskConfig(
    name="iwslt17:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_en-fr",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `en-ja` translation task: BLEU, chrF and TER on the `iwslt17_en-ja`
# subset with the alphabetical WMT prompt.
iwslt17_en_ja_lighteval = LightevalTaskConfig(
    name="iwslt17:en-ja",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_en-ja",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `en-ko` translation task: BLEU, chrF and TER on the `iwslt17_en-ko`
# subset with the alphabetical WMT prompt.
iwslt17_en_ko_lighteval = LightevalTaskConfig(
    name="iwslt17:en-ko",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_en-ko",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `en-zh` translation task: BLEU, chrF and TER on the `iwslt17_en-zh`
# subset with the alphabetical WMT prompt.
iwslt17_en_zh_lighteval = LightevalTaskConfig(
    name="iwslt17:en-zh",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_en-zh",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `fr-en` translation task: BLEU, chrF and TER on the `iwslt17_fr-en`
# subset with the reverse-alphabetical WMT prompt.
iwslt17_fr_en_lighteval = LightevalTaskConfig(
    name="iwslt17:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_fr-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_reverse_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `ja-en` translation task: BLEU, chrF and TER on the `iwslt17_ja-en`
# subset with the reverse-alphabetical WMT prompt.
iwslt17_ja_en_lighteval = LightevalTaskConfig(
    name="iwslt17:ja-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_ja-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_reverse_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `ko-en` translation task: BLEU, chrF and TER on the `iwslt17_ko-en`
# subset with the reverse-alphabetical WMT prompt.
iwslt17_ko_en_lighteval = LightevalTaskConfig(
    name="iwslt17:ko-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_ko-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_reverse_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# IWSLT17 `zh-en` translation task: BLEU, chrF and TER on the `iwslt17_zh-en`
# subset with the reverse-alphabetical WMT prompt.
iwslt17_zh_en_lighteval = LightevalTaskConfig(
    name="iwslt17:zh-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="iwslt17_zh-en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.wmt_reverse_alphabetical,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=None,
    stop_sequence=["\n"],
)
# Jeopardy QA task: the prompt is built with the English QA template, using each
# row's `question` field and its `answer` as the single reference choice.
# Evaluation and few-shot examples both come from the `train` split.
jeopardy = LightevalTaskConfig(
    name="jeopardy",
    suite=("lighteval",),
    hf_repo="openaccess-ai-collective/jeopardy",
    hf_subset="default",
    evaluation_splits=("train",),
    few_shots_split="train",
    prompt_function=get_qa_prompt_function(
        Language.ENGLISH,
        lambda line: {"question": line["question"], "choices": [line["answer"]]},
    ),
    metric=(Metrics.prefix_quasi_exact_match, Metrics.f1_score_quasi),
    generation_size=250,
    stop_sequence=["\n", "Question:", "question:"],
)
# BigBench `kanji_ascii` task: log-likelihood accuracy on the `default` split.
kanji_ascii_bigbench = LightevalTaskConfig(
    name="kanji_ascii",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="kanji_ascii",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `kannada` task: log-likelihood accuracy on the `default` split.
kannada_bigbench = LightevalTaskConfig(
    name="kannada",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="kannada",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench `key_value_maps` task: log-likelihood accuracy on the `default` split.
key_value_maps_bigbench = LightevalTaskConfig(
    name="key_value_maps",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="key_value_maps",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench (lite) `known_unknowns` task: log-likelihood accuracy on the
# `default` split, using the linefeed-before / whitespace-after-query prompt.
known_unknowns_bigbench_lite = LightevalTaskConfig(
    name="known_unknowns",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="known_unknowns",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# LAMBADA (standard): target perplexity on the `test` split of the
# `lambada` / `plain_text` dataset.
lambada_standard_lighteval = LightevalTaskConfig(
    name="lambada:standard",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="lambada",
    hf_subset="plain_text",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (standard, cloze prompt): target perplexity on the `test` split of
# the `lambada` / `plain_text` dataset.
lambada_standard_cloze_lighteval = LightevalTaskConfig(
    name="lambada:standard_cloze",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="lambada",
    hf_subset="plain_text",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada_cloze,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, `default` subset): target perplexity on the `test` split.
lambada_openai_lighteval = LightevalTaskConfig(
    name="lambada:openai",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, `de` subset): target perplexity on the `test` split.
lambada_openai_de_lighteval = LightevalTaskConfig(
    name="lambada:openai:de",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="de",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, `en` subset): target perplexity on the `test` split.
lambada_openai_en_lighteval = LightevalTaskConfig(
    name="lambada:openai:en",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, `es` subset): target perplexity on the `test` split.
lambada_openai_es_lighteval = LightevalTaskConfig(
    name="lambada:openai:es",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="es",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, `fr` subset): target perplexity on the `test` split.
lambada_openai_fr_lighteval = LightevalTaskConfig(
    name="lambada:openai:fr",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="fr",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, `it` subset): target perplexity on the `test` split.
lambada_openai_it_lighteval = LightevalTaskConfig(
    name="lambada:openai:it",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="it",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# LAMBADA (OpenAI variant, cloze prompt): target perplexity on the `test` split
# of the `en` subset.
lambada_openai_cloze_lighteval = LightevalTaskConfig(
    name="lambada:openai_cloze",
    suite=["lighteval", "lambada"],
    version=0,
    hf_repo="EleutherAI/lambada_openai",
    hf_subset="en",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lambada_cloze,
    metric=[Metrics.target_perplexity],
    generation_size=10,
    stop_sequence=["\n"],
)
# BigBench `language_games` task: scored with BLEU, ROUGE (t5) and perfect
# exact match on the `default` split.
# NOTE(review): generation_size=1 paired with BLEU/ROUGE/exact-match metrics
# looks very short for a generative task — confirm it is intended.
language_games_bigbench = LightevalTaskConfig(
    name="language_games",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="language_games",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BigBench (lite) `language_identification` task: log-likelihood accuracy on
# the `default` split.
language_identification_bigbench_lite = LightevalTaskConfig(
    name="language_identification",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="language_identification",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# HELM legal summarization on the `BillSum` subset: ROUGE-1/2/L, faithfulness,
# extractiveness and BERTScore on the `test` split (generation_size=1024).
legal_summarization_billsum_helm = LightevalTaskConfig(
    name="legal_summarization:billsum",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/legal_summarization",
    hf_subset="BillSum",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.legal_summarization,
    metric=[
        Metrics.rouge1,
        Metrics.rouge2,
        Metrics.rougeL,
        Metrics.faithfulness,
        Metrics.extractiveness,
        Metrics.bert_score,
    ],
    generation_size=1024,
    stop_sequence=["\n"],
)
# HELM legal summarization on the `EurLexSum` subset: ROUGE-1/2/L, faithfulness,
# extractiveness and BERTScore on `validation` and `test` (generation_size=2048).
legal_summarization_eurlexsum_helm = LightevalTaskConfig(
    name="legal_summarization:eurlexsum",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/legal_summarization",
    hf_subset="EurLexSum",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.legal_summarization,
    metric=[
        Metrics.rouge1,
        Metrics.rouge2,
        Metrics.rougeL,
        Metrics.faithfulness,
        Metrics.extractiveness,
        Metrics.bert_score,
    ],
    generation_size=2048,
    stop_sequence=["\n"],
)
# HELM legal summarization on the `MultiLexSum` subset, using the dedicated
# `prompt.multilexsum` prompt: ROUGE-1/2/L, faithfulness, extractiveness and
# BERTScore on `validation` and `test` (generation_size=256).
legal_summarization_multilexsum_helm = LightevalTaskConfig(
    name="legal_summarization:multilexsum",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/legal_summarization",
    hf_subset="MultiLexSum",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.multilexsum,
    metric=[
        Metrics.rouge1,
        Metrics.rouge2,
        Metrics.rougeL,
        Metrics.faithfulness,
        Metrics.extractiveness,
        Metrics.bert_score,
    ],
    generation_size=256,
    stop_sequence=["\n"],
)
# HELM `legalsupport` task: log-likelihood accuracy plus exact-match variants
# on the `validation` and `test` splits of `lighteval/LegalSupport`.
legalsupport_helm = LightevalTaskConfig(
    name="legalsupport",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/LegalSupport",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.legal_support,
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=None,
    stop_sequence=["\n"],
)
# HELM LexGLUE `case_hold` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=5).
lexglue_case_hold_helm = LightevalTaskConfig(
    name="lexglue:case_hold",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="case_hold",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_case_hold,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# HELM LexGLUE `ecthr_a` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=20).
lexglue_ecthr_a_helm = LightevalTaskConfig(
    name="lexglue:ecthr_a",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="ecthr_a",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_ecthr_a,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM LexGLUE `ecthr_b` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=20).
lexglue_ecthr_b_helm = LightevalTaskConfig(
    name="lexglue:ecthr_b",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="ecthr_b",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_ecthr_b,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM LexGLUE `eurlex` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=20).
lexglue_eurlex_helm = LightevalTaskConfig(
    name="lexglue:eurlex",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="eurlex",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_eurlex,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM LexGLUE `ledgar` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=20).
lexglue_ledgar_helm = LightevalTaskConfig(
    name="lexglue:ledgar",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="ledgar",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_ledgar,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM LexGLUE `scotus` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=5).
lexglue_scotus_helm = LightevalTaskConfig(
    name="lexglue:scotus",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="scotus",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_scotus,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# HELM LexGLUE `unfair_tos` task: exact match and F1 variants on the
# `validation` and `test` splits (generation_size=20).
lexglue_unfair_tos_helm = LightevalTaskConfig(
    name="lexglue:unfair_tos",
    suite=["helm", "lex_glue_scenario"],
    version=0,
    hf_repo="lighteval/lexglue",
    hf_subset="unfair_tos",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lex_glue_unfair_tos,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM LEXTREME `brazilian_court_decisions_judgment` task: exact match and F1
# variants on the `validation` and `test` splits (generation_size=5).
lextreme_brazilian_court_decisions_judgment_helm = LightevalTaskConfig(
    name="lextreme:brazilian_court_decisions_judgment",
    suite=["helm", "lextreme_scenario"],
    version=0,
    hf_repo="lighteval/lextreme",
    hf_subset="brazilian_court_decisions_judgment",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lextreme_brazilian_court_decisions_judgment,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# HELM LEXTREME `brazilian_court_decisions_unanimity` task: exact match and F1
# variants on the `validation` and `test` splits (generation_size=5).
lextreme_brazilian_court_decisions_unanimity_helm = LightevalTaskConfig(
    name="lextreme:brazilian_court_decisions_unanimity",
    suite=["helm", "lextreme_scenario"],
    version=0,
    hf_repo="lighteval/lextreme",
    hf_subset="brazilian_court_decisions_unanimity",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lextreme_brazilian_court_decisions_unanimity,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# HELM LEXTREME `covid19_emergency_event` task: exact match and F1 variants on
# the `validation` and `test` splits (generation_size=10).
lextreme_covid19_emergency_event_helm = LightevalTaskConfig(
    name="lextreme:covid19_emergency_event",
    suite=["helm", "lextreme_scenario"],
    version=0,
    hf_repo="lighteval/lextreme",
    hf_subset="covid19_emergency_event",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lextreme_covid19_emergency_event,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=10,
    stop_sequence=["\n"],
)
# HELM LEXTREME `german_argument_mining` task: exact match and F1 variants on
# the `validation` and `test` splits (generation_size=5).
lextreme_german_argument_mining_helm = LightevalTaskConfig(
    name="lextreme:german_argument_mining",
    suite=["helm", "lextreme_scenario"],
    version=0,
    hf_repo="lighteval/lextreme",
    hf_subset="german_argument_mining",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.lextreme_german_argument_mining,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
lextreme_greek_legal_code_chapter_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:greek_legal_code_chapter",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="greek_legal_code_chapter",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_greek_legal_code_chapter,
    generation_size=20,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_greek_legal_code_subject_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:greek_legal_code_subject",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="greek_legal_code_subject",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_greek_legal_code_subject,
    generation_size=20,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_greek_legal_code_volume_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:greek_legal_code_volume",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="greek_legal_code_volume",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_greek_legal_code_volume,
    generation_size=20,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    # NOTE(review): the other lextreme tasks in this file also list
    # f1_score_macro / f1_score_micro; confirm the omission here is intended.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
    ],
    version=0,
)
lextreme_greek_legal_ner_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:greek_legal_ner",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="greek_legal_ner",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_greek_legal_ner,
    generation_size=430,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_legalnero_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:legalnero",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="legalnero",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_legalnero,
    generation_size=788,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_lener_br_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:lener_br",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="lener_br",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_lener_br,
    generation_size=338,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_mapa_coarse_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:mapa_coarse",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="mapa_coarse",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_mapa_coarse,
    generation_size=274,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_mapa_fine_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:mapa_fine",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="mapa_fine",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_mapa_fine,
    generation_size=274,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_multi_eurlex_level_1_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:multi_eurlex_level_1",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="multi_eurlex_level_1",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_multi_eurlex_level_1,
    generation_size=10,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_multi_eurlex_level_2_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:multi_eurlex_level_2",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="multi_eurlex_level_2",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_multi_eurlex_level_2,
    generation_size=10,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_multi_eurlex_level_3_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:multi_eurlex_level_3",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="multi_eurlex_level_3",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_multi_eurlex_level_3,
    generation_size=10,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_online_terms_of_service_clause_topics_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:online_terms_of_service_clause_topics",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="online_terms_of_service_clause_topics",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_online_terms_of_service_clause_topics,
    generation_size=10,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_online_terms_of_service_unfairness_levels_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:online_terms_of_service_unfairness_levels",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="online_terms_of_service_unfairness_levels",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_online_terms_of_service_unfairness_levels,
    generation_size=10,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
lextreme_swiss_judgment_prediction_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lextreme:swiss_judgment_prediction",
    suite=["helm", "lextreme_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lextreme",
    hf_subset="swiss_judgment_prediction",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lextreme_swiss_judgment_prediction,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    version=0,
)
linguistic_mappings_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="linguistic_mappings",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="linguistic_mappings",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.perfect_exact_match,
    ],
    version=0,
)
linguistics_puzzles_bigbench_lite = LightevalTaskConfig(
    # Task identity and suite membership.
    name="linguistics_puzzles",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="linguistics_puzzles",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings; no stop sequence is set here.
    prompt_function=prompt.bigbench_whitespace_after_query,
    generation_size=100,
    stop_sequence=None,
    # Metrics computed for this task.
    metric=[
        Metrics.bleu,
        Metrics.rouge_t5,
        Metrics.perfect_exact_match,
    ],
    version=0,
)
logic_grid_puzzle_bigbench_lite = LightevalTaskConfig(
    # Task identity and suite membership.
    name="logic_grid_puzzle",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="logic_grid_puzzle",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
logical_args_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="logical_args",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="logical_args",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
logical_deduction_bigbench_lite = LightevalTaskConfig(
    # Task identity and suite membership.
    name="logical_deduction",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="logical_deduction",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench_whitespace_after_query,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
logical_fallacy_detection_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="logical_fallacy_detection",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="logical_fallacy_detection",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
logical_sequence_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="logical_sequence",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="logical_sequence",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
logiqa_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="logiqa",
    suite=["lighteval"],
    # Dataset source and splits.
    hf_repo="lighteval/logiqa_harness",
    hf_subset="logiqa",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.logiqa,
    generation_size=-1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm_nospace,
    ],
    version=0,
)
lsat_qa_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lsat_qa",
    suite=["helm", "lsat_qa_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lsat_qa",
    hf_subset="all",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lsat_qa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
lsat_qa_assignment_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lsat_qa:assignment",
    suite=["helm", "lsat_qa_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lsat_qa",
    hf_subset="assignment",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lsat_qa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
lsat_qa_grouping_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lsat_qa:grouping",
    suite=["helm", "lsat_qa_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lsat_qa",
    hf_subset="grouping",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lsat_qa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
lsat_qa_miscellaneous_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lsat_qa:miscellaneous",
    suite=["helm", "lsat_qa_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lsat_qa",
    hf_subset="miscellaneous",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lsat_qa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
lsat_qa_ordering_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="lsat_qa:ordering",
    suite=["helm", "lsat_qa_scenario"],
    # Dataset source and splits.
    hf_repo="lighteval/lsat_qa",
    hf_subset="ordering",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.lsat_qa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
math_500 = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_500",
    suite=["lighteval"],
    # Dataset source; only a "test" split is listed.
    hf_repo="HuggingFaceH4/MATH-500",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings (no stop sequence is passed).
    prompt_function=prompt.math_500,
    generation_size=32768,
    # Metrics computed for this task; the pass@1 variants are currently disabled.
    metric=[
        Metrics.latex_gold_metrics,
        # Metrics.math_pass_at_1_1n,
        # Metrics.math_pass_at_1_4n,
    ],
    version=2,
)
math_500_gpassk = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_500_gpassk",
    suite=["lighteval"],
    # Dataset source; only a "test" split is listed.
    hf_repo="HuggingFaceH4/MATH-500",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings (no stop sequence is passed).
    prompt_function=prompt.math_500,
    generation_size=8192,
    # Metrics computed for this task.
    metric=[
        Metrics.g_pass_at_16_latex_gold,
    ],
    version=1,
)
math_algebra_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:algebra",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="algebra",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_counting_and_probability_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:counting_and_probability",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="counting_and_probability",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_geometry_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:geometry",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="geometry",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_intermediate_algebra_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:intermediate_algebra",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="intermediate_algebra",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_number_theory_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:number_theory",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="number_theory",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_prealgebra_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:prealgebra",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="prealgebra",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_precalculus_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math:precalculus",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="precalculus",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=1,
)
math_cot_algebra_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:algebra",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="algebra",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
math_cot_counting_and_probability_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:counting_and_probability",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="counting_and_probability",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
math_cot_geometry_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:geometry",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="geometry",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
math_cot_intermediate_algebra_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:intermediate_algebra",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="intermediate_algebra",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
math_cot_number_theory_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:number_theory",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="number_theory",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
math_cot_prealgebra_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:prealgebra",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="prealgebra",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
math_cot_precalculus_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="math_cot:precalculus",
    suite=["lighteval", "math"],
    # Dataset source and splits.
    hf_repo="DigitalLearningGmbH/MATH-lighteval",
    hf_subset="precalculus",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.math_cot,
    generation_size=2048,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.quasi_exact_match_math,
        Metrics.maj_at_4_math,
    ],
    version=0,
)
mathematical_induction_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="mathematical_induction",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="mathematical_induction",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
mathqa_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="mathqa",
    suite=["lighteval"],
    # Dataset source and splits.
    hf_repo="math_qa",
    hf_subset="default",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.mathqa,
    generation_size=-1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm,
    ],
    version=0,
)
matrixshapes_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="matrixshapes",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="matrixshapes",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.perfect_exact_match,
    ],
    version=0,
)
me_q_sum_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="me_q_sum",
    suite=["helm"],
    # Dataset source and splits.
    hf_repo="lighteval/me_q_sum",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.me_q_sum,
    generation_size=128,
    stop_sequence=["\n"],
    # Metrics computed for this task. BLEU-1 / BLEU-4 are referenced through
    # the Metrics enum like every other entry, rather than as bare strings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.rougeL,
        Metrics.bleu_1,
        Metrics.bleu_4,
    ],
    version=0,
)
med_dialog_healthcaremagic_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="med_dialog:healthcaremagic",
    suite=["helm"],
    # Dataset source and splits.
    hf_repo="lighteval/med_dialog",
    hf_subset="healthcaremagic",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.med_dialog,
    generation_size=128,
    stop_sequence=["\n"],
    # Metrics computed for this task. BLEU-1 / BLEU-4 are referenced through
    # the Metrics enum like every other entry, rather than as bare strings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.rougeL,
        Metrics.bleu_1,
        Metrics.bleu_4,
    ],
    version=0,
)
med_dialog_icliniq_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="med_dialog:icliniq",
    suite=["helm"],
    # Dataset source and splits.
    hf_repo="lighteval/med_dialog",
    hf_subset="icliniq",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.med_dialog,
    generation_size=128,
    stop_sequence=["\n"],
    # Metrics computed for this task. BLEU-1 / BLEU-4 are referenced through
    # the Metrics enum like every other entry, rather than as bare strings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.rougeL,
        Metrics.bleu_1,
        Metrics.bleu_4,
    ],
    version=0,
)
med_mcqa_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="med_mcqa",
    suite=["helm"],
    # Dataset source and splits; only "validation" is evaluated.
    hf_repo="lighteval/med_mcqa",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.med_mcqa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
med_paragraph_simplification_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="med_paragraph_simplification",
    suite=["helm"],
    # Dataset source and splits.
    hf_repo="lighteval/med_paragraph_simplification",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.med_paragraph_simplification,
    generation_size=512,
    stop_sequence=["\n"],
    # Metrics computed for this task. BLEU-1 / BLEU-4 are referenced through
    # the Metrics enum like every other entry, rather than as bare strings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.rougeL,
        Metrics.bleu_1,
        Metrics.bleu_4,
    ],
    version=0,
)
med_qa_helm = LightevalTaskConfig(
    # Task identity and suite membership.
    name="med_qa",
    suite=["helm"],
    # Dataset source and splits.
    hf_repo="bigbio/med_qa",
    hf_subset="med_qa_en_source",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.med_qa,
    generation_size=5,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    version=0,
)
metaphor_boolean_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="metaphor_boolean",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="metaphor_boolean",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
metaphor_understanding_bigbench = LightevalTaskConfig(
    # Task identity and suite membership.
    name="metaphor_understanding",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits.
    hf_repo="bigbench",
    hf_subset="metaphor_understanding",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings.
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    # Metrics computed for this task.
    metric=[
        Metrics.loglikelihood_acc,
    ],
    version=0,
)
mgsm_en_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="mgsm:en",
    suite=["lighteval"],
    # Dataset source and splits.
    hf_repo="juletxara/mgsm",
    hf_subset="en",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings; generation_size is left as None.
    prompt_function=prompt.mgsm_en,
    generation_size=None,
    # NOTE(review): "Question=" itself contains "=", which is already a stop
    # sequence here — confirm the intended question marker.
    stop_sequence=["\n", "=", "Question="],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
    ],
    version=0,
)
mgsm_es_lighteval = LightevalTaskConfig(
    # Task identity and suite membership.
    name="mgsm:es",
    suite=["lighteval"],
    # Dataset source and splits.
    hf_repo="juletxara/mgsm",
    hf_subset="es",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # No few-shot split or selection strategy is set.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction and generation settings; generation_size is left as None.
    prompt_function=prompt.mgsm_es,
    generation_size=None,
    # NOTE(review): "Pregunta=" itself contains "=", which is already a stop
    # sequence here — confirm the intended question marker.
    stop_sequence=["\n", "=", "Pregunta="],
    # Metrics computed for this task.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
    ],
    version=0,
)
# "mgsm:fr": "fr" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_fr_lighteval = LightevalTaskConfig(
    name="mgsm:fr",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="fr",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_fr,
    generation_size=None,
    stop_sequence=["\n", "=", "Question="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:de": "de" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_de_lighteval = LightevalTaskConfig(
    name="mgsm:de",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="de",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_de,
    generation_size=None,
    stop_sequence=["\n", "=", "Frage="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:ru": "ru" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_ru_lighteval = LightevalTaskConfig(
    name="mgsm:ru",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="ru",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_ru,
    generation_size=None,
    stop_sequence=["\n", "=", "\u0417\u0430\u0434\u0430\u0447\u0430="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:zh": "zh" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_zh_lighteval = LightevalTaskConfig(
    name="mgsm:zh",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="zh",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_zh,
    generation_size=None,
    stop_sequence=["\n", "=", "\u95ee\u9898="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:ja": "ja" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_ja_lighteval = LightevalTaskConfig(
    name="mgsm:ja",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="ja",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_ja,
    generation_size=None,
    stop_sequence=["\n", "=", "\u554f\u984c="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:th": "th" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_th_lighteval = LightevalTaskConfig(
    name="mgsm:th",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="th",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_th,
    generation_size=None,
    stop_sequence=["\n", "=", "\u0e42\u0e08\u0e17\u0e22\u0e4c="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:sw": "sw" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_sw_lighteval = LightevalTaskConfig(
    name="mgsm:sw",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="sw",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_sw,
    generation_size=None,
    stop_sequence=["\n", "=", "Swali="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:bn": "bn" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_bn_lighteval = LightevalTaskConfig(
    name="mgsm:bn",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="bn",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_bn,
    generation_size=None,
    stop_sequence=["\n", "=", "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "mgsm:te": "te" subset of juletxara/mgsm, scored with exact_match / quasi_exact_match on "test".
# NOTE(review): the third stop string ends with "=", which is itself a stop string; presumably redundant - confirm.
mgsm_te_lighteval = LightevalTaskConfig(
    name="mgsm:te",
    suite=["lighteval"],
    version=0,
    hf_repo="juletxara/mgsm",
    hf_subset="te",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.mgsm_te,
    generation_size=None,
    stop_sequence=["\n", "=", "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28="],
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
)
# "minute_mysteries_qa" subset of the "bigbench" repo, registered in the bigbench and bigbench_json suites.
# Scored with loglikelihood_acc and rouge_t5 on the "default" split; no few-shot split is configured.
minute_mysteries_qa_bigbench = LightevalTaskConfig(
    name="minute_mysteries_qa",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="minute_mysteries_qa",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc, Metrics.rouge_t5],
)
# "misconceptions" subset of the "bigbench" repo, registered in the bigbench and bigbench_json suites.
# Scored with loglikelihood_acc on the "default" split; no few-shot split is configured.
misconceptions_bigbench = LightevalTaskConfig(
    name="misconceptions",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="misconceptions",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "misconceptions_russian" subset of the "bigbench" repo; additionally listed in the bigbench_lite suite.
# Scored with loglikelihood_acc on the "default" split; no few-shot split is configured.
misconceptions_russian_bigbench_lite = LightevalTaskConfig(
    name="misconceptions_russian",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="misconceptions_russian",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:abstract_algebra" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_abstract_algebra_original = LightevalTaskConfig(
    name="mmlu:abstract_algebra",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="abstract_algebra",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_abstract_algebra,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:abstract_algebra" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_abstract_algebra_leaderboard = LightevalTaskConfig(
    name="mmlu:abstract_algebra",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="abstract_algebra",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:abstract_algebra" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_abstract_algebra_helm = LightevalTaskConfig(
    name="mmlu:abstract_algebra",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="abstract_algebra",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:anatomy" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_anatomy_original = LightevalTaskConfig(
    name="mmlu:anatomy",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="anatomy",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_anatomy,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:anatomy" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_anatomy_leaderboard = LightevalTaskConfig(
    name="mmlu:anatomy",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="anatomy",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:anatomy" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_anatomy_helm = LightevalTaskConfig(
    name="mmlu:anatomy",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="anatomy",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:astronomy" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_astronomy_original = LightevalTaskConfig(
    name="mmlu:astronomy",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="astronomy",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_astronomy,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:astronomy" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_astronomy_leaderboard = LightevalTaskConfig(
    name="mmlu:astronomy",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="astronomy",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:astronomy" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_astronomy_helm = LightevalTaskConfig(
    name="mmlu:astronomy",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="astronomy",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:business_ethics" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_business_ethics_original = LightevalTaskConfig(
    name="mmlu:business_ethics",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="business_ethics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_business_ethics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:business_ethics" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_business_ethics_leaderboard = LightevalTaskConfig(
    name="mmlu:business_ethics",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="business_ethics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:business_ethics" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_business_ethics_helm = LightevalTaskConfig(
    name="mmlu:business_ethics",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="business_ethics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:clinical_knowledge" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_clinical_knowledge_original = LightevalTaskConfig(
    name="mmlu:clinical_knowledge",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="clinical_knowledge",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_clinical_knowledge,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:clinical_knowledge" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_clinical_knowledge_leaderboard = LightevalTaskConfig(
    name="mmlu:clinical_knowledge",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="clinical_knowledge",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:clinical_knowledge" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_clinical_knowledge_helm = LightevalTaskConfig(
    name="mmlu:clinical_knowledge",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="clinical_knowledge",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:college_biology" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_biology_original = LightevalTaskConfig(
    name="mmlu:college_biology",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="college_biology",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_college_biology,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_biology" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_biology_leaderboard = LightevalTaskConfig(
    name="mmlu:college_biology",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_biology",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_biology" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_college_biology_helm = LightevalTaskConfig(
    name="mmlu:college_biology",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_biology",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:college_chemistry" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_chemistry_original = LightevalTaskConfig(
    name="mmlu:college_chemistry",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="college_chemistry",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_college_chemistry,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_chemistry" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_chemistry_leaderboard = LightevalTaskConfig(
    name="mmlu:college_chemistry",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_chemistry",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_chemistry" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_college_chemistry_helm = LightevalTaskConfig(
    name="mmlu:college_chemistry",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_chemistry",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:college_computer_science" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_computer_science_original = LightevalTaskConfig(
    name="mmlu:college_computer_science",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="college_computer_science",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_college_computer_science,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_computer_science" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_computer_science_leaderboard = LightevalTaskConfig(
    name="mmlu:college_computer_science",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_computer_science",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_computer_science" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_college_computer_science_helm = LightevalTaskConfig(
    name="mmlu:college_computer_science",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_computer_science",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:college_mathematics" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_mathematics_original = LightevalTaskConfig(
    name="mmlu:college_mathematics",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="college_mathematics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_college_mathematics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_mathematics" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_mathematics_leaderboard = LightevalTaskConfig(
    name="mmlu:college_mathematics",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_mathematics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_mathematics" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_college_mathematics_helm = LightevalTaskConfig(
    name="mmlu:college_mathematics",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_mathematics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:college_medicine" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_medicine_original = LightevalTaskConfig(
    name="mmlu:college_medicine",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="college_medicine",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_college_medicine,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_medicine" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_medicine_leaderboard = LightevalTaskConfig(
    name="mmlu:college_medicine",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_medicine",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_medicine" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_college_medicine_helm = LightevalTaskConfig(
    name="mmlu:college_medicine",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_medicine",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:college_physics" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_physics_original = LightevalTaskConfig(
    name="mmlu:college_physics",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="college_physics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_college_physics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_physics" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_college_physics_leaderboard = LightevalTaskConfig(
    name="mmlu:college_physics",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_physics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:college_physics" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_college_physics_helm = LightevalTaskConfig(
    name="mmlu:college_physics",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="college_physics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:computer_security" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_computer_security_original = LightevalTaskConfig(
    name="mmlu:computer_security",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="computer_security",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_computer_security,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:computer_security" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_computer_security_leaderboard = LightevalTaskConfig(
    name="mmlu:computer_security",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="computer_security",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:computer_security" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_computer_security_helm = LightevalTaskConfig(
    name="mmlu:computer_security",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="computer_security",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:conceptual_physics" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_conceptual_physics_original = LightevalTaskConfig(
    name="mmlu:conceptual_physics",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="conceptual_physics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_conceptual_physics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:conceptual_physics" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_conceptual_physics_leaderboard = LightevalTaskConfig(
    name="mmlu:conceptual_physics",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="conceptual_physics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:conceptual_physics" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_conceptual_physics_helm = LightevalTaskConfig(
    name="mmlu:conceptual_physics",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="conceptual_physics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:econometrics" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_econometrics_original = LightevalTaskConfig(
    name="mmlu:econometrics",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="econometrics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_econometrics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:econometrics" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_econometrics_leaderboard = LightevalTaskConfig(
    name="mmlu:econometrics",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="econometrics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:econometrics" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_econometrics_helm = LightevalTaskConfig(
    name="mmlu:econometrics",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="econometrics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:electrical_engineering" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_electrical_engineering_original = LightevalTaskConfig(
    name="mmlu:electrical_engineering",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="electrical_engineering",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_electrical_engineering,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:electrical_engineering" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_electrical_engineering_leaderboard = LightevalTaskConfig(
    name="mmlu:electrical_engineering",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="electrical_engineering",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:electrical_engineering" for the helm/helm_general suites: lighteval/mmlu data, mmlu_helm prompt.
# Few-shot split "dev" with no selection strategy set; exact-match family metrics on "test", generation_size=5.
mmlu_electrical_engineering_helm = LightevalTaskConfig(
    name="mmlu:electrical_engineering",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="electrical_engineering",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select=None,
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# "mmlu:elementary_mathematics" for the original/mmlu suites: cais/mmlu data, subject-specific prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_elementary_mathematics_original = LightevalTaskConfig(
    name="mmlu:elementary_mathematics",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="elementary_mathematics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_elementary_mathematics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# "mmlu:elementary_mathematics" for the leaderboard/mmlu suites: lighteval/mmlu data, mmlu_harness prompt.
# Few-shot split "dev" with "sequential" selection; loglikelihood_acc on "test".
mmlu_elementary_mathematics_leaderboard = LightevalTaskConfig(
    name="mmlu:elementary_mathematics",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="elementary_mathematics",
    trust_dataset=True,
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    few_shots_split="dev",
    few_shots_select="sequential",
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "elementary_mathematics" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_elementary_mathematics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:elementary_mathematics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="elementary_mathematics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "formal_logic" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_formal_logic_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:formal_logic",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="formal_logic",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_formal_logic,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "formal_logic" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_formal_logic_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:formal_logic",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="formal_logic",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "formal_logic" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_formal_logic_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:formal_logic",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="formal_logic",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "global_facts" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_global_facts_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:global_facts",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="global_facts",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_global_facts,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "global_facts" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_global_facts_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:global_facts",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="global_facts",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "global_facts" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_global_facts_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:global_facts",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="global_facts",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_biology" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_biology_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_biology",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_biology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_biology,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_biology" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_biology_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_biology",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_biology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_biology" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_biology_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_biology",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_biology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_chemistry" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_chemistry_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_chemistry",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_chemistry",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_chemistry,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_chemistry" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_chemistry_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_chemistry",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_chemistry",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_chemistry" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_chemistry_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_chemistry",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_chemistry",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_computer_science" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_computer_science_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_computer_science",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_computer_science",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_computer_science,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_computer_science" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_computer_science_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_computer_science",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_computer_science",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_computer_science" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_computer_science_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_computer_science",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_computer_science",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_european_history" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_european_history_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_european_history",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_european_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_european_history,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_european_history" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_european_history_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_european_history",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_european_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_european_history" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_european_history_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_european_history",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_european_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_geography" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_geography_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_geography",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_geography",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_geography,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_geography" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_geography_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_geography",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_geography",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_geography" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_geography_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_geography",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_geography",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_government_and_politics" for the "original"/"mmlu"
# suites: subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_government_and_politics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_government_and_politics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_government_and_politics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_government_and_politics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_government_and_politics" for the "leaderboard"/"mmlu"
# suites: harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_government_and_politics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_government_and_politics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_government_and_politics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_government_and_politics" for the "helm"/"helm_general"
# suites: HELM prompt, exact-match metric family on the test split.
mmlu_high_school_government_and_politics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_government_and_politics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_government_and_politics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_macroeconomics" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_macroeconomics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_macroeconomics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_macroeconomics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_macroeconomics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_macroeconomics" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_macroeconomics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_macroeconomics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_macroeconomics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_macroeconomics" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_macroeconomics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_macroeconomics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_macroeconomics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_mathematics" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_mathematics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_mathematics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_mathematics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_mathematics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_mathematics" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_mathematics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_mathematics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_mathematics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_mathematics" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_mathematics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_mathematics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_mathematics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_microeconomics" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_microeconomics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_microeconomics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_microeconomics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_microeconomics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_microeconomics" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_microeconomics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_microeconomics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_microeconomics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_microeconomics" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_microeconomics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_microeconomics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_microeconomics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_physics" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_physics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_physics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_physics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_physics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_physics" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_physics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_physics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_physics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_physics" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_physics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_physics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_physics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_psychology" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_psychology_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_psychology",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_psychology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_psychology,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_psychology" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_psychology_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_psychology",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_psychology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_psychology" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_psychology_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_psychology",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_psychology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_statistics" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_statistics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_statistics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_statistics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_statistics,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_statistics" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_statistics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_statistics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_statistics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_statistics" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_statistics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_statistics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_statistics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_us_history" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_us_history_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_us_history",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_us_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_us_history,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_us_history" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_us_history_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_us_history",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_us_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_us_history" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_us_history_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_us_history",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_us_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "high_school_world_history" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_high_school_world_history_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_world_history",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="high_school_world_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_high_school_world_history,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_world_history" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_high_school_world_history_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_world_history",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_world_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "high_school_world_history" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_high_school_world_history_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:high_school_world_history",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="high_school_world_history",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "human_aging" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_human_aging_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:human_aging",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="human_aging",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_human_aging,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "human_aging" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_human_aging_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:human_aging",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="human_aging",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "human_aging" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_human_aging_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:human_aging",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="human_aging",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "human_sexuality" for the "original"/"mmlu" suites:
# subject-specific prompt, log-likelihood accuracy on the test split.
mmlu_human_sexuality_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:human_sexuality",
    suite=["original", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="cais/mmlu",
    hf_subset="human_sexuality",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_human_sexuality,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "human_sexuality" for the "leaderboard"/"mmlu" suites:
# harness prompt, log-likelihood accuracy on the test split.
mmlu_human_sexuality_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:human_sexuality",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="human_sexuality",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select="sequential",
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_harness,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "human_sexuality" for the "helm"/"helm_general" suites:
# HELM prompt, exact-match metric family on the test split.
mmlu_human_sexuality_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:human_sexuality",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset source and splits
    hf_repo="lighteval/mmlu",
    hf_subset="human_sexuality",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Few-shot examples
    few_shots_split="dev",
    few_shots_select=None,
    # Prompting, generation and scoring
    prompt_function=prompt.mmlu_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "international_law" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_international_law_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:international_law",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="international_law",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_international_law,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "international_law" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_international_law_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:international_law",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="international_law",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "international_law" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_international_law_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:international_law",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="international_law",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "jurisprudence" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_jurisprudence_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:jurisprudence",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="jurisprudence",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_jurisprudence,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "jurisprudence" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_jurisprudence_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:jurisprudence",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="jurisprudence",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "jurisprudence" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_jurisprudence_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:jurisprudence",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="jurisprudence",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "logical_fallacies" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_logical_fallacies_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:logical_fallacies",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="logical_fallacies",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_logical_fallacies,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "logical_fallacies" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_logical_fallacies_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:logical_fallacies",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="logical_fallacies",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "logical_fallacies" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_logical_fallacies_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:logical_fallacies",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="logical_fallacies",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "machine_learning" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_machine_learning_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:machine_learning",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="machine_learning",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_machine_learning,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "machine_learning" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_machine_learning_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:machine_learning",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="machine_learning",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "machine_learning" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_machine_learning_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:machine_learning",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="machine_learning",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "management" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_management_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:management",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="management",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_management,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "management" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_management_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:management",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="management",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "management" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_management_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:management",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="management",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "marketing" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_marketing_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:marketing",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="marketing",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_marketing,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "marketing" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_marketing_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:marketing",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="marketing",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "marketing" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_marketing_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:marketing",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="marketing",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "medical_genetics" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_medical_genetics_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:medical_genetics",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="medical_genetics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_medical_genetics,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "medical_genetics" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_medical_genetics_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:medical_genetics",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="medical_genetics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "medical_genetics" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_medical_genetics_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:medical_genetics",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="medical_genetics",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "miscellaneous" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_miscellaneous_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:miscellaneous",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="miscellaneous",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_miscellaneous,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "miscellaneous" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_miscellaneous_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:miscellaneous",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="miscellaneous",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "miscellaneous" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_miscellaneous_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:miscellaneous",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="miscellaneous",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "moral_disputes" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_moral_disputes_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:moral_disputes",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="moral_disputes",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_moral_disputes,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "moral_disputes" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_moral_disputes_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:moral_disputes",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="moral_disputes",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "moral_disputes" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_moral_disputes_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:moral_disputes",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="moral_disputes",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "moral_scenarios" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_moral_scenarios_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:moral_scenarios",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="moral_scenarios",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_moral_scenarios,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "moral_scenarios" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_moral_scenarios_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:moral_scenarios",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="moral_scenarios",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "moral_scenarios" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_moral_scenarios_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:moral_scenarios",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="moral_scenarios",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "nutrition" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_nutrition_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:nutrition",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="nutrition",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_nutrition,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "nutrition" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_nutrition_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:nutrition",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="nutrition",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "nutrition" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_nutrition_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:nutrition",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="nutrition",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "philosophy" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_philosophy_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:philosophy",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="philosophy",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_philosophy,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "philosophy" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_philosophy_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:philosophy",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="philosophy",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "philosophy" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_philosophy_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:philosophy",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="philosophy",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "prehistory" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_prehistory_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:prehistory",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="prehistory",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_prehistory,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "prehistory" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_prehistory_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:prehistory",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="prehistory",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "prehistory" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_prehistory_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:prehistory",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="prehistory",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "professional_accounting" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_accounting_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_accounting",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="professional_accounting",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_professional_accounting,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_accounting" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_accounting_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_accounting",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_accounting",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_accounting" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_professional_accounting_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_accounting",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_accounting",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "professional_law" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_law_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_law",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="professional_law",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_professional_law,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_law" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_law_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_law",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_law",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_law" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_professional_law_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_law",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_law",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "professional_medicine" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_medicine_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_medicine",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="professional_medicine",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_professional_medicine,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_medicine" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_medicine_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_medicine",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_medicine",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_medicine" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_professional_medicine_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_medicine",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_medicine",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "professional_psychology" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_psychology_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_psychology",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="professional_psychology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_professional_psychology,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_psychology" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_professional_psychology_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_psychology",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_psychology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "professional_psychology" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_professional_psychology_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:professional_psychology",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="professional_psychology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "public_relations" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_public_relations_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:public_relations",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="public_relations",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_public_relations,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "public_relations" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_public_relations_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:public_relations",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="public_relations",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "public_relations" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_public_relations_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:public_relations",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="public_relations",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "security_studies" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_security_studies_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:security_studies",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="security_studies",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_security_studies,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "security_studies" subset, leaderboard / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_security_studies_leaderboard = LightevalTaskConfig(
    # Task identity
    name="mmlu:security_studies",
    suite=["leaderboard", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="security_studies",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "security_studies" subset, helm / helm_general suites; scored with the exact-match metric family.
mmlu_security_studies_helm = LightevalTaskConfig(
    # Task identity
    name="mmlu:security_studies",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/mmlu",
    hf_subset="security_studies",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    # Generation limits and scoring
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# MMLU "sociology" subset, original / mmlu suites; scored with Metrics.loglikelihood_acc.
mmlu_sociology_original = LightevalTaskConfig(
    # Task identity
    name="mmlu:sociology",
    suite=["original", "mmlu"],
    version=0,
    # Dataset location and splits
    hf_repo="cais/mmlu",
    hf_subset="sociology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot sampling
    prompt_function=prompt.mmlu_sociology,
    few_shots_split="dev",
    few_shots_select="sequential",
    # Generation limits and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# MMLU "sociology" ("leaderboard" suite): harness prompt, log-likelihood accuracy on the test split.
mmlu_sociology_leaderboard = LightevalTaskConfig(
    name="mmlu:sociology",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="sociology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "sociology" ("helm" suite): generative variant scored with the exact-match metric family.
mmlu_sociology_helm = LightevalTaskConfig(
    name="mmlu:sociology",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="sociology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# MMLU "us_foreign_policy" ("original" suite): cais/mmlu data, log-likelihood accuracy on the test split.
mmlu_us_foreign_policy_original = LightevalTaskConfig(
    name="mmlu:us_foreign_policy",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="us_foreign_policy",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_us_foreign_policy,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "us_foreign_policy" ("leaderboard" suite): harness prompt, log-likelihood accuracy on the test split.
mmlu_us_foreign_policy_leaderboard = LightevalTaskConfig(
    name="mmlu:us_foreign_policy",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="us_foreign_policy",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "us_foreign_policy" ("helm" suite): generative variant scored with the exact-match metric family.
mmlu_us_foreign_policy_helm = LightevalTaskConfig(
    name="mmlu:us_foreign_policy",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="us_foreign_policy",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# MMLU "virology" ("original" suite): cais/mmlu data, log-likelihood accuracy on the test split.
mmlu_virology_original = LightevalTaskConfig(
    name="mmlu:virology",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="virology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_virology,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "virology" ("leaderboard" suite): harness prompt, log-likelihood accuracy on the test split.
mmlu_virology_leaderboard = LightevalTaskConfig(
    name="mmlu:virology",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="virology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "virology" ("helm" suite): generative variant scored with the exact-match metric family.
mmlu_virology_helm = LightevalTaskConfig(
    name="mmlu:virology",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="virology",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# MMLU "world_religions" ("original" suite): cais/mmlu data, log-likelihood accuracy on the test split.
mmlu_world_religions_original = LightevalTaskConfig(
    name="mmlu:world_religions",
    suite=["original", "mmlu"],
    version=0,
    hf_repo="cais/mmlu",
    hf_subset="world_religions",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_world_religions,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "world_religions" ("leaderboard" suite): harness prompt, log-likelihood accuracy on the test split.
mmlu_world_religions_leaderboard = LightevalTaskConfig(
    name="mmlu:world_religions",
    suite=["leaderboard", "mmlu"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="world_religions",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_harness,
    few_shots_split="dev",
    few_shots_select="sequential",
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MMLU "world_religions" ("helm" suite): generative variant scored with the exact-match metric family.
mmlu_world_religions_helm = LightevalTaskConfig(
    name="mmlu:world_religions",
    suite=["helm", "helm_general"],
    version=0,
    hf_repo="lighteval/mmlu",
    hf_subset="world_religions",
    hf_avail_splits=["auxiliary_train", "test", "validation", "dev"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.mmlu_helm,
    few_shots_split="dev",
    few_shots_select=None,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=5,
    stop_sequence=["\n"],
)
# BIG-bench "mnist_ascii": log-likelihood accuracy on the "default" split, no few-shot split configured.
mnist_ascii_bigbench = LightevalTaskConfig(
    name="mnist_ascii",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="mnist_ascii",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "modified_arithmetic": perfect exact match on the "default" split.
modified_arithmetic_bigbench = LightevalTaskConfig(
    name="modified_arithmetic",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="modified_arithmetic",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "moral_permissibility": log-likelihood accuracy on the "default" split.
moral_permissibility_bigbench = LightevalTaskConfig(
    name="moral_permissibility",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="moral_permissibility",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "movie_dialog_same_or_different": log-likelihood accuracy on the "default" split.
movie_dialog_same_or_different_bigbench = LightevalTaskConfig(
    name="movie_dialog_same_or_different",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="movie_dialog_same_or_different",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "movie_recommendation": log-likelihood accuracy on the "default" split.
movie_recommendation_bigbench = LightevalTaskConfig(
    name="movie_recommendation",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="movie_recommendation",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MTNT 2019 en-fr translation: BLEU / chrF / TER on the test split, up to 200 generated tokens.
mtnt2019_en_fr_lighteval = LightevalTaskConfig(
    name="mtnt2019:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="mtnt2019_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=200,
    stop_sequence=["\n"],
)
# MTNT 2019 en-ja translation: BLEU / chrF / TER on the test split, up to 200 generated tokens.
mtnt2019_en_ja_lighteval = LightevalTaskConfig(
    name="mtnt2019:en-ja",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="mtnt2019_en-ja",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=200,
    stop_sequence=["\n"],
)
# MTNT 2019 fr-en translation (reverse-alphabetical prompt): BLEU / chrF / TER on the test split.
mtnt2019_fr_en_lighteval = LightevalTaskConfig(
    name="mtnt2019:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="mtnt2019_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=200,
    stop_sequence=["\n"],
)
# MTNT 2019 ja-en translation (reverse-alphabetical prompt): BLEU / chrF / TER on the test split.
mtnt2019_ja_en_lighteval = LightevalTaskConfig(
    name="mtnt2019:ja-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="mtnt2019_ja-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    generation_size=200,
    stop_sequence=["\n"],
)
# BIG-bench "mult_data_wrangling": perfect exact match on the "default" split.
mult_data_wrangling_bigbench = LightevalTaskConfig(
    name="mult_data_wrangling",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="mult_data_wrangling",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "multiemo": log-likelihood accuracy on the "default" split.
multiemo_bigbench = LightevalTaskConfig(
    name="multiemo",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="multiemo",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MuSR "murder_mysteries": the dataset split of that name is both the only available and the evaluated split.
musr_murder_mysteries = LightevalTaskConfig(
    name="musr:murder_mysteries",
    suite=["lighteval"],
    version=0,
    hf_repo="TAUR-Lab/MuSR",
    hf_subset="default",
    hf_avail_splits=["murder_mysteries"],
    evaluation_splits=["murder_mysteries"],
    trust_dataset=True,
    prompt_function=prompt.musr,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MuSR "object_placements": the dataset split of that name is both the only available and the evaluated split.
musr_object_placements = LightevalTaskConfig(
    name="musr:object_placements",
    suite=["lighteval"],
    version=0,
    hf_repo="TAUR-Lab/MuSR",
    hf_subset="default",
    hf_avail_splits=["object_placements"],
    evaluation_splits=["object_placements"],
    trust_dataset=True,
    prompt_function=prompt.musr,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MuSR "team_allocation": the dataset split of that name is both the only available and the evaluated split.
musr_team_allocation = LightevalTaskConfig(
    name="musr:team_allocation",
    suite=["lighteval"],
    version=0,
    hf_repo="TAUR-Lab/MuSR",
    hf_subset="default",
    hf_avail_splits=["team_allocation"],
    evaluation_splits=["team_allocation"],
    trust_dataset=True,
    prompt_function=prompt.musr,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# MuTual ("mutual" subset): ranking metrics (recall@1, recall@2, MRR) on the validation split.
mutual_lighteval = LightevalTaskConfig(
    name="mutual",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/mutual_harness",
    hf_subset="mutual",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    prompt_function=prompt.mutual,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.recall_at_1, Metrics.recall_at_2, Metrics.mrr],
    generation_size=-1,
    stop_sequence=["\n"],
)
# MuTual ("mutual_plus" subset): ranking metrics (recall@1, recall@2, MRR) on the validation split.
mutual_plus_lighteval = LightevalTaskConfig(
    name="mutual_plus",
    suite=["lighteval"],
    version=0,
    hf_repo="lighteval/mutual_harness",
    hf_subset="mutual_plus",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    prompt_function=prompt.mutual,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.recall_at_1, Metrics.recall_at_2, Metrics.mrr],
    generation_size=-1,
    stop_sequence=["\n"],
)
# NarrativeQA ("helm" suite): generative QA on the test split, up to 100 generated tokens.
narrativeqa_helm = LightevalTaskConfig(
    name="narrativeqa",
    suite=["helm", "helm_general"],
    prompt_function=prompt.narrativeqa,
    hf_repo="lighteval/narrative_qa_helm",
    hf_subset="default",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=100,
    # Every entry is a Metrics member, like the other task configs in this module;
    # BLEU-1 / BLEU-4 were previously passed as bare strings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
        Metrics.rougeL,
        Metrics.bleu_1,
        Metrics.bleu_4,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# BIG-bench "natural_instructions": BLEU and ROUGE (t5) on the "default" split.
# NOTE(review): generation_size=1 looks very short for BLEU/ROUGE scoring; confirm it is intended.
natural_instructions_bigbench = LightevalTaskConfig(
    name="natural_instructions",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="natural_instructions",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.bleu, Metrics.rouge_t5],
    generation_size=1,
    stop_sequence=["\n"],
)
# Natural Questions (small subset): English QA prompt built from each row's "question" field, with the
# row's "answer" as the single choice; scored on the test split with few-shots from "few_shot".
natural_questions = LightevalTaskConfig(
    name="natural_questions",
    suite=("lighteval",),
    hf_repo="lighteval/small_natural_questions",
    hf_subset="default",
    evaluation_splits=("test",),
    few_shots_split="few_shot",
    prompt_function=get_qa_prompt_function(
        Language.ENGLISH,
        lambda row: {"question": row["question"], "choices": [row["answer"]]},
    ),
    metric=(
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_quasi,
    ),
    generation_size=250,
    stop_sequence=["\n", "Question:", "question:"],
)
# BIG-bench "navigate": log-likelihood accuracy on the "default" split.
navigate_bigbench = LightevalTaskConfig(
    name="navigate",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="navigate",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "nonsense_words_grammar": log-likelihood accuracy on the "default" split.
nonsense_words_grammar_bigbench = LightevalTaskConfig(
    name="nonsense_words_grammar",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="nonsense_words_grammar",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "novel_concepts" (also in "bigbench_lite"): log-likelihood accuracy on the "default" split.
novel_concepts_bigbench_lite = LightevalTaskConfig(
    name="novel_concepts",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="novel_concepts",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# HELM numeracy "linear_example": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_linear_example_helm = LightevalTaskConfig(
    name="numeracy:linear_example",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="linear_example",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "linear_standard": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_linear_standard_helm = LightevalTaskConfig(
    name="numeracy:linear_standard",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="linear_standard",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "parabola_example": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_parabola_example_helm = LightevalTaskConfig(
    name="numeracy:parabola_example",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="parabola_example",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "parabola_standard": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_parabola_standard_helm = LightevalTaskConfig(
    name="numeracy:parabola_standard",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="parabola_standard",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "paraboloid_example": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_paraboloid_example_helm = LightevalTaskConfig(
    name="numeracy:paraboloid_example",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="paraboloid_example",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "paraboloid_standard": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_paraboloid_standard_helm = LightevalTaskConfig(
    name="numeracy:paraboloid_standard",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="paraboloid_standard",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "plane_example": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_plane_example_helm = LightevalTaskConfig(
    name="numeracy:plane_example",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="plane_example",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# HELM numeracy "plane_standard": exact / quasi-exact match on the test split, up to 20 generated tokens.
numeracy_plane_standard_helm = LightevalTaskConfig(
    name="numeracy:plane_standard",
    suite=["helm"],
    version=0,
    hf_repo="lighteval/numeracy",
    hf_subset="plane_standard",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.numeracy,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.exact_match, Metrics.quasi_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# BIG-bench "object_counting": perfect exact match on the "default" split.
object_counting_bigbench = LightevalTaskConfig(
    name="object_counting",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="object_counting",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "odd_one_out": log-likelihood accuracy on the "default" split.
odd_one_out_bigbench = LightevalTaskConfig(
    name="odd_one_out",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="odd_one_out",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# OpenBookQA ("helm" suites): exact-match metric family, evaluated on both validation and test splits.
openbookqa_helm = LightevalTaskConfig(
    name="openbookqa",
    suite=["helm", "commonsense_scenario", "helm_general"],
    version=0,
    hf_repo="openbookqa",
    hf_subset="main",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    prompt_function=prompt.openbookqa_helm,
    few_shots_split=None,
    few_shots_select=None,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
)
# OpenBookQA ("lighteval" suite): log-likelihood accuracy (raw and norm_nospace) on the test split.
openbookqa_lighteval = LightevalTaskConfig(
    name="openbookqa",
    suite=["lighteval"],
    version=0,
    hf_repo="openbookqa",
    hf_subset="main",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.openbookqa,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
    generation_size=-1,
    stop_sequence=["\n"],
)
# BIG-bench "operators" (also in "bigbench_lite"): perfect exact match, up to 20 generated tokens.
operators_bigbench_lite = LightevalTaskConfig(
    name="operators",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="operators",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=20,
    stop_sequence=["\n"],
)
# BIG-bench "paragraph_segmentation": perfect exact match on the "default" split.
paragraph_segmentation_bigbench = LightevalTaskConfig(
    name="paragraph_segmentation",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="paragraph_segmentation",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "parsinlu_qa": log-likelihood accuracy on the "default" split.
parsinlu_qa_bigbench = LightevalTaskConfig(
    name="parsinlu_qa",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="parsinlu_qa",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "parsinlu_reading_comprehension" (also in "bigbench_lite"): perfect exact match,
# up to 100 generated tokens, with stop_sequence explicitly set to None.
parsinlu_reading_comprehension_bigbench_lite = LightevalTaskConfig(
    name="parsinlu_reading_comprehension",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="parsinlu_reading_comprehension",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=100,
    stop_sequence=None,
)
# BIG-bench "penguins_in_a_table": log-likelihood accuracy plus perfect exact match on the "default" split.
penguins_in_a_table_bigbench = LightevalTaskConfig(
    name="penguins_in_a_table",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="penguins_in_a_table",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc, Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "periodic_elements": perfect exact match on the "default" split.
periodic_elements_bigbench = LightevalTaskConfig(
    name="periodic_elements",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="periodic_elements",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "persian_idioms": log-likelihood accuracy on the "default" split.
persian_idioms_bigbench = LightevalTaskConfig(
    name="persian_idioms",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="persian_idioms",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "phrase_relatedness": log-likelihood accuracy on the "default" split.
phrase_relatedness_bigbench = LightevalTaskConfig(
    name="phrase_relatedness",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="phrase_relatedness",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "physical_intuition": log-likelihood accuracy on the "default" split.
physical_intuition_bigbench = LightevalTaskConfig(
    name="physical_intuition",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="physical_intuition",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "physics": log-likelihood accuracy on the "default" split.
physics_bigbench = LightevalTaskConfig(
    name="physics",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="physics",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "physics_questions": BLEU plus perfect exact match on the "default" split.
physics_questions_bigbench = LightevalTaskConfig(
    name="physics_questions",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="physics_questions",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.bleu, Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
)
# PIQA ("lighteval" suite): log-likelihood accuracy (raw and norm_nospace) on the validation split.
piqa_lighteval = LightevalTaskConfig(
    name="piqa",
    suite=["lighteval"],
    version=0,
    hf_repo="ybisk/piqa",
    hf_subset="plain_text",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    prompt_function=prompt.piqa_harness,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
    generation_size=-1,
    stop_sequence=["\n"],
)
# PIQA ("helm" suites): exact-match metric family, evaluated on both validation and test splits.
piqa_helm = LightevalTaskConfig(
    name="piqa",
    suite=["helm", "commonsense_scenario"],
    version=0,
    hf_repo="ybisk/piqa",
    hf_subset="plain_text",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    prompt_function=prompt.piqa_helm,
    few_shots_split=None,
    few_shots_select=None,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "play_dialog_same_or_different" (also in "bigbench_lite"): log-likelihood accuracy.
play_dialog_same_or_different_bigbench_lite = LightevalTaskConfig(
    name="play_dialog_same_or_different",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="play_dialog_same_or_different",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "polish_sequence_labeling": F1 score on the "default" split.
polish_sequence_labeling_bigbench = LightevalTaskConfig(
    name="polish_sequence_labeling",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="polish_sequence_labeling",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.f1_score],
    generation_size=1,
    stop_sequence=["\n"],
)
# BIG-bench "presuppositions_as_nli": log-likelihood accuracy on the "default" split.
presuppositions_as_nli_bigbench = LightevalTaskConfig(
    name="presuppositions_as_nli",
    suite=["bigbench", "bigbench_json"],
    version=0,
    hf_repo="bigbench",
    hf_subset="presuppositions_as_nli",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
)
# PROST: log-likelihood accuracy (raw and norm_nospace) on the test split, the only split listed.
prost_lighteval = LightevalTaskConfig(
    name="prost",
    suite=["lighteval"],
    version=0,
    hf_repo="corypaik/prost",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    prompt_function=prompt.prost,
    few_shots_split=None,
    few_shots_select=None,
    metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace],
    generation_size=-1,
    stop_sequence=["\n"],
)
# "pubmedqa" for the "lighteval" suite; uses the "pqa_labeled" subset and scores with loglikelihood_acc.
pubmedqa_lighteval = LightevalTaskConfig(
    name="pubmedqa",
    suite=["lighteval"],
    # Data source and splits ("train" is the only listed split and is the one evaluated).
    hf_repo="pubmed_qa",
    hf_subset="pqa_labeled",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.pubmed_qa,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "pubmedqa" for the "helm" suite; same dataset as the lighteval variant but with exact-match style metrics.
pubmedqa_helm = LightevalTaskConfig(
    name="pubmedqa",
    suite=["helm"],
    # Data source and splits ("train" is the only listed split and is the one evaluated).
    hf_repo="pubmed_qa",
    hf_subset="pqa_labeled",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.pubmed_qa_helm,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "qa4mre:2011" for the "lighteval" suite; uses the "2011.main.EN" subset of "qa4mre".
qa4mre_2011_lighteval = LightevalTaskConfig(
    name="qa4mre:2011",
    suite=["lighteval"],
    # Data source and splits.
    hf_repo="qa4mre",
    hf_subset="2011.main.EN",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.qa4mre,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm_nospace,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "qa4mre:2012" for the "lighteval" suite; uses the "2012.main.EN" subset of "qa4mre".
qa4mre_2012_lighteval = LightevalTaskConfig(
    name="qa4mre:2012",
    suite=["lighteval"],
    # Data source and splits.
    hf_repo="qa4mre",
    hf_subset="2012.main.EN",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.qa4mre,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm_nospace,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "qa4mre:2013" for the "lighteval" suite; uses the "2013.main.EN" subset of "qa4mre".
qa4mre_2013_lighteval = LightevalTaskConfig(
    name="qa4mre:2013",
    suite=["lighteval"],
    # Data source and splits.
    hf_repo="qa4mre",
    hf_subset="2013.main.EN",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.qa4mre,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm_nospace,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "qa_wikidata" from the "bigbench" repo, scored with bleurt, bleu, rouge_t5 and perfect_exact_match.
qa_wikidata_bigbench = LightevalTaskConfig(
    name="qa_wikidata",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="qa_wikidata",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleurt,
        Metrics.bleu,
        Metrics.rouge_t5,
        Metrics.perfect_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "qasper" for the "lighteval" suite; evaluated on "validation" and scored with f1_score_quasi.
qasper_lighteval = LightevalTaskConfig(
    name="qasper",
    suite=["lighteval"],
    # Data source and splits.
    hf_repo="qasper",
    hf_subset="qasper",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.qasper,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.f1_score_quasi],
    generation_size=20,
    stop_sequence=["\n"],
    version=0,
)
# "qasper_ll": same "qasper" dataset as the "qasper" task, but scored with loglikelihood_acc.
qasper_ll_lighteval = LightevalTaskConfig(
    name="qasper_ll",
    suite=["lighteval"],
    # Data source and splits.
    hf_repo="qasper",
    hf_subset="qasper",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.qasper_ll,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "quac" for the "helm" suite; data from "lighteval/quac_helm", evaluated on "validation".
quac_helm = LightevalTaskConfig(
    name="quac",
    suite=["helm"],
    # Data source and splits.
    hf_repo="lighteval/quac_helm",
    hf_subset="default",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.quac,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.f1_score,
    ],
    generation_size=100,
    stop_sequence=["\n"],
    version=0,
)
# "question_selection" from the "bigbench" repo, scored with loglikelihood_acc.
question_selection_bigbench = LightevalTaskConfig(
    name="question_selection",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="question_selection",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "race:high" for the "lighteval" and "race" suites; uses the "high" subset of "EleutherAI/race".
race_high_lighteval = LightevalTaskConfig(
    name="race:high",
    suite=["lighteval", "race"],
    # Data source and splits.
    hf_repo="EleutherAI/race",
    hf_subset="high",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.race,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "raft:ade_corpus_v2" for the helm suites; "ade_corpus_v2" subset of "ought/raft", evaluated on "test".
raft_ade_corpus_v2_helm = LightevalTaskConfig(
    name="raft:ade_corpus_v2",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="ade_corpus_v2",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_ade_corpus_v2,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:banking_77" for the helm suites; "banking_77" subset of "ought/raft", evaluated on "test".
raft_banking_77_helm = LightevalTaskConfig(
    name="raft:banking_77",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="banking_77",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_banking_77,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:neurips_impact_statement_risks" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_neurips_impact_statement_risks_helm = LightevalTaskConfig(
    name="raft:neurips_impact_statement_risks",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="neurips_impact_statement_risks",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_neurips_impact_statement_risks,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:one_stop_english" for the helm suites; "one_stop_english" subset of "ought/raft", evaluated on "test".
raft_one_stop_english_helm = LightevalTaskConfig(
    name="raft:one_stop_english",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="one_stop_english",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_one_stop_english,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:overruling" for the helm suites; "overruling" subset of "ought/raft", evaluated on "test".
raft_overruling_helm = LightevalTaskConfig(
    name="raft:overruling",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="overruling",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_overruling,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:semiconductor_org_types" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_semiconductor_org_types_helm = LightevalTaskConfig(
    name="raft:semiconductor_org_types",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="semiconductor_org_types",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_semiconductor_org_types,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:systematic_review_inclusion" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_systematic_review_inclusion_helm = LightevalTaskConfig(
    name="raft:systematic_review_inclusion",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="systematic_review_inclusion",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_systematic_review_inclusion,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:tai_safety_research" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_tai_safety_research_helm = LightevalTaskConfig(
    name="raft:tai_safety_research",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="tai_safety_research",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_tai_safety_research,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:terms_of_service" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_terms_of_service_helm = LightevalTaskConfig(
    name="raft:terms_of_service",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="terms_of_service",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_terms_of_service,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:tweet_eval_hate" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_tweet_eval_hate_helm = LightevalTaskConfig(
    name="raft:tweet_eval_hate",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="tweet_eval_hate",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_tweet_eval_hate,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "raft:twitter_complaints" for the helm suites; subset of "ought/raft", evaluated on "test".
raft_twitter_complaints_helm = LightevalTaskConfig(
    name="raft:twitter_complaints",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="ought/raft",
    hf_subset="twitter_complaints",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.raft_twitter_complaints,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_macro,
        Metrics.f1_score_micro,
    ],
    generation_size=30,
    stop_sequence=["\n"],
    version=0,
)
# "real_or_fake_text" from the "bigbench" repo, scored with loglikelihood_acc.
real_or_fake_text_bigbench = LightevalTaskConfig(
    name="real_or_fake_text",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="real_or_fake_text",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "real_toxicity_prompts" for the "helm" suite; data from "allenai/real-toxicity-prompts", scored with prediction_perplexity.
real_toxicity_prompts_helm = LightevalTaskConfig(
    name="real_toxicity_prompts",
    suite=["helm"],
    # Data source and splits ("train" is the only listed split and is the one evaluated).
    hf_repo="allenai/real-toxicity-prompts",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.real_toxicity_prompts,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.prediction_perplexity],
    generation_size=20,
    stop_sequence=["\n"],
    version=0,
)
# "reasoning_about_colored_objects" from the "bigbench" repo, scored with loglikelihood_acc.
reasoning_about_colored_objects_bigbench = LightevalTaskConfig(
    name="reasoning_about_colored_objects",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="reasoning_about_colored_objects",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "repeat_copy_logic" from the "bigbench" repo (bigbench_lite member), scored with perfect_exact_match.
repeat_copy_logic_bigbench_lite = LightevalTaskConfig(
    name="repeat_copy_logic",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="repeat_copy_logic",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.perfect_exact_match],
    generation_size=100,
    stop_sequence=["\n"],
    version=0,
)
# "rephrase" from the "bigbench" repo, scored with rouge_t5, bleu, loglikelihood_acc and perfect_exact_match.
rephrase_bigbench = LightevalTaskConfig(
    name="rephrase",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="rephrase",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.rouge_t5,
        Metrics.bleu,
        Metrics.loglikelihood_acc,
        Metrics.perfect_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "rhyming" from the "bigbench" repo, scored with loglikelihood_acc.
rhyming_bigbench = LightevalTaskConfig(
    name="rhyming",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="rhyming",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "riddle_sense" from the "bigbench" repo, scored with loglikelihood_acc.
riddle_sense_bigbench = LightevalTaskConfig(
    name="riddle_sense",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="riddle_sense",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "ruin_names" from the "bigbench" repo, scored with loglikelihood_acc.
ruin_names_bigbench = LightevalTaskConfig(
    name="ruin_names",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="ruin_names",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "salient_translation_error_detection" from the "bigbench" repo, scored with loglikelihood_acc.
salient_translation_error_detection_bigbench = LightevalTaskConfig(
    name="salient_translation_error_detection",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="salient_translation_error_detection",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "scientific_press_release" from the "bigbench" repo, scored with bleu and perfect_exact_match.
scientific_press_release_bigbench = LightevalTaskConfig(
    name="scientific_press_release",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="scientific_press_release",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleu,
        Metrics.perfect_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "sciq" for the "lighteval" suite; evaluated on the "test" split of the "sciq" repo.
sciq_lighteval = LightevalTaskConfig(
    name="sciq",
    suite=["lighteval"],
    # Data source and splits.
    hf_repo="sciq",
    hf_subset="default",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.sciq,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm_nospace,
    ],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "semantic_parsing_in_context_sparc" from the "bigbench" repo, scored with bleu, rouge_t5 and perfect_exact_match.
semantic_parsing_in_context_sparc_bigbench = LightevalTaskConfig(
    name="semantic_parsing_in_context_sparc",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="semantic_parsing_in_context_sparc",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleu,
        Metrics.rouge_t5,
        Metrics.perfect_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "semantic_parsing_spider" from the "bigbench" repo, scored with bleu, rouge_t5 and perfect_exact_match.
semantic_parsing_spider_bigbench = LightevalTaskConfig(
    name="semantic_parsing_spider",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="semantic_parsing_spider",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleu,
        Metrics.rouge_t5,
        Metrics.perfect_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "sentence_ambiguity" from the "bigbench" repo, scored with loglikelihood_acc.
sentence_ambiguity_bigbench = LightevalTaskConfig(
    name="sentence_ambiguity",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="sentence_ambiguity",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "similarities_abstraction" from the "bigbench" repo, scored with bleu, rouge_t5 and loglikelihood_acc.
similarities_abstraction_bigbench = LightevalTaskConfig(
    name="similarities_abstraction",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="similarities_abstraction",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleu,
        Metrics.rouge_t5,
        Metrics.loglikelihood_acc,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simp_turing_concept" from the "bigbench" repo, scored with perfect_exact_match.
simp_turing_concept_bigbench = LightevalTaskConfig(
    name="simp_turing_concept",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simp_turing_concept",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simpleqa" for the "lighteval" suite; data from "lighteval/SimpleQA", scored with simpleqa_judge.
simpleqa = LightevalTaskConfig(
    name="simpleqa",
    suite=["lighteval"],
    prompt_function=prompt.simpleqa,
    hf_repo="lighteval/SimpleQA",
    hf_subset="default",
    # "few_shot" is listed here because few_shots_split below refers to it;
    # every split a task uses should be declared as available.
    hf_avail_splits=["test", "few_shot"],
    evaluation_splits=["test"],
    few_shots_split="few_shot",
    few_shots_select=None,
    generation_size=2048,
    metric=[Metrics.simpleqa_judge],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# "simple_arithmetic_json" from the "bigbench" repo, scored with perfect_exact_match.
simple_arithmetic_json_bigbench = LightevalTaskConfig(
    name="simple_arithmetic_json",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simple_arithmetic_json",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simple_arithmetic_json_multiple_choice" from the "bigbench" repo, scored with loglikelihood_acc.
simple_arithmetic_json_multiple_choice_bigbench = LightevalTaskConfig(
    name="simple_arithmetic_json_multiple_choice",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simple_arithmetic_json_multiple_choice",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simple_arithmetic_json_subtasks" from the "bigbench" repo, scored with perfect_exact_match.
simple_arithmetic_json_subtasks_bigbench = LightevalTaskConfig(
    name="simple_arithmetic_json_subtasks",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simple_arithmetic_json_subtasks",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simple_arithmetic_multiple_targets_json" from the "bigbench" repo, scored with bleu and perfect_exact_match.
simple_arithmetic_multiple_targets_json_bigbench = LightevalTaskConfig(
    name="simple_arithmetic_multiple_targets_json",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simple_arithmetic_multiple_targets_json",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleu,
        Metrics.perfect_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simple_ethical_questions" from the "bigbench" repo, scored with loglikelihood_acc.
simple_ethical_questions_bigbench = LightevalTaskConfig(
    name="simple_ethical_questions",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simple_ethical_questions",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "simple_text_editing" from the "bigbench" repo, scored with perfect_exact_match.
simple_text_editing_bigbench = LightevalTaskConfig(
    name="simple_text_editing",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="simple_text_editing",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "siqa" for the "helm" and "commonsense_scenario" suites; data from "social_i_qa", evaluated on "validation".
siqa_helm = LightevalTaskConfig(
    name="siqa",
    suite=["helm", "commonsense_scenario"],
    # Data source and splits.
    hf_repo="social_i_qa",
    hf_subset="default",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.siqa,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "snarks" from the "bigbench" repo, scored with loglikelihood_acc.
snarks_bigbench = LightevalTaskConfig(
    name="snarks",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="snarks",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "social_iqa" from the "bigbench" repo, scored with loglikelihood_acc.
social_iqa_bigbench = LightevalTaskConfig(
    name="social_iqa",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="social_iqa",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "social_support" from the "bigbench" repo, scored with f1_score_macro.
social_support_bigbench = LightevalTaskConfig(
    name="social_support",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="social_support",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.f1_score_macro],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "sports_understanding" from the "bigbench" repo, scored with loglikelihood_acc.
sports_understanding_bigbench = LightevalTaskConfig(
    name="sports_understanding",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="sports_understanding",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "squad_v2" for the "lighteval" suite; data from "rajpurkar/squad_v2", evaluated on "validation"
# with "train" as the few-shot split. The prompt comes from the shared English QA template.
squad_v2 = LightevalTaskConfig(
    name="squad_v2",
    suite=("lighteval",),
    # Data source and splits.
    hf_repo="rajpurkar/squad_v2",
    hf_subset="squad_v2",
    evaluation_splits=("validation",),
    few_shots_split="train",
    # Predicate passed as hf_filter: true only when at least one answer text is non-empty.
    hf_filter=lambda line: any(ans for ans in line["answers"]["text"] if len(ans) > 0),
    # Adapter mapping a dataset row to the template's question/context/choices fields;
    # empty answer strings are dropped from the choices.
    prompt_function=get_qa_prompt_function(
        Language.ENGLISH,
        lambda line: {
            "question": line["question"],
            "context": line["context"],
            "choices": [ans for ans in line["answers"]["text"] if len(ans) > 0],
        },
    ),
    # Metric and generation settings.
    metric=(
        Metrics.prefix_quasi_exact_match,
        Metrics.f1_score_quasi,
    ),
    generation_size=200,
    stop_sequence=["\n", "Question:", "question:"],
)
# "storycloze:2016" for the "lighteval" and "storycloze" suites; "2016" subset of "story_cloze".
storycloze_2016_lighteval = LightevalTaskConfig(
    name="storycloze:2016",
    suite=["lighteval", "storycloze"],
    # Data source and splits.
    hf_repo="story_cloze",
    hf_subset="2016",
    hf_avail_splits=["validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.storycloze,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "storycloze:2018" for the "lighteval" and "storycloze" suites; "2018" subset of "story_cloze".
storycloze_2018_lighteval = LightevalTaskConfig(
    name="storycloze:2018",
    suite=["lighteval", "storycloze"],
    # Data source and splits.
    hf_repo="story_cloze",
    hf_subset="2018",
    hf_avail_splits=["validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.storycloze,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=-1,
    stop_sequence=["\n"],
    version=0,
)
# "strange_stories" from the "bigbench" repo (bigbench_lite member), scored with loglikelihood_acc.
strange_stories_bigbench_lite = LightevalTaskConfig(
    name="strange_stories",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="strange_stories",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "strategyqa" from the "bigbench" repo (bigbench_lite member), scored with bleu, rouge_t5 and loglikelihood_acc.
strategyqa_bigbench_lite = LightevalTaskConfig(
    name="strategyqa",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="strategyqa",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.bleu,
        Metrics.rouge_t5,
        Metrics.loglikelihood_acc,
    ],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "sufficient_information" from the "bigbench" repo, scored with perfect_exact_match.
sufficient_information_bigbench = LightevalTaskConfig(
    name="sufficient_information",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="sufficient_information",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.perfect_exact_match],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "suicide_risk" from the "bigbench" repo, scored with loglikelihood_acc.
suicide_risk_bigbench = LightevalTaskConfig(
    name="suicide_risk",
    suite=["bigbench", "bigbench_json"],
    # Data source and splits.
    hf_repo="bigbench",
    hf_subset="suicide_risk",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[Metrics.loglikelihood_acc],
    generation_size=1,
    stop_sequence=["\n"],
    version=0,
)
# "summarization:cnn-dm" for the helm suites; "cnn-dm" subset of "lighteval/summarization",
# evaluated on both "validation" and "test".
summarization_cnn_dm_helm = LightevalTaskConfig(
    name="summarization:cnn-dm",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="lighteval/summarization",
    hf_subset="cnn-dm",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.cnn_dm,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.rouge1,
        Metrics.rouge2,
        Metrics.rougeL,
        Metrics.faithfulness,
        Metrics.extractiveness,
        Metrics.bert_score,
    ],
    generation_size=128,
    stop_sequence=["\n"],
    version=0,
)
# "summarization:xsum" for the helm suites; "xsum" subset of "lighteval/summarization",
# evaluated on both "validation" and "test".
summarization_xsum_helm = LightevalTaskConfig(
    name="summarization:xsum",
    suite=["helm", "helm_general"],
    # Data source and splits.
    hf_repo="lighteval/summarization",
    hf_subset="xsum",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.xsum,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.rouge1,
        Metrics.rouge2,
        Metrics.rougeL,
        Metrics.faithfulness,
        Metrics.extractiveness,
        Metrics.bert_score,
    ],
    generation_size=64,
    stop_sequence=["\n"],
    version=0,
)
# "summarization:xsum-sampled" for the "helm" suite; "xsum-sampled" subset of "lighteval/summarization",
# evaluated on both "validation" and "test".
summarization_xsum_sampled_helm = LightevalTaskConfig(
    name="summarization:xsum-sampled",
    suite=["helm"],
    # Data source and splits.
    hf_repo="lighteval/summarization",
    hf_subset="xsum-sampled",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting; no few-shot split or selector is configured.
    prompt_function=prompt.xsum,
    few_shots_split=None,
    few_shots_select=None,
    # Metric and generation settings.
    metric=[
        Metrics.rouge1,
        Metrics.rouge2,
        Metrics.rougeL,
        Metrics.faithfulness,
        Metrics.extractiveness,
        Metrics.bert_score,
    ],
    generation_size=64,
    stop_sequence=["\n"],
    version=0,
)
# SuperGLUE "boolq" subset, evaluated on the "validation" split with log-likelihood accuracy.
super_glue_boolq_lighteval = LightevalTaskConfig(
    # Identity
    name="super_glue:boolq",
    suite=["lighteval", "superglue"],
    version=0,
    # Dataset
    hf_repo="super_glue",
    hf_subset="boolq",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.boolq_harness,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# SuperGLUE "cb" subset, evaluated on the "validation" split.
# Scored with log-likelihood accuracy plus the multi-class numeric F1 metric.
super_glue_cb_lighteval = LightevalTaskConfig(
    name="super_glue:cb",
    suite=["lighteval", "superglue"],
    prompt_function=prompt.cb,
    hf_repo="super_glue",
    hf_subset="cb",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=1,
    # Every metric is passed as a Metrics enum member, like all other tasks in
    # this file; a bare string here would not be a metric object.
    metric=[Metrics.loglikelihood_acc, Metrics.multi_f1_numeric],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# SuperGLUE "copa" subset, evaluated on the "validation" split with log-likelihood accuracy.
super_glue_copa_lighteval = LightevalTaskConfig(
    # Identity
    name="super_glue:copa",
    suite=["lighteval", "superglue"],
    version=0,
    # Dataset
    hf_repo="super_glue",
    hf_subset="copa",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.copa,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# SuperGLUE "rte" subset, evaluated on the "validation" split with log-likelihood accuracy.
super_glue_rte_lighteval = LightevalTaskConfig(
    # Identity
    name="super_glue:rte",
    suite=["lighteval", "superglue"],
    version=0,
    # Dataset
    hf_repo="super_glue",
    hf_subset="rte",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.rte,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# SuperGLUE "multirc" subset, evaluated on the "validation" split with log-likelihood accuracy.
# Only "train" and "validation" are declared as available splits for this subset.
super_glue_multirc_lighteval = LightevalTaskConfig(
    # Identity
    name="super_glue:multirc",
    suite=["lighteval", "superglue"],
    version=0,
    # Dataset
    hf_repo="super_glue",
    hf_subset="multirc",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.multirc,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# SuperGLUE "wic" subset, evaluated on the "validation" split with log-likelihood accuracy.
super_glue_wic_lighteval = LightevalTaskConfig(
    # Identity
    name="super_glue:wic",
    suite=["lighteval", "superglue"],
    version=0,
    # Dataset
    hf_repo="super_glue",
    hf_subset="wic",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.wic,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# SuperGLUE "wsc" subset, evaluated on the "validation" split with log-likelihood accuracy.
super_glue_wsc_lighteval = LightevalTaskConfig(
    # Identity
    name="super_glue:wsc",
    suite=["lighteval", "superglue"],
    version=0,
    # Dataset
    hf_repo="super_glue",
    hf_subset="wsc",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.wsc,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "swahili_english_proverbs" subset, evaluated on the "default" split
# with log-likelihood accuracy.
swahili_english_proverbs_bigbench = LightevalTaskConfig(
    # Identity
    name="swahili_english_proverbs",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="swahili_english_proverbs",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# SWAG "regular" subset, evaluated on the "validation" split with raw and
# no-space-normalized log-likelihood accuracy.
swag_lighteval = LightevalTaskConfig(
    # Identity
    name="swag",
    suite=["lighteval"],
    version=0,
    # Dataset
    hf_repo="swag",
    hf_subset="regular",
    hf_avail_splits=["train", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.swag,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm_nospace,
    ],
)
# BIG-bench "swedish_to_german_proverbs" subset, evaluated on the "default" split
# with log-likelihood accuracy.
swedish_to_german_proverbs_bigbench = LightevalTaskConfig(
    # Identity
    name="swedish_to_german_proverbs",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="swedish_to_german_proverbs",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "symbol_interpretation" subset, also registered in the "bigbench_lite" suite.
# Uses the linefeed-before / whitespace-after-query prompt variant.
symbol_interpretation_bigbench_lite = LightevalTaskConfig(
    # Identity
    name="symbol_interpretation",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="symbol_interpretation",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench_linefeed_before_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# HELM synthetic reasoning, "induction" subset; generative task (up to 50 tokens)
# scored with the exact-match family of metrics.
synthetic_reasoning_induction_helm = LightevalTaskConfig(
    # Identity
    name="synthetic_reasoning:induction",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/synthetic_reasoning",
    hf_subset="induction",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.synthetic_reasoning,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=50,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# HELM synthetic reasoning (natural), "easy" subset; generative task (up to 20 tokens)
# scored with exact match and F1.
synthetic_reasoning_natural_easy_helm = LightevalTaskConfig(
    # Identity
    name="synthetic_reasoning:natural_easy",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/synthetic_reasoning_natural",
    hf_subset="easy",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.synthetic_reasoning_natural,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=20,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.f1_score,
    ],
)
# HELM synthetic reasoning (natural), "hard" subset; generative task (up to 20 tokens)
# scored with exact match and F1.
synthetic_reasoning_natural_hard_helm = LightevalTaskConfig(
    # Identity
    name="synthetic_reasoning:natural_hard",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/synthetic_reasoning_natural",
    hf_subset="hard",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.synthetic_reasoning_natural,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=20,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.f1_score,
    ],
)
# HELM synthetic reasoning, "pattern_match" subset; generative task (up to 50 tokens)
# scored with the exact-match family of metrics.
synthetic_reasoning_pattern_match_helm = LightevalTaskConfig(
    # Identity
    name="synthetic_reasoning:pattern_match",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/synthetic_reasoning",
    hf_subset="pattern_match",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.synthetic_reasoning,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=50,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# HELM synthetic reasoning, "variable_substitution" subset; generative task
# (up to 50 tokens) scored with the exact-match family of metrics.
synthetic_reasoning_variable_substitution_helm = LightevalTaskConfig(
    # Identity
    name="synthetic_reasoning:variable_substitution",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/synthetic_reasoning",
    hf_subset="variable_substitution",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.synthetic_reasoning,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=50,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# BIG-bench "tellmewhy" subset, evaluated on the "default" split with BLEU and ROUGE (t5).
tellmewhy_bigbench = LightevalTaskConfig(
    # Identity
    name="tellmewhy",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="tellmewhy",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[
        Metrics.bleu,
        Metrics.rouge_t5,
    ],
)
# BIG-bench "temporal_sequences" subset, evaluated on the "default" split
# with log-likelihood accuracy.
temporal_sequences_bigbench = LightevalTaskConfig(
    # Identity
    name="temporal_sequences",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="temporal_sequences",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# BIG-bench "tense" subset, evaluated on the "default" split with perfect exact match.
tense_bigbench = LightevalTaskConfig(
    # Identity
    name="tense",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="tense",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# The Pile (lighteval/pile), "pile_arxiv" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_arxiv_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:arxiv",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_arxiv",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "arxiv" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_arxiv_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:arxiv",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="arxiv",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "bibliotik" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_bibliotik_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:bibliotik",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="bibliotik",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_bookcorpus2" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_bookcorpus2_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:bookcorpus2",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_bookcorpus2",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_books3" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_books3_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:books3",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_books3",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "commoncrawl" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_commoncrawl_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:commoncrawl",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="commoncrawl",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_dm-mathematics" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_dm_mathematics_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:dm-mathematics",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_dm-mathematics",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "dm-mathematics" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_dm_mathematics_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:dm-mathematics",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="dm-mathematics",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_enron" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_enron_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:enron",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_enron",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "enron" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_enron_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:enron",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="enron",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_europarl" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_europarl_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:europarl",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_europarl",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "europarl" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_europarl_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:europarl",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="europarl",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_freelaw" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_freelaw_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:freelaw",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_freelaw",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "freelaw" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_freelaw_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:freelaw",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="freelaw",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_github" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_github_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:github",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_github",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "github" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_github_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:github",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="github",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_gutenberg" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_gutenberg_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:gutenberg",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_gutenberg",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "gutenberg" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_gutenberg_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:gutenberg",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="gutenberg",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_hackernews" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_hackernews_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:hackernews",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_hackernews",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "hackernews" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_hackernews_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:hackernews",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="hackernews",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_nih-exporter" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_nih_exporter_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:nih-exporter",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_nih-exporter",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "nih-exporter" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_nih_exporter_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:nih-exporter",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="nih-exporter",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_opensubtitles" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_opensubtitles_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:opensubtitles",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_opensubtitles",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "opensubtitles" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_opensubtitles_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:opensubtitles",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="opensubtitles",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_openwebtext2" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_openwebtext2_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:openwebtext2",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_openwebtext2",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "openwebtext2" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_openwebtext2_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:openwebtext2",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="openwebtext2",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_philpapers" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_philpapers_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:philpapers",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_philpapers",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_pile-cc" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_pile_cc_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:pile-cc",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_pile-cc",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_pubmed-abstracts" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_pubmed_abstracts_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:pubmed-abstracts",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_pubmed-abstracts",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "pubmed-abstracts" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_pubmed_abstracts_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:pubmed-abstracts",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="pubmed-abstracts",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_pubmed-central" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_pubmed_central_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:pubmed-central",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_pubmed-central",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "pubmed-central" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_pubmed_central_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:pubmed-central",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="pubmed-central",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_stackexchange" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_stackexchange_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:stackexchange",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_stackexchange",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "stackexchange" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_stackexchange_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:stackexchange",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="stackexchange",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_ubuntu-irc" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_ubuntu_irc_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:ubuntu-irc",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_ubuntu-irc",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), USPTO task; "test" split scored with
# word/byte perplexity and bits-per-byte.
# NOTE(review): the subset is spelled "pile_upsto" while the task name uses "uspto";
# confirm against the dataset's published config names before changing either.
the_pile_uspto_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:uspto",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_upsto",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "uspto" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
# NOTE(review): the variable and task name are spelled "upsto" while the subset is
# "uspto"; renaming would change the public task id, so confirm before touching it.
the_pile_upsto_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:upsto",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="uspto",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_wikipedia" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_wikipedia_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:wikipedia",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_wikipedia",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "wikipedia" subset; "test" split scored
# with word/byte perplexity and bits-per-byte.
the_pile_wikipedia_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:wikipedia",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="wikipedia",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile (lighteval/pile), "pile_youtubesubtitles" subset; "test" split scored with
# word/byte perplexity and bits-per-byte.
the_pile_youtubesubtitles_lighteval = LightevalTaskConfig(
    # Identity
    name="the_pile:youtubesubtitles",
    suite=["lighteval", "pile"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile",
    hf_subset="pile_youtubesubtitles",
    hf_avail_splits=["validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# The Pile, HELM variant (lighteval/pile_helm), "youtubesubtitles" subset; "test" split
# scored with word/byte perplexity and bits-per-byte.
the_pile_youtubesubtitles_helm = LightevalTaskConfig(
    # Identity
    name="the_pile:youtubesubtitles",
    suite=["helm"],
    version=0,
    # Dataset
    hf_repo="lighteval/pile_helm",
    hf_subset="youtubesubtitles",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.the_pile,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.word_perplexity,
        Metrics.byte_perplexity,
        Metrics.bits_per_byte,
    ],
)
# BIG-bench "timedial" subset, evaluated on the "default" split with log-likelihood accuracy.
timedial_bigbench = LightevalTaskConfig(
    # Identity
    name="timedial",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset
    hf_repo="bigbench",
    hf_subset="timedial",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.bigbench,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# ToxiGen "annotated" subset (skg/toxigen-data), evaluated on the "test" split with
# raw and normalized log-likelihood accuracy.
toxigen_lighteval = LightevalTaskConfig(
    # Identity
    name="toxigen",
    suite=["lighteval"],
    version=0,
    # Dataset
    hf_repo="skg/toxigen-data",
    hf_subset="annotated",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompting
    prompt_function=prompt.toxigen,
    few_shots_split=None,
    few_shots_select=None,
    # Generation and scoring
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.loglikelihood_acc_norm,
    ],
)
# Task "topical_chat" (suites: bigbench, bigbench_json): "topical_chat" subset of the bigbench repo,
# evaluated on the "default" split with BLEU, ROUGE (t5), log-likelihood accuracy and BLEURT.
topical_chat_bigbench = LightevalTaskConfig(
    # Identity
    name="topical_chat",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="topical_chat",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.loglikelihood_acc, Metrics.bleurt],
)
# Task "tracking_shuffled_objects" (suites: bigbench, bigbench_json): same-named subset of the
# bigbench repo, evaluated on the "default" split with log-likelihood accuracy.
tracking_shuffled_objects_bigbench = LightevalTaskConfig(
    # Identity
    name="tracking_shuffled_objects",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="tracking_shuffled_objects",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "triviaqa" (suite: lighteval): "rc.nocontext" subset of trivia_qa, evaluated on the
# validation split with the TriviaQA quasi-exact-match metric.
triviaqa_lighteval = LightevalTaskConfig(
    # Identity
    name="triviaqa",
    suite=["lighteval"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="trivia_qa",
    hf_subset="rc.nocontext",
    trust_dataset=True,
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics; newline, period and comma are all stop sequences
    prompt_function=prompt.triviaqa,
    generation_size=20,
    stop_sequence=["\n", ".", ","],
    metric=[Metrics.quasi_exact_match_triviaqa],
)
# Task "truthfulqa:gen" (suite: lighteval): "generation" subset of truthful_qa, evaluated on the
# validation split with BLEU and ROUGE (t5).
truthfulqa_gen_lighteval = LightevalTaskConfig(
    # Identity
    name="truthfulqa:gen",
    suite=["lighteval"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="truthful_qa",
    hf_subset="generation",
    trust_dataset=True,
    hf_avail_splits=["validation"],
    evaluation_splits=["validation"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.truthful_qa_generative,
    generation_size=200,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.rouge_t5],
)
# Task "truthfulqa:mc" (suite: leaderboard): "multiple_choice" subset of truthful_qa, evaluated on
# the validation split with the TruthfulQA multiple-choice metrics.
truthfulqa_mc_leaderboard = LightevalTaskConfig(
    # Identity
    name="truthfulqa:mc",
    suite=["leaderboard"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="truthful_qa",
    hf_subset="multiple_choice",
    trust_dataset=True,
    hf_avail_splits=["validation"],
    evaluation_splits=["validation"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.truthful_qa_multiple_choice,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.truthfulqa_mc_metrics],
)
# Task "truthfulqa" (suites: helm, helm_general): "default" subset of lighteval/truthfulqa_helm,
# evaluated on the "valid" split with log-likelihood accuracy plus the exact-match family of metrics.
truthfulqa_helm = LightevalTaskConfig(
    # Identity
    name="truthfulqa",
    suite=["helm", "helm_general"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/truthfulqa_helm",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "valid"],
    evaluation_splits=["valid"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.truthful_qa_helm,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[
        Metrics.loglikelihood_acc,
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "twitterAAE:aa" (suite: helm): "aa" subset of lighteval/twitterAAE, evaluated on the test
# split with word/byte perplexity and bits-per-byte.
twitterAAE_aa_helm = LightevalTaskConfig(
    # Identity
    name="twitterAAE:aa",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/twitterAAE",
    hf_subset="aa",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.twitter_aae,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte],
)
# Task "twitterAAE:white" (suite: helm): "white" subset of lighteval/twitterAAE, evaluated on the
# test split with word/byte perplexity and bits-per-byte.
twitterAAE_white_helm = LightevalTaskConfig(
    # Identity
    name="twitterAAE:white",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/twitterAAE",
    hf_subset="white",
    trust_dataset=True,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.twitter_aae,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte],
)
# Task "understanding_fables" (suites: bigbench, bigbench_json): same-named subset of the bigbench
# repo, evaluated on the "default" split with log-likelihood accuracy.
understanding_fables_bigbench = LightevalTaskConfig(
    # Identity
    name="understanding_fables",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="understanding_fables",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "undo_permutation" (suites: bigbench, bigbench_json): same-named subset of the bigbench
# repo, evaluated on the "default" split with log-likelihood accuracy.
undo_permutation_bigbench = LightevalTaskConfig(
    # Identity
    name="undo_permutation",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="undo_permutation",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "unit_conversion" (suites: bigbench, bigbench_json): same-named subset of the bigbench
# repo, evaluated on the "default" split with log-likelihood accuracy.
unit_conversion_bigbench = LightevalTaskConfig(
    # Identity
    name="unit_conversion",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="unit_conversion",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "unit_interpretation" (suites: bigbench, bigbench_json): same-named subset of the bigbench
# repo, evaluated on the "default" split with log-likelihood accuracy.
unit_interpretation_bigbench = LightevalTaskConfig(
    # Identity
    name="unit_interpretation",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="unit_interpretation",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "unnatural_in_context_learning" (suites: bigbench, bigbench_json): same-named subset of the
# bigbench repo, evaluated on the "default" split with the perfect-exact-match metric.
unnatural_in_context_learning_bigbench = LightevalTaskConfig(
    # Identity
    name="unnatural_in_context_learning",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="unnatural_in_context_learning",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# Task "unscramble:anagrams1" (suites: lighteval, unscramble): "mid_word_1_anagrams" split of
# lighteval/GPT3_unscramble ("default" subset), scored with the perfect-exact-match metric.
unscramble_anagrams1_lighteval = LightevalTaskConfig(
    # Identity
    name="unscramble:anagrams1",
    suite=["lighteval", "unscramble"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/GPT3_unscramble",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["mid_word_1_anagrams"],
    evaluation_splits=["mid_word_1_anagrams"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.unscramble,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# Task "unscramble:anagrams2" (suites: lighteval, unscramble): "mid_word_2_anagrams" split of
# lighteval/GPT3_unscramble ("default" subset), scored with the perfect-exact-match metric.
unscramble_anagrams2_lighteval = LightevalTaskConfig(
    # Identity
    name="unscramble:anagrams2",
    suite=["lighteval", "unscramble"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/GPT3_unscramble",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["mid_word_2_anagrams"],
    evaluation_splits=["mid_word_2_anagrams"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.unscramble,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# Task "unscramble:cycle_letters" (suites: lighteval, unscramble): "cycle_letters_in_word" split of
# lighteval/GPT3_unscramble ("default" subset), scored with the perfect-exact-match metric.
unscramble_cycle_letters_lighteval = LightevalTaskConfig(
    # Identity
    name="unscramble:cycle_letters",
    suite=["lighteval", "unscramble"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/GPT3_unscramble",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["cycle_letters_in_word"],
    evaluation_splits=["cycle_letters_in_word"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.unscramble,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# Task "unscramble:random_insertion" (suites: lighteval, unscramble): "random_insertion_in_word" split
# of lighteval/GPT3_unscramble ("default" subset), scored with the perfect-exact-match metric.
unscramble_random_insertion_lighteval = LightevalTaskConfig(
    # Identity
    name="unscramble:random_insertion",
    suite=["lighteval", "unscramble"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/GPT3_unscramble",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["random_insertion_in_word"],
    evaluation_splits=["random_insertion_in_word"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.unscramble,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# Task "unscramble:reversed_words" (suites: lighteval, unscramble): "reversed_words" split of
# lighteval/GPT3_unscramble ("default" subset), scored with the perfect-exact-match metric.
unscramble_reversed_words_lighteval = LightevalTaskConfig(
    # Identity
    name="unscramble:reversed_words",
    suite=["lighteval", "unscramble"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/GPT3_unscramble",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["reversed_words"],
    evaluation_splits=["reversed_words"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.unscramble,
    generation_size=5,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
)
# Task "vitaminc_fact_verification" (suites: bigbench_lite, bigbench, bigbench_json): same-named subset
# of the bigbench repo, evaluated on the "default" split with log-likelihood accuracy.
vitaminc_fact_verification_bigbench_lite = LightevalTaskConfig(
    # Identity
    name="vitaminc_fact_verification",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="vitaminc_fact_verification",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt (the whitespace-after-query bigbench variant), generation settings and metrics
    prompt_function=prompt.bigbench_whitespace_after_query,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "webqs" (suite: lighteval): "default" subset of web_questions, evaluated on the test split
# with the acc_golds_likelihood metric.
webqs_lighteval = LightevalTaskConfig(
    # Identity
    name="webqs",
    suite=["lighteval"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="web_questions",
    hf_subset="default",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.webqs,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.acc_golds_likelihood],
)
# Task "what_is_the_tao" (suites: bigbench, bigbench_json): same-named subset of the bigbench
# repo, evaluated on the "default" split with log-likelihood accuracy.
what_is_the_tao_bigbench = LightevalTaskConfig(
    # Identity
    name="what_is_the_tao",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="what_is_the_tao",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "which_wiki_edit" (suites: bigbench, bigbench_json): same-named subset of the bigbench
# repo, evaluated on the "default" split with log-likelihood accuracy.
which_wiki_edit_bigbench = LightevalTaskConfig(
    # Identity
    name="which_wiki_edit",
    suite=["bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="bigbench",
    hf_subset="which_wiki_edit",
    trust_dataset=True,
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.bigbench,
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "wikifact:applies_to_jurisdiction" (suite: helm): "applies_to_jurisdiction" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_applies_to_jurisdiction_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:applies_to_jurisdiction",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="applies_to_jurisdiction",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:atomic_number" (suite: helm): "atomic_number" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_atomic_number_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:atomic_number",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="atomic_number",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:author" (suite: helm): "author" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_author_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:author",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="author",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:award_received" (suite: helm): "award_received" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_award_received_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:award_received",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="award_received",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:basic_form_of_government" (suite: helm): "basic_form_of_government" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_basic_form_of_government_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:basic_form_of_government",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="basic_form_of_government",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:capital" (suite: helm): "capital" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_capital_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:capital",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="capital",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:capital_of" (suite: helm): "capital_of" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_capital_of_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:capital_of",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="capital_of",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:central_bank" (suite: helm): "central_bank" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_central_bank_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:central_bank",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="central_bank",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:composer" (suite: helm): "composer" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_composer_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:composer",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="composer",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:continent" (suite: helm): "continent" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_continent_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:continent",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="continent",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:country" (suite: helm): "country" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_country_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:country",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="country",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:country_of_citizenship" (suite: helm): "country_of_citizenship" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_country_of_citizenship_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:country_of_citizenship",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="country_of_citizenship",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:country_of_origin" (suite: helm): "country_of_origin" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_country_of_origin_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:country_of_origin",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="country_of_origin",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:creator" (suite: helm): "creator" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_creator_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:creator",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="creator",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:currency" (suite: helm): "currency" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_currency_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:currency",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="currency",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:defendant" (suite: helm): "defendant" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_defendant_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:defendant",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="defendant",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:developer" (suite: helm): "developer" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_developer_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:developer",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="developer",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:diplomatic_relation" (suite: helm): "diplomatic_relation" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_diplomatic_relation_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:diplomatic_relation",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="diplomatic_relation",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:director" (suite: helm): "director" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_director_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:director",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="director",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:discoverer_or_inventor" (suite: helm): "discoverer_or_inventor" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_discoverer_or_inventor_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:discoverer_or_inventor",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="discoverer_or_inventor",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:drug_or_therapy_used_for_treatment" (suite: helm): same-named subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_drug_or_therapy_used_for_treatment_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:drug_or_therapy_used_for_treatment",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="drug_or_therapy_used_for_treatment",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:educated_at" (suite: helm): "educated_at" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_educated_at_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:educated_at",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="educated_at",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:electron_configuration" (suite: helm): "electron_configuration" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_electron_configuration_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:electron_configuration",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="electron_configuration",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:employer" (suite: helm): "employer" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_employer_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:employer",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="employer",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:field_of_work" (suite: helm): "field_of_work" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_field_of_work_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:field_of_work",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="field_of_work",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:file_extension" (suite: helm): "file_extension" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_file_extension_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:file_extension",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="file_extension",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:genetic_association" (suite: helm): "genetic_association" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_genetic_association_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:genetic_association",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="genetic_association",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:genre" (suite: helm): "genre" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_genre_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:genre",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="genre",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:has_part" (suite: helm): "has_part" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_has_part_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:has_part",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="has_part",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:head_of_government" (suite: helm): "head_of_government" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_head_of_government_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:head_of_government",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="head_of_government",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:head_of_state" (suite: helm): "head_of_state" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_head_of_state_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:head_of_state",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="head_of_state",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:headquarters_location" (suite: helm): "headquarters_location" subset of
# lighteval/wikifact, evaluated on the test split with the exact-match family of metrics.
wikifact_headquarters_location_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:headquarters_location",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="headquarters_location",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:industry" (suite: helm): "industry" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_industry_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:industry",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="industry",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:influenced_by" (suite: helm): "influenced_by" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_influenced_by_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:influenced_by",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="influenced_by",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:instance_of" (suite: helm): "instance_of" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_instance_of_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:instance_of",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="instance_of",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# Task "wikifact:instrument" (suite: helm): "instrument" subset of lighteval/wikifact,
# evaluated on the test split with the exact-match family of metrics.
wikifact_instrument_helm = LightevalTaskConfig(
    # Identity
    name="wikifact:instrument",
    suite=["helm"],
    version=0,
    # Dataset location and splits (no few-shot split is configured)
    hf_repo="lighteval/wikifact",
    hf_subset="instrument",
    trust_dataset=True,
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    # Prompt, generation settings and metrics
    prompt_function=prompt.wikifact,
    generation_size=8,
    stop_sequence=["\n"],
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
)
# HELM-suite WikiFact task for the "language_of_work_or_name" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_language_of_work_or_name_helm = LightevalTaskConfig(
    name="wikifact:language_of_work_or_name",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="language_of_work_or_name",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "languages_spoken_written_or_signed" subset
# of lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_languages_spoken_written_or_signed_helm = LightevalTaskConfig(
    name="wikifact:languages_spoken_written_or_signed",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="languages_spoken_written_or_signed",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "laws_applied" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_laws_applied_helm = LightevalTaskConfig(
    name="wikifact:laws_applied",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="laws_applied",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the
# "located_in_the_administrative_territorial_entity" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_located_in_the_administrative_territorial_entity_helm = LightevalTaskConfig(
    name="wikifact:located_in_the_administrative_territorial_entity",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="located_in_the_administrative_territorial_entity",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "location" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_location_helm = LightevalTaskConfig(
    name="wikifact:location",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="location",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "location_of_discovery" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_location_of_discovery_helm = LightevalTaskConfig(
    name="wikifact:location_of_discovery",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="location_of_discovery",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "location_of_formation" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_location_of_formation_helm = LightevalTaskConfig(
    name="wikifact:location_of_formation",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="location_of_formation",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "majority_opinion_by" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_majority_opinion_by_helm = LightevalTaskConfig(
    name="wikifact:majority_opinion_by",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="majority_opinion_by",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "manufacturer" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_manufacturer_helm = LightevalTaskConfig(
    name="wikifact:manufacturer",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="manufacturer",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "measured_physical_quantity" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_measured_physical_quantity_helm = LightevalTaskConfig(
    name="wikifact:measured_physical_quantity",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="measured_physical_quantity",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "medical_condition_treated" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_medical_condition_treated_helm = LightevalTaskConfig(
    name="wikifact:medical_condition_treated",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="medical_condition_treated",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "member_of" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_member_of_helm = LightevalTaskConfig(
    name="wikifact:member_of",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="member_of",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "member_of_political_party" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_member_of_political_party_helm = LightevalTaskConfig(
    name="wikifact:member_of_political_party",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="member_of_political_party",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "member_of_sports_team" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_member_of_sports_team_helm = LightevalTaskConfig(
    name="wikifact:member_of_sports_team",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="member_of_sports_team",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "movement" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_movement_helm = LightevalTaskConfig(
    name="wikifact:movement",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="movement",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "named_after" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_named_after_helm = LightevalTaskConfig(
    name="wikifact:named_after",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="named_after",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "native_language" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_native_language_helm = LightevalTaskConfig(
    name="wikifact:native_language",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="native_language",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "number_of_processor_cores" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_number_of_processor_cores_helm = LightevalTaskConfig(
    name="wikifact:number_of_processor_cores",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="number_of_processor_cores",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "occupation" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_occupation_helm = LightevalTaskConfig(
    name="wikifact:occupation",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="occupation",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "office_held_by_head_of_government" subset
# of lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_office_held_by_head_of_government_helm = LightevalTaskConfig(
    name="wikifact:office_held_by_head_of_government",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="office_held_by_head_of_government",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "office_held_by_head_of_state" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_office_held_by_head_of_state_helm = LightevalTaskConfig(
    name="wikifact:office_held_by_head_of_state",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="office_held_by_head_of_state",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "official_language" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_official_language_helm = LightevalTaskConfig(
    name="wikifact:official_language",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="official_language",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "operating_system" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_operating_system_helm = LightevalTaskConfig(
    name="wikifact:operating_system",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="operating_system",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "original_language_of_film_or_TV_show"
# subset of lighteval/wikifact. Evaluated on the "test" split with no few-shot
# split configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_original_language_of_film_or_TV_show_helm = LightevalTaskConfig(
    name="wikifact:original_language_of_film_or_TV_show",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="original_language_of_film_or_TV_show",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "original_network" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_original_network_helm = LightevalTaskConfig(
    name="wikifact:original_network",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="original_network",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "overrules" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_overrules_helm = LightevalTaskConfig(
    name="wikifact:overrules",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="overrules",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "owned_by" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_owned_by_helm = LightevalTaskConfig(
    name="wikifact:owned_by",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="owned_by",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "part_of" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_part_of_helm = LightevalTaskConfig(
    name="wikifact:part_of",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="part_of",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "participating_team" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_participating_team_helm = LightevalTaskConfig(
    name="wikifact:participating_team",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="participating_team",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "place_of_birth" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_place_of_birth_helm = LightevalTaskConfig(
    name="wikifact:place_of_birth",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="place_of_birth",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "place_of_death" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_place_of_death_helm = LightevalTaskConfig(
    name="wikifact:place_of_death",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="place_of_death",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "plaintiff" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_plaintiff_helm = LightevalTaskConfig(
    name="wikifact:plaintiff",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="plaintiff",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "position_held" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_position_held_helm = LightevalTaskConfig(
    name="wikifact:position_held",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="position_held",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "position_played_on_team" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_position_played_on_team_helm = LightevalTaskConfig(
    name="wikifact:position_played_on_team",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="position_played_on_team",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "programming_language" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_programming_language_helm = LightevalTaskConfig(
    name="wikifact:programming_language",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="programming_language",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "recommended_unit_of_measurement" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_recommended_unit_of_measurement_helm = LightevalTaskConfig(
    name="wikifact:recommended_unit_of_measurement",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="recommended_unit_of_measurement",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "record_label" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_record_label_helm = LightevalTaskConfig(
    name="wikifact:record_label",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="record_label",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "religion" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_religion_helm = LightevalTaskConfig(
    name="wikifact:religion",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="religion",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "repealed_by" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_repealed_by_helm = LightevalTaskConfig(
    name="wikifact:repealed_by",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="repealed_by",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "shares_border_with" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_shares_border_with_helm = LightevalTaskConfig(
    name="wikifact:shares_border_with",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="shares_border_with",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "solved_by" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_solved_by_helm = LightevalTaskConfig(
    name="wikifact:solved_by",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="solved_by",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "statement_describes" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_statement_describes_helm = LightevalTaskConfig(
    name="wikifact:statement_describes",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="statement_describes",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "stock_exchange" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_stock_exchange_helm = LightevalTaskConfig(
    name="wikifact:stock_exchange",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="stock_exchange",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "subclass_of" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_subclass_of_helm = LightevalTaskConfig(
    name="wikifact:subclass_of",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="subclass_of",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "subsidiary" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_subsidiary_helm = LightevalTaskConfig(
    name="wikifact:subsidiary",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="subsidiary",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "symptoms_and_signs" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_symptoms_and_signs_helm = LightevalTaskConfig(
    name="wikifact:symptoms_and_signs",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="symptoms_and_signs",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "therapeutic_area" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_therapeutic_area_helm = LightevalTaskConfig(
    name="wikifact:therapeutic_area",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="therapeutic_area",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "time_of_discovery_or_invention" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_time_of_discovery_or_invention_helm = LightevalTaskConfig(
    name="wikifact:time_of_discovery_or_invention",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="time_of_discovery_or_invention",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "twinned_administrative_body" subset of
# lighteval/wikifact. Evaluated on the "test" split with no few-shot split
# configured; scored with the exact / quasi-exact / prefix match metrics.
wikifact_twinned_administrative_body_helm = LightevalTaskConfig(
    name="wikifact:twinned_administrative_body",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="twinned_administrative_body",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# HELM-suite WikiFact task for the "work_location" subset of lighteval/wikifact.
# Evaluated on the "test" split with no few-shot split configured; scored with
# the exact / quasi-exact / prefix-exact / prefix-quasi-exact match metrics.
wikifact_work_location_helm = LightevalTaskConfig(
    name="wikifact:work_location",
    suite=["helm"],
    prompt_function=prompt.wikifact,
    hf_repo="lighteval/wikifact",
    hf_subset="work_location",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=8,
    metric=[
        Metrics.exact_match,
        Metrics.quasi_exact_match,
        Metrics.prefix_exact_match,
        Metrics.prefix_quasi_exact_match,
    ],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# "lighteval"-suite task "wikitext:2": loads the "wikitext-2-raw-v1" subset of
# the "wikitext" dataset and evaluates on its "test" split. Scored with the
# word perplexity, byte perplexity and bits-per-byte metrics; generation_size
# is -1 and no few-shot split is configured.
wikitext_2_lighteval = LightevalTaskConfig(
    name="wikitext:2",
    suite=["lighteval"],
    prompt_function=prompt.wikitext,
    hf_repo="wikitext",
    hf_subset="wikitext-2-raw-v1",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# "harness"-suite variant of "wikitext:103:document_level": loads the
# "wikitext-103-raw-v1" subset of EleutherAI/wikitext_document_level, formats
# prompts with prompt.wikitext_harness and evaluates on the "test" split.
# Scored with word perplexity, byte perplexity and bits-per-byte.
wikitext_103_document_level_harness = LightevalTaskConfig(
    name="wikitext:103:document_level",
    suite=["harness"],
    prompt_function=prompt.wikitext_harness,
    hf_repo="EleutherAI/wikitext_document_level",
    hf_subset="wikitext-103-raw-v1",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# "helm"-suite variant of "wikitext:103:document_level": loads the
# "wikitext-103-raw-v1" subset of EleutherAI/wikitext_document_level, formats
# prompts with prompt.wikitext_helm and evaluates on the "test" split.
# Scored with word perplexity, byte perplexity and bits-per-byte.
wikitext_103_document_level_helm = LightevalTaskConfig(
    name="wikitext:103:document_level",
    suite=["helm"],
    prompt_function=prompt.wikitext_helm,
    hf_repo="EleutherAI/wikitext_document_level",
    hf_subset="wikitext-103-raw-v1",
    hf_avail_splits=["train", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=-1,
    metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# BigBench task "wino_x_german" (suites "bigbench" and "bigbench_json"): loads
# the "wino_x_german" subset of the "bigbench" dataset and evaluates on the
# "default" split. Scored with the log-likelihood accuracy metric;
# generation_size is 1 and no few-shot split is configured.
wino_x_german_bigbench = LightevalTaskConfig(
    name="wino_x_german",
    suite=["bigbench", "bigbench_json"],
    prompt_function=prompt.bigbench,
    hf_repo="bigbench",
    hf_subset="wino_x_german",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=1,
    metric=[Metrics.loglikelihood_acc],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# Task "winogrande" for the "leaderboard" suite: "validation" split of the
# "winogrande" dataset, subset "winogrande_xl", scored with log-likelihood
# accuracy. Few-shot examples are selected with "random_sampling".
winogrande_leaderboard = LightevalTaskConfig(
    name="winogrande",
    suite=["leaderboard"],
    version=0,
    # Dataset location and splits
    hf_repo="winogrande",
    hf_subset="winogrande_xl",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.winogrande,
    few_shots_split=None,
    few_shots_select="random_sampling",
    # Generation settings and metrics
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "winowhy": "default" split of the "bigbench" dataset, subset "winowhy",
# registered under the bigbench_lite, bigbench and bigbench_json suites.
winowhy_bigbench_lite = LightevalTaskConfig(
    name="winowhy",
    suite=["bigbench_lite", "bigbench", "bigbench_json"],
    version=0,
    # Dataset location and splits
    hf_repo="bigbench",
    hf_subset="winowhy",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.bigbench_whitespace_after_query,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
)
# Task "wmt08:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_cs-en", scored with the bleu, chrf and ter metrics.
wmt08_cs_en_lighteval = LightevalTaskConfig(
    name="wmt08:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_de-en", scored with the bleu, chrf and ter metrics.
wmt08_de_en_lighteval = LightevalTaskConfig(
    name="wmt08:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_en-cs", scored with the bleu, chrf and ter metrics.
wmt08_en_cs_lighteval = LightevalTaskConfig(
    name="wmt08:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_en-de", scored with the bleu, chrf and ter metrics.
wmt08_en_de_lighteval = LightevalTaskConfig(
    name="wmt08:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:en-es": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_en-es", scored with the bleu, chrf and ter metrics.
wmt08_en_es_lighteval = LightevalTaskConfig(
    name="wmt08:en-es",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_en-es",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:en-fr": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_en-fr", scored with the bleu, chrf and ter metrics.
wmt08_en_fr_lighteval = LightevalTaskConfig(
    name="wmt08:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:en-hu": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_en-hu", scored with the bleu, chrf and ter metrics.
wmt08_en_hu_lighteval = LightevalTaskConfig(
    name="wmt08:en-hu",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_en-hu",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:es-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_es-en", scored with the bleu, chrf and ter metrics.
wmt08_es_en_lighteval = LightevalTaskConfig(
    name="wmt08:es-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_es-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:fr-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_fr-en", scored with the bleu, chrf and ter metrics.
wmt08_fr_en_lighteval = LightevalTaskConfig(
    name="wmt08:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt08:hu-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt08_hu-en", scored with the bleu, chrf and ter metrics.
wmt08_hu_en_lighteval = LightevalTaskConfig(
    name="wmt08:hu-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt08_hu-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_cs-en", scored with the bleu, chrf and ter metrics.
wmt09_cs_en_lighteval = LightevalTaskConfig(
    name="wmt09:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_de-en", scored with the bleu, chrf and ter metrics.
wmt09_de_en_lighteval = LightevalTaskConfig(
    name="wmt09:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_en-cs", scored with the bleu, chrf and ter metrics.
wmt09_en_cs_lighteval = LightevalTaskConfig(
    name="wmt09:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_en-de", scored with the bleu, chrf and ter metrics.
wmt09_en_de_lighteval = LightevalTaskConfig(
    name="wmt09:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:en-es": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_en-es", scored with the bleu, chrf and ter metrics.
wmt09_en_es_lighteval = LightevalTaskConfig(
    name="wmt09:en-es",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_en-es",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:en-fr": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_en-fr", scored with the bleu, chrf and ter metrics.
wmt09_en_fr_lighteval = LightevalTaskConfig(
    name="wmt09:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:en-hu": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_en-hu", scored with the bleu, chrf and ter metrics.
wmt09_en_hu_lighteval = LightevalTaskConfig(
    name="wmt09:en-hu",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_en-hu",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:en-it": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_en-it", scored with the bleu, chrf and ter metrics.
wmt09_en_it_lighteval = LightevalTaskConfig(
    name="wmt09:en-it",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_en-it",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:es-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_es-en", scored with the bleu, chrf and ter metrics.
wmt09_es_en_lighteval = LightevalTaskConfig(
    name="wmt09:es-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_es-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:fr-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_fr-en", scored with the bleu, chrf and ter metrics.
wmt09_fr_en_lighteval = LightevalTaskConfig(
    name="wmt09:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:hu-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_hu-en", scored with the bleu, chrf and ter metrics.
wmt09_hu_en_lighteval = LightevalTaskConfig(
    name="wmt09:hu-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_hu-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt09:it-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt09_it-en", scored with the bleu, chrf and ter metrics.
wmt09_it_en_lighteval = LightevalTaskConfig(
    name="wmt09:it-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt09_it-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_cs-en", scored with the bleu, chrf and ter metrics.
wmt10_cs_en_lighteval = LightevalTaskConfig(
    name="wmt10:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_de-en", scored with the bleu, chrf and ter metrics.
wmt10_de_en_lighteval = LightevalTaskConfig(
    name="wmt10:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_en-cs", scored with the bleu, chrf and ter metrics.
wmt10_en_cs_lighteval = LightevalTaskConfig(
    name="wmt10:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_en-de", scored with the bleu, chrf and ter metrics.
wmt10_en_de_lighteval = LightevalTaskConfig(
    name="wmt10:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:en-es": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_en-es", scored with the bleu, chrf and ter metrics.
wmt10_en_es_lighteval = LightevalTaskConfig(
    name="wmt10:en-es",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_en-es",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:en-fr": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_en-fr", scored with the bleu, chrf and ter metrics.
wmt10_en_fr_lighteval = LightevalTaskConfig(
    name="wmt10:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:es-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_es-en", scored with the bleu, chrf and ter metrics.
wmt10_es_en_lighteval = LightevalTaskConfig(
    name="wmt10:es-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_es-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt10:fr-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt10_fr-en", scored with the bleu, chrf and ter metrics.
wmt10_fr_en_lighteval = LightevalTaskConfig(
    name="wmt10:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt10_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_cs-en", scored with the bleu, chrf and ter metrics.
wmt11_cs_en_lighteval = LightevalTaskConfig(
    name="wmt11:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_de-en", scored with the bleu, chrf and ter metrics.
wmt11_de_en_lighteval = LightevalTaskConfig(
    name="wmt11:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_en-cs", scored with the bleu, chrf and ter metrics.
wmt11_en_cs_lighteval = LightevalTaskConfig(
    name="wmt11:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_en-de", scored with the bleu, chrf and ter metrics.
wmt11_en_de_lighteval = LightevalTaskConfig(
    name="wmt11:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:en-es": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_en-es", scored with the bleu, chrf and ter metrics.
wmt11_en_es_lighteval = LightevalTaskConfig(
    name="wmt11:en-es",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_en-es",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:en-fr": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_en-fr", scored with the bleu, chrf and ter metrics.
wmt11_en_fr_lighteval = LightevalTaskConfig(
    name="wmt11:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:es-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_es-en", scored with the bleu, chrf and ter metrics.
wmt11_es_en_lighteval = LightevalTaskConfig(
    name="wmt11:es-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_es-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt11:fr-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt11_fr-en", scored with the bleu, chrf and ter metrics.
wmt11_fr_en_lighteval = LightevalTaskConfig(
    name="wmt11:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt11_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_cs-en", scored with the bleu, chrf and ter metrics.
wmt12_cs_en_lighteval = LightevalTaskConfig(
    name="wmt12:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_de-en", scored with the bleu, chrf and ter metrics.
wmt12_de_en_lighteval = LightevalTaskConfig(
    name="wmt12:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_en-cs", scored with the bleu, chrf and ter metrics.
wmt12_en_cs_lighteval = LightevalTaskConfig(
    name="wmt12:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_en-de", scored with the bleu, chrf and ter metrics.
wmt12_en_de_lighteval = LightevalTaskConfig(
    name="wmt12:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:en-es": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_en-es", scored with the bleu, chrf and ter metrics.
wmt12_en_es_lighteval = LightevalTaskConfig(
    name="wmt12:en-es",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_en-es",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:en-fr": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_en-fr", scored with the bleu, chrf and ter metrics.
wmt12_en_fr_lighteval = LightevalTaskConfig(
    name="wmt12:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:es-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_es-en", scored with the bleu, chrf and ter metrics.
wmt12_es_en_lighteval = LightevalTaskConfig(
    name="wmt12:es-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_es-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt12:fr-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt12_fr-en", scored with the bleu, chrf and ter metrics.
wmt12_fr_en_lighteval = LightevalTaskConfig(
    name="wmt12:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt12_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_cs-en", scored with the bleu, chrf and ter metrics.
wmt13_cs_en_lighteval = LightevalTaskConfig(
    name="wmt13:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_de-en", scored with the bleu, chrf and ter metrics.
wmt13_de_en_lighteval = LightevalTaskConfig(
    name="wmt13:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_en-cs", scored with the bleu, chrf and ter metrics.
wmt13_en_cs_lighteval = LightevalTaskConfig(
    name="wmt13:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_en-de", scored with the bleu, chrf and ter metrics.
wmt13_en_de_lighteval = LightevalTaskConfig(
    name="wmt13:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:en-es": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_en-es", scored with the bleu, chrf and ter metrics.
wmt13_en_es_lighteval = LightevalTaskConfig(
    name="wmt13:en-es",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_en-es",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:en-fr": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_en-fr", scored with the bleu, chrf and ter metrics.
wmt13_en_fr_lighteval = LightevalTaskConfig(
    name="wmt13:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:en-ru": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_en-ru", scored with the bleu, chrf and ter metrics.
wmt13_en_ru_lighteval = LightevalTaskConfig(
    name="wmt13:en-ru",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:es-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_es-en", scored with the bleu, chrf and ter metrics.
wmt13_es_en_lighteval = LightevalTaskConfig(
    name="wmt13:es-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_es-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:fr-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_fr-en", scored with the bleu, chrf and ter metrics.
wmt13_fr_en_lighteval = LightevalTaskConfig(
    name="wmt13:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt13:ru-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt13_ru-en", scored with the bleu, chrf and ter metrics.
wmt13_ru_en_lighteval = LightevalTaskConfig(
    name="wmt13:ru-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt13_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt14:cs-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt14_cs-en", scored with the bleu, chrf and ter metrics.
wmt14_cs_en_lighteval = LightevalTaskConfig(
    name="wmt14:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt14:de-en": "test" split of lighteval/sacrebleu_manual, subset
# "wmt14_de-en", scored with the bleu, chrf and ter metrics.
wmt14_de_en_lighteval = LightevalTaskConfig(
    name="wmt14:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt14:en-cs": "test" split of lighteval/sacrebleu_manual, subset
# "wmt14_en-cs", scored with the bleu, chrf and ter metrics.
wmt14_en_cs_lighteval = LightevalTaskConfig(
    name="wmt14:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt14:en-de": "test" split of lighteval/sacrebleu_manual, subset
# "wmt14_en-de", scored with the bleu, chrf and ter metrics.
wmt14_en_de_lighteval = LightevalTaskConfig(
    name="wmt14:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# Task "wmt14:en-fr", gpt3_benchmarks variant: reads the "fr-en" subset of the
# "wmt14" dataset rather than lighteval/sacrebleu_manual.
# This config was previously assigned to `wmt14_en_fr_lighteval` too, so the
# assignment right below rebound the name and left this object unreachable
# through any module-level name. It now has its own name;
# `wmt14_en_fr_lighteval` still refers to the sacrebleu variant, as before.
# NOTE(review): both variants share name "wmt14:en-fr" and the "lighteval"
# suite; confirm how the task registry resolves that pair.
wmt14_en_fr_gpt3_benchmarks = LightevalTaskConfig(
    name="wmt14:en-fr",
    suite=["lighteval", "gpt3_benchmarks"],
    prompt_function=prompt.wmt_alphabetical,
    hf_repo="wmt14",
    hf_subset="fr-en",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=None,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# Task "wmt14:en-fr", sacrebleu variant: "test" split of
# lighteval/sacrebleu_manual, subset "wmt14_en-fr", scored with the bleu, chrf
# and ter metrics.
wmt14_en_fr_lighteval = LightevalTaskConfig(
    name="wmt14:en-fr",
    suite=["lighteval", "sacrebleu"],
    prompt_function=prompt.wmt_alphabetical,
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=None,
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    stop_sequence=["\n"],
    trust_dataset=True,
    version=0,
)
# Task "wmt14:en-hi": "test" split of lighteval/sacrebleu_manual, subset
# "wmt14_en-hi", scored with the bleu, chrf and ter metrics.
wmt14_en_hi_lighteval = LightevalTaskConfig(
    name="wmt14:en-hi",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # Dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_en-hi",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # Prompt construction and few-shot settings
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # Generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt14:en-ru`: `wmt14_en-ru` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt14_en_ru_lighteval = LightevalTaskConfig(
    name="wmt14:en-ru",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt14:fr-en` read from the `fr-en` subset of the `wmt14` dataset (suites `lighteval` and
# `gpt3_benchmarks`); `test` split; BLEU, chrF, TER.
# This config used to be assigned to `wmt14_fr_en_lighteval`, the same name as the `sacrebleu`
# variant assigned directly below; that second assignment rebinds the name, so this object was
# unreachable as a module attribute. It now has its own name. `wmt14_fr_en_lighteval` still
# resolves to the `sacrebleu` variant, exactly as before.
# NOTE(review): both variants list suite `lighteval` under the same task name; confirm which one
# `lighteval|wmt14:fr-en` is meant to resolve to.
wmt14_fr_en_gpt3_benchmarks = LightevalTaskConfig(
    name="wmt14:fr-en",
    suite=["lighteval", "gpt3_benchmarks"],
    version=0,
    # dataset location and splits
    hf_repo="wmt14",
    hf_subset="fr-en",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt14:fr-en`: `wmt14_fr-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt14_fr_en_lighteval = LightevalTaskConfig(
    name="wmt14:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt14:hi-en`: `wmt14_hi-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt14_hi_en_lighteval = LightevalTaskConfig(
    name="wmt14:hi-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_hi-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt14:ru-en`: `wmt14_ru-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt14_ru_en_lighteval = LightevalTaskConfig(
    name="wmt14:ru-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt14_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt14:cs-en` (suite `helm`): `cs-en` subset of `lighteval/wmt14`; `validation` and `test` splits; BLEU only.
wmt14_cs_en_helm = LightevalTaskConfig(
    name="wmt14:cs-en",
    suite=["helm"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/wmt14",
    hf_subset="cs-en",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.bleu],
)
# `wmt14:de-en` (suite `helm`): `de-en` subset of `lighteval/wmt14`; `validation` and `test` splits; BLEU only.
wmt14_de_en_helm = LightevalTaskConfig(
    name="wmt14:de-en",
    suite=["helm"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/wmt14",
    hf_subset="de-en",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.bleu],
)
# `wmt14:fr-en` (suite `helm`): `fr-en` subset of `lighteval/wmt14`; `validation` and `test` splits; BLEU only.
wmt14_fr_en_helm = LightevalTaskConfig(
    name="wmt14:fr-en",
    suite=["helm"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/wmt14",
    hf_subset="fr-en",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.bleu],
)
# `wmt14:hi-en` (suite `helm`): `hi-en` subset of `lighteval/wmt14`; `validation` and `test` splits; BLEU only.
wmt14_hi_en_helm = LightevalTaskConfig(
    name="wmt14:hi-en",
    suite=["helm"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/wmt14",
    hf_subset="hi-en",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.bleu],
)
# `wmt14:ru-en` (suite `helm`): `ru-en` subset of `lighteval/wmt14`; `validation` and `test` splits; BLEU only.
wmt14_ru_en_helm = LightevalTaskConfig(
    name="wmt14:ru-en",
    suite=["helm"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/wmt14",
    hf_subset="ru-en",
    hf_avail_splits=["train", "test", "validation"],
    evaluation_splits=["validation", "test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=100,
    stop_sequence=["\n"],
    metric=[Metrics.bleu],
)
# `wmt15:cs-en`: `wmt15_cs-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_cs_en_lighteval = LightevalTaskConfig(
    name="wmt15:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:de-en`: `wmt15_de-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_de_en_lighteval = LightevalTaskConfig(
    name="wmt15:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:en-cs`: `wmt15_en-cs` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_en_cs_lighteval = LightevalTaskConfig(
    name="wmt15:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:en-de`: `wmt15_en-de` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_en_de_lighteval = LightevalTaskConfig(
    name="wmt15:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:en-fi`: `wmt15_en-fi` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_en_fi_lighteval = LightevalTaskConfig(
    name="wmt15:en-fi",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_en-fi",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:en-fr`: `wmt15_en-fr` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_en_fr_lighteval = LightevalTaskConfig(
    name="wmt15:en-fr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_en-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:en-ru`: `wmt15_en-ru` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_en_ru_lighteval = LightevalTaskConfig(
    name="wmt15:en-ru",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:fi-en`: `wmt15_fi-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_fi_en_lighteval = LightevalTaskConfig(
    name="wmt15:fi-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_fi-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:fr-en`: `wmt15_fr-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_fr_en_lighteval = LightevalTaskConfig(
    name="wmt15:fr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_fr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt15:ru-en`: `wmt15_ru-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt15_ru_en_lighteval = LightevalTaskConfig(
    name="wmt15:ru-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt15_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:cs-en`: `wmt16_cs-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_cs_en_lighteval = LightevalTaskConfig(
    name="wmt16:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:de-en` read from the `de-en` subset of the `wmt16` dataset (suites `lighteval` and
# `gpt3_benchmarks`); `test` split; BLEU, chrF, TER.
# This config used to be assigned to `wmt16_de_en_lighteval`, the same name as the `sacrebleu`
# variant assigned directly below; that second assignment rebinds the name, so this object was
# unreachable as a module attribute. It now has its own name. `wmt16_de_en_lighteval` still
# resolves to the `sacrebleu` variant, exactly as before.
# NOTE(review): both variants list suite `lighteval` under the same task name; confirm which one
# `lighteval|wmt16:de-en` is meant to resolve to.
wmt16_de_en_gpt3_benchmarks = LightevalTaskConfig(
    name="wmt16:de-en",
    suite=["lighteval", "gpt3_benchmarks"],
    version=0,
    # dataset location and splits
    hf_repo="wmt16",
    hf_subset="de-en",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:de-en`: `wmt16_de-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_de_en_lighteval = LightevalTaskConfig(
    name="wmt16:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-cs`: `wmt16_en-cs` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_en_cs_lighteval = LightevalTaskConfig(
    name="wmt16:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-de` read from the `de-en` subset of the `wmt16` dataset (suites `lighteval` and
# `gpt3_benchmarks`); `test` split; BLEU, chrF, TER.
# This config used to be assigned to `wmt16_en_de_lighteval`, the same name as the `sacrebleu`
# variant assigned directly below; that second assignment rebinds the name, so this object was
# unreachable as a module attribute. It now has its own name. `wmt16_en_de_lighteval` still
# resolves to the `sacrebleu` variant, exactly as before.
# NOTE(review): both variants list suite `lighteval` under the same task name; confirm which one
# `lighteval|wmt16:en-de` is meant to resolve to.
wmt16_en_de_gpt3_benchmarks = LightevalTaskConfig(
    name="wmt16:en-de",
    suite=["lighteval", "gpt3_benchmarks"],
    version=0,
    # dataset location and splits
    hf_repo="wmt16",
    hf_subset="de-en",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-de`: `wmt16_en-de` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_en_de_lighteval = LightevalTaskConfig(
    name="wmt16:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-fi`: `wmt16_en-fi` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_en_fi_lighteval = LightevalTaskConfig(
    name="wmt16:en-fi",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_en-fi",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-ro` read from the `ro-en` subset of the `wmt16` dataset (suites `lighteval` and
# `gpt3_benchmarks`); `test` split; BLEU, chrF, TER.
# This config used to be assigned to `wmt16_en_ro_lighteval`, the same name as the `sacrebleu`
# variant assigned directly below; that second assignment rebinds the name, so this object was
# unreachable as a module attribute. It now has its own name. `wmt16_en_ro_lighteval` still
# resolves to the `sacrebleu` variant, exactly as before.
# NOTE(review): both variants list suite `lighteval` under the same task name; confirm which one
# `lighteval|wmt16:en-ro` is meant to resolve to.
wmt16_en_ro_gpt3_benchmarks = LightevalTaskConfig(
    name="wmt16:en-ro",
    suite=["lighteval", "gpt3_benchmarks"],
    version=0,
    # dataset location and splits
    hf_repo="wmt16",
    hf_subset="ro-en",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-ro`: `wmt16_en-ro` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_en_ro_lighteval = LightevalTaskConfig(
    name="wmt16:en-ro",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_en-ro",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-ru`: `wmt16_en-ru` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_en_ru_lighteval = LightevalTaskConfig(
    name="wmt16:en-ru",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:en-tr`: `wmt16_en-tr` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_en_tr_lighteval = LightevalTaskConfig(
    name="wmt16:en-tr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_en-tr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:fi-en`: `wmt16_fi-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_fi_en_lighteval = LightevalTaskConfig(
    name="wmt16:fi-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_fi-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:ro-en` read from the `ro-en` subset of the `wmt16` dataset (suites `lighteval` and
# `gpt3_benchmarks`); `test` split; BLEU, chrF, TER.
# This config used to be assigned to `wmt16_ro_en_lighteval`, the same name as the `sacrebleu`
# variant assigned directly below; that second assignment rebinds the name, so this object was
# unreachable as a module attribute. It now has its own name. `wmt16_ro_en_lighteval` still
# resolves to the `sacrebleu` variant, exactly as before.
# NOTE(review): both variants list suite `lighteval` under the same task name; confirm which one
# `lighteval|wmt16:ro-en` is meant to resolve to.
wmt16_ro_en_gpt3_benchmarks = LightevalTaskConfig(
    name="wmt16:ro-en",
    suite=["lighteval", "gpt3_benchmarks"],
    version=0,
    # dataset location and splits
    hf_repo="wmt16",
    hf_subset="ro-en",
    hf_avail_splits=["train", "validation", "test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:ro-en`: `wmt16_ro-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_ro_en_lighteval = LightevalTaskConfig(
    name="wmt16:ro-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_ro-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:ru-en`: `wmt16_ru-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_ru_en_lighteval = LightevalTaskConfig(
    name="wmt16:ru-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt16:tr-en`: `wmt16_tr-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt16_tr_en_lighteval = LightevalTaskConfig(
    name="wmt16:tr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt16_tr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:cs-en`: `wmt17_cs-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_cs_en_lighteval = LightevalTaskConfig(
    name="wmt17:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:de-en`: `wmt17_de-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_de_en_lighteval = LightevalTaskConfig(
    name="wmt17:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-cs`: `wmt17_en-cs` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_cs_lighteval = LightevalTaskConfig(
    name="wmt17:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-de`: `wmt17_en-de` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_de_lighteval = LightevalTaskConfig(
    name="wmt17:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-fi`: `wmt17_en-fi` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_fi_lighteval = LightevalTaskConfig(
    name="wmt17:en-fi",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-fi",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-lv`: `wmt17_en-lv` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_lv_lighteval = LightevalTaskConfig(
    name="wmt17:en-lv",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-lv",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-ru`: `wmt17_en-ru` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_ru_lighteval = LightevalTaskConfig(
    name="wmt17:en-ru",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-tr`: `wmt17_en-tr` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_tr_lighteval = LightevalTaskConfig(
    name="wmt17:en-tr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-tr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:en-zh`: `wmt17_en-zh` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_en_zh_lighteval = LightevalTaskConfig(
    name="wmt17:en-zh",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_en-zh",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:fi-en`: `wmt17_fi-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_fi_en_lighteval = LightevalTaskConfig(
    name="wmt17:fi-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_fi-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:lv-en`: `wmt17_lv-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_lv_en_lighteval = LightevalTaskConfig(
    name="wmt17:lv-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_lv-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:ru-en`: `wmt17_ru-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_ru_en_lighteval = LightevalTaskConfig(
    name="wmt17:ru-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:tr-en`: `wmt17_tr-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_tr_en_lighteval = LightevalTaskConfig(
    name="wmt17:tr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_tr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt17:zh-en`: `wmt17_zh-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt17_zh_en_lighteval = LightevalTaskConfig(
    name="wmt17:zh-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt17_zh-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:cs-en`: `wmt18_cs-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_cs_en_lighteval = LightevalTaskConfig(
    name="wmt18:cs-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:de-en`: `wmt18_de-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_de_en_lighteval = LightevalTaskConfig(
    name="wmt18:de-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-cs`: `wmt18_en-cs` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_cs_lighteval = LightevalTaskConfig(
    name="wmt18:en-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-de`: `wmt18_en-de` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_de_lighteval = LightevalTaskConfig(
    name="wmt18:en-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-et`: `wmt18_en-et` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_et_lighteval = LightevalTaskConfig(
    name="wmt18:en-et",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-et",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-fi`: `wmt18_en-fi` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_fi_lighteval = LightevalTaskConfig(
    name="wmt18:en-fi",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-fi",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-ru`: `wmt18_en-ru` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_ru_lighteval = LightevalTaskConfig(
    name="wmt18:en-ru",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-tr`: `wmt18_en-tr` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_tr_lighteval = LightevalTaskConfig(
    name="wmt18:en-tr",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-tr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:en-zh`: `wmt18_en-zh` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_en_zh_lighteval = LightevalTaskConfig(
    name="wmt18:en-zh",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_en-zh",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:et-en`: `wmt18_et-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_et_en_lighteval = LightevalTaskConfig(
    name="wmt18:et-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_et-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:fi-en`: `wmt18_fi-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_fi_en_lighteval = LightevalTaskConfig(
    name="wmt18:fi-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_fi-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:ru-en`: `wmt18_ru-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_ru_en_lighteval = LightevalTaskConfig(
    name="wmt18:ru-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:tr-en`: `wmt18_tr-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_tr_en_lighteval = LightevalTaskConfig(
    name="wmt18:tr-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_tr-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt18:zh-en`: `wmt18_zh-en` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt18_zh_en_lighteval = LightevalTaskConfig(
    name="wmt18:zh-en",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt18_zh-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt19:cs-de`: `wmt19_cs-de` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt19_cs_de_lighteval = LightevalTaskConfig(
    name="wmt19:cs-de",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_cs-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# `wmt19:de-cs`: `wmt19_de-cs` subset of `lighteval/sacrebleu_manual`; `test` split; BLEU, chrF, TER.
wmt19_de_cs_lighteval = LightevalTaskConfig(
    name="wmt19:de-cs",
    suite=["lighteval", "sacrebleu"],
    version=0,
    # dataset location and splits
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_de-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    trust_dataset=True,
    # prompt builder; no few-shot split or selection is configured
    prompt_function=prompt.wmt_reverse_alphabetical,
    few_shots_split=None,
    few_shots_select=None,
    # generation settings and metrics
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
)
# WMT19 de-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_de_en_lighteval = LightevalTaskConfig(
    name="wmt19:de-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 de-fr: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_de_fr_lighteval = LightevalTaskConfig(
    name="wmt19:de-fr",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_de-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-cs: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_cs_lighteval = LightevalTaskConfig(
    name="wmt19:en-cs",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-de: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_de_lighteval = LightevalTaskConfig(
    name="wmt19:en-de",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-fi: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_fi_lighteval = LightevalTaskConfig(
    name="wmt19:en-fi",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-fi",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-gu: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_gu_lighteval = LightevalTaskConfig(
    name="wmt19:en-gu",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-gu",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-kk: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_kk_lighteval = LightevalTaskConfig(
    name="wmt19:en-kk",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-kk",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-lt: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_lt_lighteval = LightevalTaskConfig(
    name="wmt19:en-lt",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-lt",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-ru: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_ru_lighteval = LightevalTaskConfig(
    name="wmt19:en-ru",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 en-zh: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_en_zh_lighteval = LightevalTaskConfig(
    name="wmt19:en-zh",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_en-zh",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 fi-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_fi_en_lighteval = LightevalTaskConfig(
    name="wmt19:fi-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_fi-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 fr-de: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_fr_de_lighteval = LightevalTaskConfig(
    name="wmt19:fr-de",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_fr-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 gu-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_gu_en_lighteval = LightevalTaskConfig(
    name="wmt19:gu-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_gu-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 kk-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_kk_en_lighteval = LightevalTaskConfig(
    name="wmt19:kk-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_kk-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 lt-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_lt_en_lighteval = LightevalTaskConfig(
    name="wmt19:lt-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_lt-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 ru-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_ru_en_lighteval = LightevalTaskConfig(
    name="wmt19:ru-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT19 zh-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt19_zh_en_lighteval = LightevalTaskConfig(
    name="wmt19:zh-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt19_zh-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 cs-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_cs_en_lighteval = LightevalTaskConfig(
    name="wmt20:cs-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_cs-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 de-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_de_en_lighteval = LightevalTaskConfig(
    name="wmt20:de-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_de-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 de-fr: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_de_fr_lighteval = LightevalTaskConfig(
    name="wmt20:de-fr",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_de-fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-cs: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_cs_lighteval = LightevalTaskConfig(
    name="wmt20:en-cs",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-cs",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-de: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_de_lighteval = LightevalTaskConfig(
    name="wmt20:en-de",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-iu: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_iu_lighteval = LightevalTaskConfig(
    name="wmt20:en-iu",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-iu",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-ja: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_ja_lighteval = LightevalTaskConfig(
    name="wmt20:en-ja",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-ja",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-km: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_km_lighteval = LightevalTaskConfig(
    name="wmt20:en-km",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-km",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-pl: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_pl_lighteval = LightevalTaskConfig(
    name="wmt20:en-pl",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-pl",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-ps: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_ps_lighteval = LightevalTaskConfig(
    name="wmt20:en-ps",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-ps",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-ru: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_ru_lighteval = LightevalTaskConfig(
    name="wmt20:en-ru",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-ta: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_ta_lighteval = LightevalTaskConfig(
    name="wmt20:en-ta",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-ta",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 en-zh: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_en_zh_lighteval = LightevalTaskConfig(
    name="wmt20:en-zh",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_en-zh",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 fr-de: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_fr_de_lighteval = LightevalTaskConfig(
    name="wmt20:fr-de",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_fr-de",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 iu-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_iu_en_lighteval = LightevalTaskConfig(
    name="wmt20:iu-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_iu-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 ja-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_ja_en_lighteval = LightevalTaskConfig(
    name="wmt20:ja-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_ja-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 km-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_km_en_lighteval = LightevalTaskConfig(
    name="wmt20:km-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_km-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 pl-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_pl_en_lighteval = LightevalTaskConfig(
    name="wmt20:pl-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_pl-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 ps-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_ps_en_lighteval = LightevalTaskConfig(
    name="wmt20:ps-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_ps-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 ru-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_ru_en_lighteval = LightevalTaskConfig(
    name="wmt20:ru-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_ru-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 ta-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_ta_en_lighteval = LightevalTaskConfig(
    name="wmt20:ta-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_ta-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# WMT20 zh-en: evaluated on the "test" split with the bleu, chrf and ter metrics.
wmt20_zh_en_lighteval = LightevalTaskConfig(
    name="wmt20:zh-en",
    suite=["lighteval", "sacrebleu"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="lighteval/sacrebleu_manual",
    hf_subset="wmt20_zh-en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wmt_reverse_alphabetical,
    generation_size=None,
    stop_sequence=["\n"],
    metric=[Metrics.bleu, Metrics.chrf, Metrics.ter],
    version=0,
)
# BIG-bench word_sorting: evaluated on the "default" split with perfect_exact_match.
word_sorting_bigbench = LightevalTaskConfig(
    name="word_sorting",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="bigbench",
    hf_subset="word_sorting",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.bigbench,
    # NOTE(review): generation_size=1 next to an exact-match metric looks tight if this
    # value caps the number of generated tokens - confirm it is intended.
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
    version=0,
)
# BIG-bench word_unscrambling: evaluated on the "default" split with perfect_exact_match.
word_unscrambling_bigbench = LightevalTaskConfig(
    name="word_unscrambling",
    suite=["bigbench", "bigbench_json"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="bigbench",
    hf_subset="word_unscrambling",
    hf_avail_splits=["default", "train", "validation"],
    evaluation_splits=["default"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.bigbench,
    # NOTE(review): generation_size=1 next to an exact-match metric looks tight if this
    # value caps the number of generated tokens - confirm it is intended.
    generation_size=1,
    stop_sequence=["\n"],
    metric=[Metrics.perfect_exact_match],
    version=0,
)
# WSC273 (subset "wsc273" of winograd_wsc): "test" split, scored with loglikelihood_acc.
wsc273_lighteval = LightevalTaskConfig(
    name="wsc273",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="winograd_wsc",
    hf_subset="wsc273",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.wsc273,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA task "xcopa:en": evaluated on the "test" split with loglikelihood_acc.
xcopa_en_lighteval = LightevalTaskConfig(
    name="xcopa:en",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    # NOTE(review): every sibling xcopa task passes a language code here, while this
    # one passes "default" - confirm that such a subset exists in the "xcopa" repo.
    hf_subset="default",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_en,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "et" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_et_lighteval = LightevalTaskConfig(
    name="xcopa:et",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="et",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_et,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "ht" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_ht_lighteval = LightevalTaskConfig(
    name="xcopa:ht",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="ht",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_ht,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "it" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_it_lighteval = LightevalTaskConfig(
    name="xcopa:it",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="it",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_it,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "id" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_id_lighteval = LightevalTaskConfig(
    name="xcopa:id",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="id",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_id,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "qu" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_qu_lighteval = LightevalTaskConfig(
    name="xcopa:qu",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="qu",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_qu,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "sw" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_sw_lighteval = LightevalTaskConfig(
    name="xcopa:sw",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="sw",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_sw,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "zh" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_zh_lighteval = LightevalTaskConfig(
    name="xcopa:zh",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="zh",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_zh,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "ta" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_ta_lighteval = LightevalTaskConfig(
    name="xcopa:ta",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="ta",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_ta,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "th" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_th_lighteval = LightevalTaskConfig(
    name="xcopa:th",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="th",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_th,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "tr" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_tr_lighteval = LightevalTaskConfig(
    name="xcopa:tr",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="tr",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_tr,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XCOPA "vi" subset: evaluated on the "test" split with loglikelihood_acc.
xcopa_vi_lighteval = LightevalTaskConfig(
    name="xcopa:vi",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="xcopa",
    hf_subset="vi",
    hf_avail_splits=["test", "train", "validation"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting, generation settings and metrics.
    prompt_function=prompt.xcopa_vi,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "en" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_en_lighteval = LightevalTaskConfig(
    name="xstory_cloze:en",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="en",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "ru" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_ru_lighteval = LightevalTaskConfig(
    name="xstory_cloze:ru",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="ru",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "zh" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_zh_lighteval = LightevalTaskConfig(
    name="xstory_cloze:zh",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="zh",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "es" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_es_lighteval = LightevalTaskConfig(
    name="xstory_cloze:es",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="es",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "ar" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_ar_lighteval = LightevalTaskConfig(
    name="xstory_cloze:ar",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="ar",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "hi" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_hi_lighteval = LightevalTaskConfig(
    name="xstory_cloze:hi",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="hi",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "id" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_id_lighteval = LightevalTaskConfig(
    name="xstory_cloze:id",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="id",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "te" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_te_lighteval = LightevalTaskConfig(
    name="xstory_cloze:te",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="te",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "sw" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_sw_lighteval = LightevalTaskConfig(
    name="xstory_cloze:sw",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="sw",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "eu" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_eu_lighteval = LightevalTaskConfig(
    name="xstory_cloze:eu",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="eu",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XStoryCloze "my" subset: evaluated on the "eval" split with loglikelihood_acc.
xstory_cloze_my_lighteval = LightevalTaskConfig(
    name="xstory_cloze:my",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="juletxara/xstory_cloze",
    hf_subset="my",
    hf_avail_splits=["training", "eval"],
    evaluation_splits=["eval"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared storycloze prompt), generation settings and metrics.
    prompt_function=prompt.storycloze,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XWinograd "en" subset: evaluated on the "test" split with loglikelihood_acc.
xwinograd_en_lighteval = LightevalTaskConfig(
    name="xwinograd:en",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="Muennighoff/xwinograd",
    hf_subset="en",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared winogrande prompt), generation settings and metrics.
    prompt_function=prompt.winogrande,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XWinograd "fr" subset: evaluated on the "test" split with loglikelihood_acc.
xwinograd_fr_lighteval = LightevalTaskConfig(
    name="xwinograd:fr",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="Muennighoff/xwinograd",
    hf_subset="fr",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared winogrande prompt), generation settings and metrics.
    prompt_function=prompt.winogrande,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XWinograd "jp" subset: evaluated on the "test" split with loglikelihood_acc.
xwinograd_jp_lighteval = LightevalTaskConfig(
    name="xwinograd:jp",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="Muennighoff/xwinograd",
    hf_subset="jp",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared winogrande prompt), generation settings and metrics.
    prompt_function=prompt.winogrande,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XWinograd "pt" subset: evaluated on the "test" split with loglikelihood_acc.
xwinograd_pt_lighteval = LightevalTaskConfig(
    name="xwinograd:pt",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="Muennighoff/xwinograd",
    hf_subset="pt",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared winogrande prompt), generation settings and metrics.
    prompt_function=prompt.winogrande,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XWinograd "ru" subset: evaluated on the "test" split with loglikelihood_acc.
xwinograd_ru_lighteval = LightevalTaskConfig(
    name="xwinograd:ru",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="Muennighoff/xwinograd",
    hf_subset="ru",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared winogrande prompt), generation settings and metrics.
    prompt_function=prompt.winogrande,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
# XWinograd "zh" subset: evaluated on the "test" split with loglikelihood_acc.
xwinograd_zh_lighteval = LightevalTaskConfig(
    name="xwinograd:zh",
    suite=["lighteval"],
    # Dataset source and splits (no few-shot split is configured).
    hf_repo="Muennighoff/xwinograd",
    hf_subset="zh",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    trust_dataset=True,
    # Prompting (shared winogrande prompt), generation settings and metrics.
    prompt_function=prompt.winogrande,
    generation_size=-1,
    stop_sequence=["\n"],
    metric=[Metrics.loglikelihood_acc],
    version=0,
)
