# Evaluation configuration named "all": a single run configuration that lists
# every benchmark task below under `tasks`.
name: all
# NOTE(review): this value is unquoted, so YAML loaders read it as the float
# 1.0 rather than the string "1.0". Confirm which type the consumer expects
# before quoting it.
version: 1.0
description: Config of all benchmark in model evaluation.

# Device the evaluation runs on.
# Presumably a PyTorch-style device string (CUDA device index 0) — TODO confirm.
device: cuda:0
# Format of the model to load.
# Presumably "hf" stands for Hugging Face — verify against the loader.
model_format: hf
# Batch size used for the run.
batch_size: 8
# Seed value; presumably used to make runs reproducible — confirm in the runner.
seed: 10
# Presumably controls whether a beginning-of-sequence token is prepended to
# each input — TODO confirm against the consumer.
add_bos_token: true
# Accepted values for dtype: auto, float, half.
dtype: auto
# Benchmark tasks included in this configuration, one task name per entry.
# NOTE(review): the names look like lm-evaluation-harness task identifiers —
# confirm they match the task registry of the evaluation backend in use.
tasks:
  - hellaswag
  - sciq
  - arc_easy
  - arc_challenge
  - openbookqa
  - piqa
  - winogrande
  - lambada_openai
  - gsm8k
  - mathqa
  - gpqa_diamond_zeroshot
  - humaneval
  - mbpp
