#!/usr/bin/env bash
#
# Run eval_llm.py against the huggyllama/llama-7b checkpoint on a fixed,
# comma-separated list of tasks, using the "cuda" device.
#
# Usage:
#   <this script> [extra eval_llm.py arguments...]
#
# Notes:
#   - eval_llm.py is referenced by a relative path, so this must be run from
#     the directory that contains it.
#   - Any arguments given to this script are appended verbatim after the
#     defaults below. With no arguments the command line is exactly the
#     fixed one.
#     NOTE(review): whether a repeated flag (e.g. a second --device)
#     overrides the default depends on eval_llm.py's argument parser — confirm.
#   - NOTE(review): the flag names look like the lm-evaluation-harness CLI;
#     verify against eval_llm.py before relying on that.
#   - The python invocation is the last command, so its exit status is the
#     exit status of this script.

python eval_llm.py \
    --model hf-causal-experimental \
    --model_args pretrained=huggyllama/llama-7b \
    --tasks openbookqa,arc_easy,winogrande,hellaswag,arc_challenge,piqa,boolq \
    --device cuda \
    "$@"