#!/usr/bin/env bash
#
# Runs the lm_eval CLI with zero few-shot examples over a fixed list of
# benchmark tasks, using the local model path ../phi-4-local loaded with
# dtype bfloat16, and passes ./tf_phi-4_p50.json as the output path.
#
# Both the model path and the output path are relative, so invoke this
# script from the directory against which they are meant to be resolved.

# Abort on the first failing command or on use of an unset variable.
# There are no pipelines in this script, so `pipefail` is deliberately
# omitted; that keeps the script runnable under a plain POSIX `sh` too.
set -eu

# Comma-separated task names, handed to --tasks as a single argument.
readonly TASKS="boolq,piqa,hellaswag,winogrande,arc_easy,arc_challenge,openbookqa,mmlu"

# Fail early with a readable message (and the conventional "command not
# found" status) instead of a bare shell error if the CLI is missing.
if ! command -v lm_eval >/dev/null 2>&1; then
  printf 'error: lm_eval not found on PATH\n' >&2
  exit 127
fi

lm_eval \
  --model hf \
  --model_args pretrained=../phi-4-local,dtype="bfloat16" \
  --tasks "$TASKS" \
  --num_fewshot 0 \
  --batch_size 16 \
  --output_path ./tf_phi-4_p50.json