
 gpt-4-0613 & math\_word\_problem\_generation & 37.5 & 37.5 & -- & -- \\
 gpt-4-0613 & finegrained\_fact\_verification & 44.1 & 53.1 & 41.8 & -- \\
 gpt-4-0613 & answerability\_classification & 41.1 & -- & -- & 27.5 \\
 Llama-2-70b-chat-hf & math\_word\_problem\_generation & 49.7 & 47.8 & -- & -- \\
 Llama-2-70b-chat-hf & finegrained\_fact\_verification & 27.8 & 26.4 & 22.9 & -- \\
 Llama-2-70b-chat-hf & answerability\_classification & 20.4 & -- & -- & 19.5 \\
