
 gpt-4-0613 & math\_word\_problem\_generation & 52.5 & 49.1 & -- & -- \\
 gpt-4-0613 & finegrained\_fact\_verification & 48.5 & 40.1 & 40.9 & -- \\
 gpt-4-0613 & answerability\_classification & 42.1 & -- & -- & 36.7 \\
 Llama-2-70b-chat-hf & math\_word\_problem\_generation & 63.7 & 61.6 & -- & -- \\
 Llama-2-70b-chat-hf & finegrained\_fact\_verification & 45.8 & 46.1 & 44.8 & -- \\
 Llama-2-70b-chat-hf & answerability\_classification & 35.3 & -- & -- & 31.1 \\
