
 gpt-4-0613 & math\_word\_problem\_generation & 75.0 & 75.4 & -- & -- \\
 gpt-4-0613 & finegrained\_fact\_verification & 87.5 & 75.0 & 80.8 & -- \\
 gpt-4-0613 & answerability\_classification & 82.1 & -- & -- & 77.1 \\
 Llama-2-70b-chat-hf & math\_word\_problem\_generation & 72.5 & 73.0 & -- & -- \\
 Llama-2-70b-chat-hf & finegrained\_fact\_verification & 86.0 & 83.1 & 79.1 & -- \\
 Llama-2-70b-chat-hf & answerability\_classification & 45.0 & -- & -- & 47.6 \\
