
 gpt-4-0613 & math\_word\_problem\_generation & 69.6 & 64.8 & -- & -- \\
 gpt-4-0613 & finegrained\_fact\_verification & 48.5 & 18.8 & 33.2 & -- \\
 gpt-4-0613 & answerability\_classification & 29.5 & -- & -- & 16.7 \\
 Llama-2-70b-chat-hf & math\_word\_problem\_generation & 73.1 & 69.3 & -- & -- \\
 Llama-2-70b-chat-hf & finegrained\_fact\_verification & 23.0 & 28.9 & 26.7 & -- \\
 Llama-2-70b-chat-hf & answerability\_classification & 1.2 & -- & -- & 2.4 \\
