
                    gpt-4-0613 & math\_word\_problem\_generation &                           52.5 &                           49.1 &                             -- &                             --  \\
                    gpt-4-0613 & finegrained\_fact\_verification &                           48.5 &                           40.1 &                           40.9 &                             --  \\
                    gpt-4-0613 &   answerability\_classification &                           42.1 &                             -- &                             -- &                           36.7  \\
           Llama-2-70b-chat-hf & math\_word\_problem\_generation &                           63.7 &                           61.6 &                             -- &                             --  \\
           Llama-2-70b-chat-hf & finegrained\_fact\_verification &                           45.8 &                           46.1 &                           44.8 &                             --  \\
           Llama-2-70b-chat-hf &   answerability\_classification &                           35.3 &                             -- &                             -- &                           31.1  \\
