
                    gpt-4-0613 & math\_word\_problem\_generation &                           75.0 &                           75.4 &                             -- &                             --  \\
                    gpt-4-0613 & finegrained\_fact\_verification &                           87.5 &                           75.0 &                           80.8 &                             --  \\
                    gpt-4-0613 &   answerability\_classification &                           82.1 &                             -- &                             -- &                           77.1  \\
           Llama-2-70b-chat-hf & math\_word\_problem\_generation &                           72.5 &                           73.0 &                             -- &                             --  \\
           Llama-2-70b-chat-hf & finegrained\_fact\_verification &                           86.0 &                           83.1 &                           79.1 &                             --  \\
           Llama-2-70b-chat-hf &   answerability\_classification &                           45.0 &                             -- &                             -- &                           47.6  \\
