
                    gpt-4-0613 &  math\_word\_problem\_generation &                           25.9 &                           23.1 &                             -- &                             --  \\
                    gpt-4-0613 &  finegrained\_fact\_verification &                           19.9 &                           15.6 &                           18.3 &                             --  \\
                    gpt-4-0613 &    answerability\_classification &                           17.9 &                             -- &                             -- &                            6.7  \\
           Llama-2-70b-chat-hf &  math\_word\_problem\_generation &                           41.1 &                           41.8 &                             -- &                             --  \\
           Llama-2-70b-chat-hf &  finegrained\_fact\_verification &                           11.5 &                           14.8 &                           12.0 &                             --  \\
           Llama-2-70b-chat-hf &    answerability\_classification &                            2.1 &                             -- &                             -- &                            2.1  \\
