                 initial model &                        dataset &                    gemma-7b-it &            Llama-2-13b-chat-hf &            Llama-2-70b-chat-hf &     Mixtral-8x7B-Instruct-v0.1 &               Qwen1.5-14B-Chat &               Qwen1.5-72B-Chat &             gpt-3.5-turbo-0125 &             gemini-1.0-pro-001 &         claude-3-opus-20240229 &                     gpt-4-0613 &             gpt-4-0125-preview &                         random &                          human  \\
                               &     \scalebox{0.9}[1]{MathGen} &                           25.7 & \cellcolor[RGB]{211,211,211}{-2.9} &                            0.0 &                            5.7 & \cellcolor[RGB]{211,211,211}{-0.7} &                            2.1 &                            2.1 & \cellcolor[RGB]{211,211,211}{-1.4} &                            2.1 &                            2.1 &                            2.1  \\
                               &     \scalebox{0.9}[1]{FgFactV} &                            5.7 &                            1.4 &                            0.0 &                            7.9 & \cellcolor[RGB]{211,211,211}{-3.6} &                            7.9 & \cellcolor[RGB]{211,211,211}{-1.4} & \cellcolor[RGB]{211,211,211}{-2.1} & \cellcolor[RGB]{211,211,211}{-14.3} & \cellcolor[RGB]{211,211,211}{-1.4} &                            4.3  \\
                               &      \scalebox{0.9}[1]{AnsCls} &                            0.7 & \cellcolor[RGB]{211,211,211}{-1.4} &                            2.1 &                            1.4 & \cellcolor[RGB]{211,211,211}{-3.6} & \cellcolor[RGB]{211,211,211}{-0.7} &                            1.4 & \cellcolor[RGB]{211,211,211}{-15.0} & \cellcolor[RGB]{211,211,211}{-0.7} & \cellcolor[RGB]{211,211,211}{-2.9} &                            1.4  \\
                               &     \scalebox{0.9}[1]{MathGen} &                           19.4 & \cellcolor[RGB]{211,211,211}{-20.6} &                            1.2 &                            5.6 & \cellcolor[RGB]{211,211,211}{-8.8} &                            1.2 & \cellcolor[RGB]{211,211,211}{-3.7} & \cellcolor[RGB]{211,211,211}{-16.9} &                            6.9 & \cellcolor[RGB]{211,211,211}{-1.9} &                            0.0  \\
                               &     \scalebox{0.9}[1]{FgFactV} & \cellcolor[RGB]{211,211,211}{-1.3} & \cellcolor[RGB]{211,211,211}{-3.1} &                            0.6 &                           11.9 & \cellcolor[RGB]{211,211,211}{-49.4} &                            6.9 & \cellcolor[RGB]{211,211,211}{-28.8} & \cellcolor[RGB]{211,211,211}{-11.2} &                            5.6 & \cellcolor[RGB]{211,211,211}{-3.1} & \cellcolor[RGB]{211,211,211}{-6.9}  \\
                               &      \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{-1.3} & \cellcolor[RGB]{211,211,211}{-10.6} &                            5.6 & \cellcolor[RGB]{211,211,211}{-10.0} & \cellcolor[RGB]{211,211,211}{-37.5} &                            7.5 & \cellcolor[RGB]{211,211,211}{-1.3} & \cellcolor[RGB]{211,211,211}{-5.0} & \cellcolor[RGB]{211,211,211}{-3.8} & \cellcolor[RGB]{211,211,211}{-3.7} & \cellcolor[RGB]{211,211,211}{-1.2}  \\
