                 initial model &                        dataset &                    gemma-7b-it &            Llama-2-13b-chat-hf &            Llama-2-70b-chat-hf &     Mixtral-8x7B-Instruct-v0.1 &               Qwen1.5-14B-Chat &               Qwen1.5-72B-Chat &             gpt-3.5-turbo-0125 &             gemini-1.0-pro-001 &         claude-3-opus-20240229 &                     gpt-4-0613 &             gpt-4-0125-preview &                         random &                          human  \\
                               &     \scalebox{0.9}[1]{MathGen} &                            4.4 & \cellcolor[RGB]{211,211,211}{-12.5} &                            0.1 &                            4.1 &                            3.5 & \cellcolor[RGB]{211,211,211}{-5.0} &                            3.3 &                           11.0 & \cellcolor[RGB]{211,211,211}{-11.4} &                            2.0 &                            6.4  \\
                               &     \scalebox{0.9}[1]{FgFactV} &                            0.9 &                            1.8 &                            0.1 &                            2.1 & \cellcolor[RGB]{211,211,211}{-1.0} &                            5.9 &                            3.1 &                            3.9 & \cellcolor[RGB]{211,211,211}{-10.3} & \cellcolor[RGB]{211,211,211}{-4.2} &                           14.9  \\
                               &      \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{-1.7} &                            0.1 &                            0.7 &                            8.2 &                            0.8 & \cellcolor[RGB]{211,211,211}{-1.6} &                           12.9 & \cellcolor[RGB]{211,211,211}{-9.1} &                            2.7 & \cellcolor[RGB]{211,211,211}{-10.5} & \cellcolor[RGB]{211,211,211}{-8.1}  \\
                               &     \scalebox{0.9}[1]{MathGen} &                           11.7 & \cellcolor[RGB]{211,211,211}{-3.0} &                            0.7 &                            7.7 & \cellcolor[RGB]{211,211,211}{-0.4} & \cellcolor[RGB]{211,211,211}{-2.6} &                            3.2 &                           12.4 &                            2.4 & \cellcolor[RGB]{211,211,211}{-0.2} &                            1.2  \\
                               &     \scalebox{0.9}[1]{FgFactV} & \cellcolor[RGB]{211,211,211}{-5.1} &                            5.4 &                            0.1 &                            2.6 & \cellcolor[RGB]{211,211,211}{-14.2} &                            1.2 &                            1.3 &                           10.8 &                            1.8 &                            4.5 & \cellcolor[RGB]{211,211,211}{-3.5}  \\
                               &      \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{-7.2} &                            1.3 & \cellcolor[RGB]{211,211,211}{-3.2} & \cellcolor[RGB]{211,211,211}{-1.3} & \cellcolor[RGB]{211,211,211}{-1.1} &                           15.0 &                            5.0 &                           11.1 &                            0.0 & \cellcolor[RGB]{211,211,211}{-0.5} & \cellcolor[RGB]{211,211,211}{-0.2}  \\
