                 initial model &                        dataset &                    gemma-7b-it &            Llama-2-13b-chat-hf &            Llama-2-70b-chat-hf &     Mixtral-8x7B-Instruct-v0.1 &               Qwen1.5-14B-Chat &               Qwen1.5-72B-Chat &             gpt-3.5-turbo-0125 &             gemini-1.0-pro-001 &         claude-3-opus-20240229 &                     gpt-4-0613 &             gpt-4-0125-preview &                         random &                          human  \\
                               &     \scalebox{0.9}[1]{MathGen} &                            7.8 & \cellcolor[RGB]{211,211,211}{-37.7} & \cellcolor[RGB]{211,211,211}{-1.3} &                            2.6 & \cellcolor[RGB]{211,211,211}{-20.8} &                           31.2 & \cellcolor[RGB]{211,211,211}{-7.8} & \cellcolor[RGB]{211,211,211}{-42.9} &                           16.9 &                            2.6 & \cellcolor[RGB]{211,211,211}{-3.9}  \\
                               &     \scalebox{0.9}[1]{FgFactV} &                           14.1 & \cellcolor[RGB]{211,211,211}{-15.4} & \cellcolor[RGB]{211,211,211}{-1.3} &                           16.7 & \cellcolor[RGB]{211,211,211}{-20.5} &                           16.7 & \cellcolor[RGB]{211,211,211}{-19.2} & \cellcolor[RGB]{211,211,211}{-26.9} & \cellcolor[RGB]{211,211,211}{-34.6} & \cellcolor[RGB]{211,211,211}{-2.6} &                            5.1  \\
                               &      \scalebox{0.9}[1]{AnsCls} &                           11.1 & \cellcolor[RGB]{211,211,211}{-9.9} &                            6.2 & \cellcolor[RGB]{211,211,211}{-6.2} & \cellcolor[RGB]{211,211,211}{-21.0} & \cellcolor[RGB]{211,211,211}{-1.2} & \cellcolor[RGB]{211,211,211}{-11.1} & \cellcolor[RGB]{211,211,211}{-35.8} & \cellcolor[RGB]{211,211,211}{-6.2} & \cellcolor[RGB]{211,211,211}{-3.7} &                            4.9  \\
                               &     \scalebox{0.9}[1]{MathGen} &                           18.3 & \cellcolor[RGB]{211,211,211}{-45.0} &                            0.8 &                            0.8 & \cellcolor[RGB]{211,211,211}{-10.8} &                            5.0 & \cellcolor[RGB]{211,211,211}{-11.7} & \cellcolor[RGB]{211,211,211}{-39.2} &                            7.5 & \cellcolor[RGB]{211,211,211}{-2.5} & \cellcolor[RGB]{211,211,211}{-1.7}  \\
                               &     \scalebox{0.9}[1]{FgFactV} &                            6.3 & \cellcolor[RGB]{211,211,211}{-13.5} &                            0.8 &                           15.1 & \cellcolor[RGB]{211,211,211}{-71.4} &                            8.7 & \cellcolor[RGB]{211,211,211}{-37.3} & \cellcolor[RGB]{211,211,211}{-25.4} &                            6.3 & \cellcolor[RGB]{211,211,211}{-6.3} & \cellcolor[RGB]{211,211,211}{-7.1}  \\
                               &      \scalebox{0.9}[1]{AnsCls} &                            2.4 & \cellcolor[RGB]{211,211,211}{-21.0} &                           16.1 & \cellcolor[RGB]{211,211,211}{-16.1} & \cellcolor[RGB]{211,211,211}{-64.5} &                            8.9 & \cellcolor[RGB]{211,211,211}{-3.2} & \cellcolor[RGB]{211,211,211}{-12.9} & \cellcolor[RGB]{211,211,211}{-4.8} & \cellcolor[RGB]{211,211,211}{-4.8} & \cellcolor[RGB]{211,211,211}{-1.6}  \\
