 initial model & dataset & gemma-7b-it & Llama-2-13b-chat-hf & Llama-2-70b-chat-hf & Mixtral-8x7B-Instruct-v0.1 & Qwen1.5-14B-Chat & Qwen1.5-72B-Chat & gpt-3.5-turbo-0125 & gemini-1.0-pro-001 & claude-3-opus-20240229 & gpt-4-0613 & gpt-4-0125-preview & random & human \\
 & \scalebox{0.9}[1]{MathGen} & 65.6 & \cellcolor[RGB]{211,211,211}{51.9} & \textbf{74.5} & 57.8 & 72.5 & \cellcolor[RGB]{211,211,211}{22.0} & 70.9 & \cellcolor[RGB]{211,211,211}{19.4} & \cellcolor[RGB]{211,211,211}{45.5} & 68.8 & 71.9 & 55.0 & 90.0 \\
 & \scalebox{0.9}[1]{FgFactV} & 57.6 & \cellcolor[RGB]{211,211,211}{52.1} & \textbf{69.7} & 64.1 & 69.0 & \cellcolor[RGB]{211,211,211}{29.7} & \cellcolor[RGB]{211,211,211}{49.6} & \cellcolor[RGB]{211,211,211}{34.2} & \cellcolor[RGB]{211,211,211}{35.5} & \cellcolor[RGB]{211,211,211}{14.1} & \cellcolor[RGB]{211,211,211}{23.7} & 55.7 & 95.5 \\
 & \scalebox{0.9}[1]{AnsCls} & 62.0 & 62.9 & \textbf{67.7} & \cellcolor[RGB]{211,211,211}{48.5} & 67.0 & \cellcolor[RGB]{211,211,211}{11.4} & \cellcolor[RGB]{211,211,211}{39.3} & \cellcolor[RGB]{211,211,211}{21.3} & \cellcolor[RGB]{211,211,211}{34.3} & \cellcolor[RGB]{211,211,211}{25.5} & \cellcolor[RGB]{211,211,211}{23.7} & 57.9 & 85.7 \\
 & \scalebox{0.9}[1]{MathGen} & 78.4 & \cellcolor[RGB]{211,211,211}{31.2} & 87.3 & \cellcolor[RGB]{211,211,211}{69.0} & 75.5 & \cellcolor[RGB]{211,211,211}{36.2} & 81.7 & \cellcolor[RGB]{211,211,211}{40.5} & 79.8 & \textbf{89.1} & 88.7 & 75.0 & 98.3 \\
 & \scalebox{0.9}[1]{FgFactV} & \cellcolor[RGB]{211,211,211}{70.4} & \cellcolor[RGB]{211,211,211}{69.4} & 81.4 & \cellcolor[RGB]{211,211,211}{57.8} & \textbf{87.3} & \cellcolor[RGB]{211,211,211}{18.7} & \cellcolor[RGB]{211,211,211}{31.8} & \cellcolor[RGB]{211,211,211}{22.1} & \cellcolor[RGB]{211,211,211}{28.6} & \cellcolor[RGB]{211,211,211}{51.1} & \cellcolor[RGB]{211,211,211}{76.6} & 78.8 & 100.0 \\
 & \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{41.2} & \cellcolor[RGB]{211,211,211}{53.5} & \cellcolor[RGB]{211,211,211}{34.2} & \cellcolor[RGB]{211,211,211}{37.6} & 79.2 & \cellcolor[RGB]{211,211,211}{1.6} & \cellcolor[RGB]{211,211,211}{3.2} & \cellcolor[RGB]{211,211,211}{6.2} & \cellcolor[RGB]{211,211,211}{24.1} & \cellcolor[RGB]{211,211,211}{66.7} & \textbf{82.3} & 77.5 & 98.3 \\
