 initial model & dataset & gemma-7b-it & Llama-2-13b-chat-hf & Llama-2-70b-chat-hf & Mixtral-8x7B-Instruct-v0.1 & Qwen1.5-14B-Chat & Qwen1.5-72B-Chat & gpt-3.5-turbo-0125 & gemini-1.0-pro-001 & claude-3-opus-20240229 & gpt-4-0613 & gpt-4-0125-preview & random & human \\
 & \scalebox{0.9}[1]{MathGen} & \cellcolor[RGB]{211,211,211}{50.0} & \cellcolor[RGB]{211,211,211}{49.3} & 62.1 & 60.7 & 66.4 & \cellcolor[RGB]{211,211,211}{49.3} & 63.6 & \cellcolor[RGB]{211,211,211}{46.4} & 60.7 & \textbf{72.1} & \textbf{72.1} & 50.5 & 88.2 \\
 & \scalebox{0.9}[1]{FgFactV} & \cellcolor[RGB]{211,211,211}{48.6} & \cellcolor[RGB]{211,211,211}{42.9} & 55.7 & \textbf{60.0} & 55.0 & \cellcolor[RGB]{211,211,211}{49.3} & \cellcolor[RGB]{211,211,211}{49.3} & \cellcolor[RGB]{211,211,211}{47.9} & 50.7 & \cellcolor[RGB]{211,211,211}{47.9} & \cellcolor[RGB]{211,211,211}{49.3} & 50.7 & 94.3 \\
 & \scalebox{0.9}[1]{AnsCls} & 53.6 & \cellcolor[RGB]{211,211,211}{50.0} & 53.6 & \cellcolor[RGB]{211,211,211}{49.3} & \textbf{56.4} & \cellcolor[RGB]{211,211,211}{44.3} & \cellcolor[RGB]{211,211,211}{49.3} & \cellcolor[RGB]{211,211,211}{47.1} & \cellcolor[RGB]{211,211,211}{50.7} & \cellcolor[RGB]{211,211,211}{50.0} & \cellcolor[RGB]{211,211,211}{49.3} & 51.2 & 82.9 \\
 & \scalebox{0.9}[1]{MathGen} & 66.2 & \cellcolor[RGB]{211,211,211}{31.2} & 78.1 & \cellcolor[RGB]{211,211,211}{59.4} & 68.8 & \cellcolor[RGB]{211,211,211}{40.6} & 73.1 & \cellcolor[RGB]{211,211,211}{43.1} & 73.8 & \textbf{84.4} & 83.1 & 62.5 & 97.1 \\
 & \scalebox{0.9}[1]{FgFactV} & \cellcolor[RGB]{211,211,211}{60.6} & \cellcolor[RGB]{211,211,211}{54.4} & 70.6 & \cellcolor[RGB]{211,211,211}{51.9} & \textbf{78.8} & \cellcolor[RGB]{211,211,211}{29.4} & \cellcolor[RGB]{211,211,211}{35.6} & \cellcolor[RGB]{211,211,211}{29.4} & \cellcolor[RGB]{211,211,211}{34.4} & \cellcolor[RGB]{211,211,211}{46.2} & 68.8 & 66.5 & 100.0 \\
 & \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{37.5} & \cellcolor[RGB]{211,211,211}{45.0} & \cellcolor[RGB]{211,211,211}{35.0} & \cellcolor[RGB]{211,211,211}{34.4} & 66.9 & \cellcolor[RGB]{211,211,211}{23.1} & \cellcolor[RGB]{211,211,211}{23.8} & \cellcolor[RGB]{211,211,211}{24.4} & \cellcolor[RGB]{211,211,211}{33.1} & \cellcolor[RGB]{211,211,211}{60.0} & \textbf{75.0} & 65.1 & 97.1 \\
