 % Table body: one header row followed by six result rows, 15 columns each:
 %   col 1      "initial model" (left empty in every data row visible here)
 %   col 2      dataset name, horizontally compressed with \scalebox{0.9}[1]
 %   cols 3-13  one score per model, in the order given by the header row
 %   col 14     "random" reference score
 %   col 15     "human" reference score
 % Highlighting as it appears in these rows:
 %   - \cellcolor[RGB]{211,211,211} (light gray) is applied to exactly those
 %     model cells whose value is below the row's "random" score.
 %   - \textbf marks the largest value among the model columns of the row
 %     (the "random" and "human" columns are not considered).
 % Keep both in sync when editing numbers.
 % Requires colortbl (or xcolor with the [table] option) for \cellcolor and
 % graphicx for \scalebox.
 % NOTE(review): the first column is empty in all data rows -- presumably a
 % row-group label is supplied elsewhere (e.g. via \multirow); confirm.
 initial model & dataset & gemma-7b-it & Llama-2-13b-chat-hf & Llama-2-70b-chat-hf & Mixtral-8x7B-Instruct-v0.1 & Qwen1.5-14B-Chat & Qwen1.5-72B-Chat & gpt-3.5-turbo-0125 & gemini-1.0-pro-001 & claude-3-opus-20240229 & gpt-4-0613 & gpt-4-0125-preview & random & human \\
 % First group of three datasets.
 & \scalebox{0.9}[1]{MathGen} & 64.9 & 69.1 & 71.0 & \cellcolor[RGB]{211,211,211}{45.0} & \cellcolor[RGB]{211,211,211}{29.7} & \cellcolor[RGB]{211,211,211}{37.4} & 64.7 & 67.1 & 67.7 & 63.9 & \textbf{72.5} & 55.0 & 90.0 \\
 & \scalebox{0.9}[1]{FgFactV} & 62.4 & 69.2 & \textbf{71.6} & \cellcolor[RGB]{211,211,211}{29.9} & \cellcolor[RGB]{211,211,211}{53.3} & \cellcolor[RGB]{211,211,211}{19.4} & \cellcolor[RGB]{211,211,211}{50.0} & 58.4 & 63.4 & \cellcolor[RGB]{211,211,211}{9.6} & \cellcolor[RGB]{211,211,211}{24.7} & 55.7 & 95.5 \\
 & \scalebox{0.9}[1]{AnsCls} & 61.3 & \textbf{74.1} & 70.8 & \cellcolor[RGB]{211,211,211}{37.0} & \cellcolor[RGB]{211,211,211}{47.6} & \cellcolor[RGB]{211,211,211}{26.8} & \cellcolor[RGB]{211,211,211}{26.5} & \cellcolor[RGB]{211,211,211}{53.6} & \cellcolor[RGB]{211,211,211}{46.3} & \cellcolor[RGB]{211,211,211}{19.8} & \cellcolor[RGB]{211,211,211}{21.7} & 57.9 & 85.7 \\
 % Second group: the same three datasets repeated with different scores.
 % NOTE(review): what distinguishes this group from the first (metric,
 % setting, ...) is not visible in this fragment -- confirm against the caption.
 & \scalebox{0.9}[1]{MathGen} & 84.6 & 79.5 & 85.3 & \cellcolor[RGB]{211,211,211}{58.8} & \cellcolor[RGB]{211,211,211}{40.8} & \cellcolor[RGB]{211,211,211}{63.7} & \cellcolor[RGB]{211,211,211}{72.2} & \cellcolor[RGB]{211,211,211}{74.5} & 86.8 & 88.1 & \textbf{90.2} & 75.0 & 98.3 \\
 & \scalebox{0.9}[1]{FgFactV} & 80.2 & 84.6 & \textbf{85.3} & \cellcolor[RGB]{211,211,211}{18.7} & \cellcolor[RGB]{211,211,211}{57.1} & \cellcolor[RGB]{211,211,211}{11.9} & \cellcolor[RGB]{211,211,211}{29.7} & \cellcolor[RGB]{211,211,211}{59.2} & \cellcolor[RGB]{211,211,211}{74.9} & \cellcolor[RGB]{211,211,211}{38.3} & \cellcolor[RGB]{211,211,211}{66.7} & 78.8 & 100.0 \\
 & \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{44.4} & \textbf{85.2} & 81.8 & \cellcolor[RGB]{211,211,211}{41.7} & \cellcolor[RGB]{211,211,211}{6.2} & \cellcolor[RGB]{211,211,211}{4.7} & \cellcolor[RGB]{211,211,211}{1.6} & \cellcolor[RGB]{211,211,211}{20.5} & \cellcolor[RGB]{211,211,211}{33.6} & \cellcolor[RGB]{211,211,211}{61.2} & \cellcolor[RGB]{211,211,211}{73.0} & 77.5 & 98.3 \\
