 % Table body: one header row followed by six data rows.
 % Columns: initial model | dataset | 11 model columns | random | human.
 % Formatting in the data rows below:
 %   - \textbf marks the highest score among the 11 model columns in each row
 %     (the random and human columns are never bolded).
 %   - \cellcolor[RGB]{211,211,211} (light gray) is applied to exactly those
 %     model scores that fall below the row's "random" value.
 % Needs \scalebox (graphicx) and \cellcolor (colortbl, or xcolor with the
 % [table] option) to be available from the preamble.
 % NOTE(review): the "initial model" cell is empty in every data row visible
 % here; presumably the group label (e.g. a \multirow) is outside this chunk
 % or was dropped -- confirm against the full table.
 initial model & dataset & gemma-7b-it & Llama-2-13b-chat-hf & Llama-2-70b-chat-hf & Mixtral-8x7B-Instruct-v0.1 & Qwen1.5-14B-Chat & Qwen1.5-72B-Chat & gpt-3.5-turbo-0125 & gemini-1.0-pro-001 & claude-3-opus-20240229 & gpt-4-0613 & gpt-4-0125-preview & random & human \\
 % First group of three dataset rows (MathGen, FgFactV, AnsCls).
 & \scalebox{0.9}[1]{MathGen} & \cellcolor[RGB]{211,211,211}{41.8} & \cellcolor[RGB]{211,211,211}{47.1} & 54.6 & 55.4 & 55.2 & 52.1 & 62.9 & 51.6 & 63.0 & 70.2 & \textbf{70.4} & 50.5 & 88.2 \\
 & \scalebox{0.9}[1]{FgFactV} & 51.1 & \cellcolor[RGB]{211,211,211}{48.9} & 53.8 & \cellcolor[RGB]{211,211,211}{49.3} & 51.1 & \cellcolor[RGB]{211,211,211}{47.5} & 51.8 & 51.6 & \textbf{57.3} & \cellcolor[RGB]{211,211,211}{47.5} & \cellcolor[RGB]{211,211,211}{49.1} & 50.7 & 94.3 \\
 & \scalebox{0.9}[1]{AnsCls} & 52.5 & 54.1 & \textbf{55.2} & \cellcolor[RGB]{211,211,211}{46.1} & 53.2 & \cellcolor[RGB]{211,211,211}{45.9} & \cellcolor[RGB]{211,211,211}{46.8} & 52.7 & \cellcolor[RGB]{211,211,211}{50.9} & \cellcolor[RGB]{211,211,211}{48.6} & \cellcolor[RGB]{211,211,211}{49.3} & 51.2 & 82.9 \\
 % Second group: the same three datasets repeated with different scores.
 % NOTE(review): what distinguishes this group from the first is not visible
 % in this chunk (likely the missing "initial model" label) -- confirm.
 & \scalebox{0.9}[1]{MathGen} & \cellcolor[RGB]{211,211,211}{48.3} & \cellcolor[RGB]{211,211,211}{48.6} & 64.4 & \cellcolor[RGB]{211,211,211}{50.9} & \cellcolor[RGB]{211,211,211}{56.1} & \cellcolor[RGB]{211,211,211}{51.6} & 64.4 & \cellcolor[RGB]{211,211,211}{51.7} & 75.3 & 82.5 & \textbf{83.7} & 62.5 & 97.1 \\
 & \scalebox{0.9}[1]{FgFactV} & \cellcolor[RGB]{211,211,211}{59.5} & \cellcolor[RGB]{211,211,211}{65.9} & \textbf{70.2} & \cellcolor[RGB]{211,211,211}{39.1} & \cellcolor[RGB]{211,211,211}{58.8} & \cellcolor[RGB]{211,211,211}{29.7} & \cellcolor[RGB]{211,211,211}{40.0} & \cellcolor[RGB]{211,211,211}{40.9} & \cellcolor[RGB]{211,211,211}{46.6} & \cellcolor[RGB]{211,211,211}{38.0} & \cellcolor[RGB]{211,211,211}{60.3} & 66.5 & 100.0 \\
 & \scalebox{0.9}[1]{AnsCls} & \cellcolor[RGB]{211,211,211}{35.9} & 66.6 & \cellcolor[RGB]{211,211,211}{50.3} & \cellcolor[RGB]{211,211,211}{32.2} & \cellcolor[RGB]{211,211,211}{47.5} & \cellcolor[RGB]{211,211,211}{24.1} & \cellcolor[RGB]{211,211,211}{23.6} & \cellcolor[RGB]{211,211,211}{26.7} & \cellcolor[RGB]{211,211,211}{34.2} & \cellcolor[RGB]{211,211,211}{58.1} & \textbf{68.6} & 65.1 & 97.1 \\
