 initial model & dataset & gemma-7b-it & Llama-2-13b-chat-hf & Llama-2-70b-chat-hf & Mixtral-8x7B-Instruct-v0.1 & Qwen1.5-14B-Chat & Qwen1.5-72B-Chat & gpt-3.5-turbo-0125 & gemini-1.0-pro-001 & claude-3-opus-20240229 & gpt-4-0613 & gpt-4-0125-preview & random & human \\
 & \scalebox{0.9}[1]{MathGen} & 7.8 & \cellcolor[RGB]{211,211,211}{-37.7} & \cellcolor[RGB]{211,211,211}{-1.3} & 2.6 & \cellcolor[RGB]{211,211,211}{-20.8} & 31.2 & \cellcolor[RGB]{211,211,211}{-7.8} & \cellcolor[RGB]{211,211,211}{-42.9} & 16.9 & 2.6 & \cellcolor[RGB]{211,211,211}{-3.9} \\
 & \scalebox{0.9}[1]{FgFactV} & 14.1 & \cellcolor[RGB]{211,211,211}{-15.4} & \cellcolor[RGB]{211,211,211}{-1.3} & 16.7 & \cellcolor[RGB]{211,211,211}{-20.5} & 16.7 & \cellcolor[RGB]{211,211,211}{-19.2} & \cellcolor[RGB]{211,211,211}{-26.9} & \cellcolor[RGB]{211,211,211}{-34.6} & \cellcolor[RGB]{211,211,211}{-2.6} & 5.1 \\
 & \scalebox{0.9}[1]{AnsCls} & 11.1 & \cellcolor[RGB]{211,211,211}{-9.9} & 6.2 & \cellcolor[RGB]{211,211,211}{-6.2} & \cellcolor[RGB]{211,211,211}{-21.0} & \cellcolor[RGB]{211,211,211}{-1.2} & \cellcolor[RGB]{211,211,211}{-11.1} & \cellcolor[RGB]{211,211,211}{-35.8} & \cellcolor[RGB]{211,211,211}{-6.2} & \cellcolor[RGB]{211,211,211}{-3.7} & 4.9 \\
 & \scalebox{0.9}[1]{MathGen} & 18.3 & \cellcolor[RGB]{211,211,211}{-45.0} & 0.8 & 0.8 & \cellcolor[RGB]{211,211,211}{-10.8} & 5.0 & \cellcolor[RGB]{211,211,211}{-11.7} & \cellcolor[RGB]{211,211,211}{-39.2} & 7.5 & \cellcolor[RGB]{211,211,211}{-2.5} & \cellcolor[RGB]{211,211,211}{-1.7} \\
 & \scalebox{0.9}[1]{FgFactV} & 6.3 & \cellcolor[RGB]{211,211,211}{-13.5} & 0.8 & 15.1 & \cellcolor[RGB]{211,211,211}{-71.4} & 8.7 & \cellcolor[RGB]{211,211,211}{-37.3} & \cellcolor[RGB]{211,211,211}{-25.4} & 6.3 & \cellcolor[RGB]{211,211,211}{-6.3} & \cellcolor[RGB]{211,211,211}{-7.1} \\
 & \scalebox{0.9}[1]{AnsCls} & 2.4 & \cellcolor[RGB]{211,211,211}{-21.0} & 16.1 & \cellcolor[RGB]{211,211,211}{-16.1} & \cellcolor[RGB]{211,211,211}{-64.5} & 8.9 & \cellcolor[RGB]{211,211,211}{-3.2} & \cellcolor[RGB]{211,211,211}{-12.9} & \cellcolor[RGB]{211,211,211}{-4.8} & \cellcolor[RGB]{211,211,211}{-4.8} & \cellcolor[RGB]{211,211,211}{-1.6} \\
