 initial model & dataset & gemma-7b-it & Llama-2-13b-chat-hf & Llama-2-70b-chat-hf & Mixtral-8x7B-Instruct-v0.1 & Qwen1.5-14B-Chat & Qwen1.5-72B-Chat & gpt-3.5-turbo-0125 & gemini-1.0-pro-001 & claude-3-opus-20240229 & gpt-4-0613 & gpt-4-0125-preview & random & human \\
 & \scalebox{0.9}[1]{MathGen} & 6.6 & \cellcolor[RGB]{211,211,211}{-26.7} & \cellcolor[RGB]{211,211,211}{-0.3} & 3.2 & \cellcolor[RGB]{211,211,211}{-4.7} & 13.7 & \cellcolor[RGB]{211,211,211}{-0.3} & \cellcolor[RGB]{211,211,211}{-21.9} & 9.3 & 2.5 & 0.2 \\
 & \scalebox{0.9}[1]{FgFactV} & 5.0 & \cellcolor[RGB]{211,211,211}{-3.3} & \cellcolor[RGB]{211,211,211}{-0.3} & 6.9 & \cellcolor[RGB]{211,211,211}{-8.3} & 13.2 & \cellcolor[RGB]{211,211,211}{-8.4} & \cellcolor[RGB]{211,211,211}{-15.0} & \cellcolor[RGB]{211,211,211}{-30.8} & \cellcolor[RGB]{211,211,211}{-4.4} & 7.9 \\
 & \scalebox{0.9}[1]{AnsCls} & 3.3 & \cellcolor[RGB]{211,211,211}{-2.8} & 2.3 & \cellcolor[RGB]{211,211,211}{-2.3} & \cellcolor[RGB]{211,211,211}{-7.1} & \cellcolor[RGB]{211,211,211}{-1.7} & \cellcolor[RGB]{211,211,211}{-9.9} & \cellcolor[RGB]{211,211,211}{-31.2} & \cellcolor[RGB]{211,211,211}{-5.5} & \cellcolor[RGB]{211,211,211}{-6.0} & 6.5 \\
 & \scalebox{0.9}[1]{MathGen} & 17.3 & \cellcolor[RGB]{211,211,211}{-31.5} & 0.7 & 3.6 & \cellcolor[RGB]{211,211,211}{-5.2} & 1.9 & \cellcolor[RGB]{211,211,211}{-4.0} & \cellcolor[RGB]{211,211,211}{-22.4} & 5.2 & \cellcolor[RGB]{211,211,211}{-1.4} & \cellcolor[RGB]{211,211,211}{-0.2} \\
 & \scalebox{0.9}[1]{FgFactV} & 0.6 & \cellcolor[RGB]{211,211,211}{-3.1} & 0.4 & 12.9 & \cellcolor[RGB]{211,211,211}{-54.2} & 9.9 & \cellcolor[RGB]{211,211,211}{-37.9} & \cellcolor[RGB]{211,211,211}{-18.5} & 6.2 & \cellcolor[RGB]{211,211,211}{-7.0} & \cellcolor[RGB]{211,211,211}{-6.6} \\
 & \scalebox{0.9}[1]{AnsCls} & 1.1 & \cellcolor[RGB]{211,211,211}{-8.9} & 5.2 & \cellcolor[RGB]{211,211,211}{-21.0} & \cellcolor[RGB]{211,211,211}{-42.2} & 14.4 & \cellcolor[RGB]{211,211,211}{-5.8} & \cellcolor[RGB]{211,211,211}{-17.8} & \cellcolor[RGB]{211,211,211}{-7.2} & \cellcolor[RGB]{211,211,211}{-4.6} & \cellcolor[RGB]{211,211,211}{-1.2} \\
