benchmark,,,,,,,,,,,
,base,dpo,,,,,simpo,,,,
,,"system1 (100)
system2 (0)","system1 (75)
system2 (25)","system1 (50)
system2 (50)","system1 (25)
system2 (75)","system1 (0)
system2 (100)","system1 (100)
system2 (0)","system1 (75)
system2 (25)","system1 (50)
system2 (50)","system1 (25)
system2 (75)","system1 (0)
system2 (100)"
MultiArith (Arithmetic),97.67,98.5,98.51,98.61,98.62,98.67,97.5,97.57,97.69,97.79,97.83
GSM8K (Arithmetic),78.49,77.01,77.5,78.19,78.87,79.37,77.79,78.49,78.63,78.99,79.38
AddSub (Arithmetic),82.47,80.76,83.23,85.32,87.4,89.87,80.51,81.9,85.32,88.74,90.13
Coin (Symbolic),94.2,93.4,93.49,93.7,93.72,93.8,90,91.1,92.4,93.3,94.4
CSQA (Common Sense),71.42,72.81,72.36,72.12,71.55,71.42,72.32,71.56,70.97,70.09,69.62
AQuA (Arithmetic),48.82,46.46,47.13,47.48,48.54,49.21,48.03,49.69,51.52,53.06,54.72
SingleEq (Arithmetic),90.72,77.24,79.53,84.81,90.08,94.37,87.4,88.16,90.49,93.73,94.49
SVAMP (Arithmetic),80.5,78,79.68,81.7,84.55,85.4,79.3,79.35,80.55,81.1,81.7
Letter (Symbolic),84,83.8,83.8,84,85.8,86.2,83.8,84,84.4,84.55,84.8
Strategy (Common Sense),67.55,68.21,66.35,64.54,62.72,60.87,67.73,67.65,67.51,67.46,67.38