group,MA-f1_mean,MA-f1_std,wMA-f1_mean,wMA-f1_std,MI-f1_mean,MI-f1_std,wMI-f1_mean,wMI-f1_std,dtype,model,MA-f1,wMA-f1,MI-f1,wMI-f1
language_claude_3_5_sonnet_temp0.25,80.0,9.7,79.5,12.9,84.8,5.0,80.5,10.9,language,claude_3_5_sonnet_temp0.25,80.0±9.7,79.5±12.9,84.8±5.0,80.5±10.9
language_claude_3_5_sonnet_temp0.5,79.2,10.1,79.0,12.9,83.7,6.3,80.0,10.9,language,claude_3_5_sonnet_temp0.5,79.2±10.1,79.0±12.9,83.7±6.3,80.0±10.9
language_claude_3_5_sonnet_temp0.75,80.0,10.4,80.1,12.6,84.1,6.8,80.7,10.6,language,claude_3_5_sonnet_temp0.75,80.0±10.4,80.1±12.6,84.1±6.8,80.7±10.6
language_claude_3_5_sonnet_temp1.0,81.7,9.6,81.2,12.3,86.3,4.8,82.2,10.0,language,claude_3_5_sonnet_temp1.0,81.7±9.6,81.2±12.3,86.3±4.8,82.2±10.0
language_claude_3_5_sonnet_temp1e-10,80.0,8.7,79.5,11.5,84.4,5.0,80.5,9.1,language,claude_3_5_sonnet_temp1e-10,80.0±8.7,79.5±11.5,84.4±5.0,80.5±9.1
language_claude_3_haiku_temp0.25,53.5,9.3,62.8,11.9,56.3,6.3,65.2,11.4,language,claude_3_haiku_temp0.25,53.5±9.3,62.8±11.9,56.3±6.3,65.2±11.4
language_claude_3_haiku_temp0.5,50.4,5.2,60.1,10.2,53.0,4.5,63.1,10.1,language,claude_3_haiku_temp0.5,50.4±5.2,60.1±10.2,53.0±4.5,63.1±10.1
language_claude_3_haiku_temp0.75,53.0,6.9,59.2,12.5,57.0,4.2,62.2,11.8,language,claude_3_haiku_temp0.75,53.0±6.9,59.2±12.5,57.0±4.2,62.2±11.8
language_claude_3_haiku_temp1.0,52.7,12.4,59.2,15.3,55.6,10.9,61.5,14.6,language,claude_3_haiku_temp1.0,52.7±12.4,59.2±15.3,55.6±10.9,61.5±14.6
language_claude_3_haiku_temp1e-10,45.8,2.3,55.6,10.4,47.8,2.4,59.0,11.8,language,claude_3_haiku_temp1e-10,45.8±2.3,55.6±10.4,47.8±2.4,59.0±11.8
language_gpt_4o_mini_temp0.25,56.4,5.7,57.3,9.2,60.7,10.0,59.8,6.8,language,gpt_4o_mini_temp0.25,56.4±5.7,57.3±9.2,60.7±10.0,59.8±6.8
language_gpt_4o_mini_temp0.5,54.7,5.8,62.0,11.3,55.2,6.7,63.1,11.8,language,gpt_4o_mini_temp0.5,54.7±5.8,62.0±11.3,55.2±6.7,63.1±11.8
language_gpt_4o_mini_temp0.75,61.5,6.9,68.1,12.3,64.1,7.6,68.5,12.1,language,gpt_4o_mini_temp0.75,61.5±6.9,68.1±12.3,64.1±7.6,68.5±12.1
language_gpt_4o_mini_temp1.0,59.7,5.9,70.3,10.0,61.5,6.0,70.8,10.4,language,gpt_4o_mini_temp1.0,59.7±5.9,70.3±10.0,61.5±6.0,70.8±10.4
language_gpt_4o_mini_temp1e-10,54.9,6.2,63.4,11.2,56.3,6.5,65.6,10.1,language,gpt_4o_mini_temp1e-10,54.9±6.2,63.4±11.2,56.3±6.5,65.6±10.1
language_gpt_4o_temp0,71.9,7.0,81.7,9.1,74.8,9.7,82.1,8.9,language,gpt_4o_temp0,71.9±7.0,81.7±9.1,74.8±9.7,82.1±8.9
language_gpt_4o_temp0.25,74.3,6.7,80.9,13.3,79.3,7.6,82.1,11.0,language,gpt_4o_temp0.25,74.3±6.7,80.9±13.3,79.3±7.6,82.1±11.0
language_gpt_4o_temp0.5,73.1,4.8,83.0,9.2,76.7,8.5,83.3,9.4,language,gpt_4o_temp0.5,73.1±4.8,83.0±9.2,76.7±8.5,83.3±9.4
language_gpt_4o_temp0.75,71.0,5.9,77.2,12.7,75.9,8.6,78.5,10.8,language,gpt_4o_temp0.75,71.0±5.9,77.2±12.7,75.9±8.6,78.5±10.8
language_gpt_4o_temp1.0,70.3,7.7,74.0,14.5,76.3,6.3,76.1,11.6,language,gpt_4o_temp1.0,70.3±7.7,74.0±14.5,76.3±6.3,76.1±11.6
symbolic_claude_3_5_sonnet_temp0.25,73.8,7.3,77.5,8.3,77.4,8.0,77.6,8.3,symbolic,claude_3_5_sonnet_temp0.25,73.8±7.3,77.5±8.3,77.4±8.0,77.6±8.3
symbolic_claude_3_5_sonnet_temp0.5,75.0,6.3,79.8,7.1,78.5,7.5,79.9,7.4,symbolic,claude_3_5_sonnet_temp0.5,75.0±6.3,79.8±7.1,78.5±7.5,79.9±7.4
symbolic_claude_3_5_sonnet_temp0.75,69.1,9.3,73.9,12.2,72.6,11.0,74.1,12.2,symbolic,claude_3_5_sonnet_temp0.75,69.1±9.3,73.9±12.2,72.6±11.0,74.1±12.2
symbolic_claude_3_5_sonnet_temp1.0,70.5,9.4,73.4,14.0,75.9,8.0,74.2,12.0,symbolic,claude_3_5_sonnet_temp1.0,70.5±9.4,73.4±14.0,75.9±8.0,74.2±12.0
symbolic_claude_3_5_sonnet_temp1e-10,73.5,7.8,77.3,10.2,76.7,8.0,77.4,10.3,symbolic,claude_3_5_sonnet_temp1e-10,73.5±7.8,77.3±10.2,76.7±8.0,77.4±10.3
symbolic_claude_3_haiku_temp0.25,36.8,5.1,41.6,8.9,40.0,4.4,47.7,7.3,symbolic,claude_3_haiku_temp0.25,36.8±5.1,41.6±8.9,40.0±4.4,47.7±7.3
symbolic_claude_3_haiku_temp0.5,32.6,6.3,37.4,8.2,37.4,4.9,44.8,7.1,symbolic,claude_3_haiku_temp0.5,32.6±6.3,37.4±8.2,37.4±4.9,44.8±7.1
symbolic_claude_3_haiku_temp0.75,32.9,7.5,38.5,11.4,37.4,6.8,47.5,9.1,symbolic,claude_3_haiku_temp0.75,32.9±7.5,38.5±11.4,37.4±6.8,47.5±9.1
symbolic_claude_3_haiku_temp1.0,34.6,7.5,40.0,10.9,38.9,6.9,48.3,11.3,symbolic,claude_3_haiku_temp1.0,34.6±7.5,40.0±10.9,38.9±6.9,48.3±11.3
symbolic_claude_3_haiku_temp1e-10,33.0,5.1,38.3,9.9,36.3,2.6,45.6,7.3,symbolic,claude_3_haiku_temp1e-10,33.0±5.1,38.3±9.9,36.3±2.6,45.6±7.3
symbolic_gpt_4o_mini_temp0.25,35.7,10.2,43.4,19.9,39.6,12.7,48.6,19.8,symbolic,gpt_4o_mini_temp0.25,35.7±10.2,43.4±19.9,39.6±12.7,48.6±19.8
symbolic_gpt_4o_mini_temp0.5,36.9,7.3,38.9,8.5,40.0,9.4,43.6,9.4,symbolic,gpt_4o_mini_temp0.5,36.9±7.3,38.9±8.5,40.0±9.4,43.6±9.4
symbolic_gpt_4o_mini_temp0.75,37.5,7.9,41.2,14.7,40.4,8.2,45.5,14.4,symbolic,gpt_4o_mini_temp0.75,37.5±7.9,41.2±14.7,40.4±8.2,45.5±14.4
symbolic_gpt_4o_mini_temp1.0,39.6,7.2,45.0,13.6,42.2,7.5,48.7,13.3,symbolic,gpt_4o_mini_temp1.0,39.6±7.2,45.0±13.6,42.2±7.5,48.7±13.3
symbolic_gpt_4o_mini_temp1e-10,35.4,8.9,37.3,6.5,40.0,13.2,42.2,7.0,symbolic,gpt_4o_mini_temp1e-10,35.4±8.9,37.3±6.5,40.0±13.2,42.2±7.0
symbolic_gpt_4o_temp0,51.8,10.8,60.3,21.6,56.3,13.3,62.4,20.7,symbolic,gpt_4o_temp0,51.8±10.8,60.3±21.6,56.3±13.3,62.4±20.7
symbolic_gpt_4o_temp0.25,48.1,10.8,53.0,21.0,52.6,12.2,55.5,20.2,symbolic,gpt_4o_temp0.25,48.1±10.8,53.0±21.0,52.6±12.2,55.5±20.2
symbolic_gpt_4o_temp0.5,51.2,8.1,59.9,19.3,55.9,9.2,62.1,18.5,symbolic,gpt_4o_temp0.5,51.2±8.1,59.9±19.3,55.9±9.2,62.1±18.5
symbolic_gpt_4o_temp0.75,46.9,7.8,56.6,18.4,50.7,9.2,59.2,18.6,symbolic,gpt_4o_temp0.75,46.9±7.8,56.6±18.4,50.7±9.2,59.2±18.6
symbolic_gpt_4o_temp1.0,45.5,8.2,53.6,20.5,50.0,10.1,56.6,19.6,symbolic,gpt_4o_temp1.0,45.5±8.2,53.6±20.5,50.0±10.1,56.6±19.6
