\begin{table}
\caption{Benchmark Results. Autogenerated on 2024-07-06T04:46:57.307158}
\begin{tabular}{llrrrrrr}
\toprule
 & \rotatebox{90}{program\_name} & \rotatebox{90}{Llama\_3\_70B\_Instruct} & \rotatebox{90}{Llama\_3\_8B\_Instruct} & \rotatebox{90}{Claude\_3\_haiku} & \rotatebox{90}{claude\_3\_sonnet\_20240229} & \rotatebox{90}{gpt\_3\_5\_turbo} & \rotatebox{90}{gpt\_4o\_2024\_05\_13} \\
\midrule
0 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_002\_00060 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCBBA1}} \color[HTML]{000000} 0.50 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FB7D5D}} \color[HTML]{F1F1F1} 0.88 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 \\
1 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_035\_00693 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 \\
2 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_053\_01044 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
3 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_061\_01196 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 \\
4 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_064\_01254 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 \\
5 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_065\_01278 & {\cellcolor[HTML]{FDC7B2}} \color[HTML]{000000} 0.42 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 \\
6 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_067\_01313 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 \\
7 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_078\_01513 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 \\
8 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_079\_01539 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
9 & claude-3-haiku-20240307\_\_synth\_bandit\_self\_score\_mixed\_iter\_088\_01713 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
10 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_045\_00851 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 \\
11 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_046\_00860 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FCAB8F}} \color[HTML]{000000} 0.60 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 \\
12 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_054\_01013 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEDCCD}} \color[HTML]{000000} 0.28 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 \\
13 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_066\_01216 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 \\
14 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_073\_01353 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCB89E}} \color[HTML]{000000} 0.52 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
15 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_080\_01481 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FCA183}} \color[HTML]{000000} 0.66 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 \\
16 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_085\_01565 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FCBBA1}} \color[HTML]{000000} 0.50 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 \\
17 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_086\_01580 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE5D8}} \color[HTML]{000000} 0.20 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 \\
18 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_092\_01700 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 \\
19 & claude-3-sonnet-20240229\_\_synth\_bandit\_self\_score\_mixed\_iter\_094\_01728 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
20 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_040\_00717 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 \\
21 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_041\_00725 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 \\
22 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_041\_00727 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FDD7C6}} \color[HTML]{000000} 0.32 \\
23 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_042\_00734 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 \\
24 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_042\_00737 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 \\
25 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_042\_00743 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 \\
26 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_043\_00753 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FCAB8F}} \color[HTML]{000000} 0.60 \\
27 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_046\_00803 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCA183}} \color[HTML]{000000} 0.66 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
28 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_089\_01525 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 \\
29 & gpt-3.5-turbo\_\_synth\_bandit\_self\_score\_mixed\_iter\_089\_01537 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 \\
30 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_000\_00001 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 \\
31 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_003\_00077 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
32 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_027\_00547 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 \\
33 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_034\_00676 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 \\
34 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_035\_00706 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 \\
35 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_039\_00780 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 \\
36 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_039\_00785 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 & {\cellcolor[HTML]{FEE5D8}} \color[HTML]{000000} 0.20 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 \\
37 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_040\_00795 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FB7D5D}} \color[HTML]{F1F1F1} 0.88 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 \\
38 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_041\_00819 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 \\
39 & gpt-4o-2024-05-13\_\_synth\_bandit\_offspring\_score\_mixed\_iter\_041\_00823 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCBBA1}} \color[HTML]{000000} 0.50 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 \\
40 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_026\_00496 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FDC7B2}} \color[HTML]{000000} 0.42 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FB7A5A}} \color[HTML]{F1F1F1} 0.90 & {\cellcolor[HTML]{FB7353}} \color[HTML]{F1F1F1} 0.94 \\
41 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_026\_00500 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FEE5D8}} \color[HTML]{000000} 0.20 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 \\
42 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_026\_00504 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 \\
43 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_062\_01139 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
44 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_062\_01148 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FB7D5D}} \color[HTML]{F1F1F1} 0.88 \\
45 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_063\_01161 & {\cellcolor[HTML]{FCB89E}} \color[HTML]{000000} 0.52 & {\cellcolor[HTML]{FCA183}} \color[HTML]{000000} 0.66 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FCAB8F}} \color[HTML]{000000} 0.60 \\
46 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_073\_01341 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 \\
47 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_088\_01604 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 & {\cellcolor[HTML]{FCBEA5}} \color[HTML]{000000} 0.48 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 \\
48 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_089\_01622 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 \\
49 & gpt-4o-2024-05-13\_\_synth\_bandit\_random\_mixed\_iter\_090\_01635 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCBBA1}} \color[HTML]{000000} 0.50 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 \\
50 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_007\_00142 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDC7B2}} \color[HTML]{000000} 0.42 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 \\
51 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_028\_00573 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCBBA1}} \color[HTML]{000000} 0.50 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 \\
52 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_037\_00751 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 \\
53 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_040\_00809 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 \\
54 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_048\_00977 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 \\
55 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_059\_01186 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 \\
56 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_068\_01364 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 \\
57 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_073\_01467 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 \\
58 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_077\_01545 & {\cellcolor[HTML]{FEE5D8}} \color[HTML]{000000} 0.20 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 \\
59 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_lle\_iter\_077\_01550 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FDC7B2}} \color[HTML]{000000} 0.42 \\
60 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_022\_00439 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 \\
61 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_028\_00572 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FB7D5D}} \color[HTML]{F1F1F1} 0.88 \\
62 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_037\_00739 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 \\
63 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_038\_00750 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 \\
64 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_053\_01041 & {\cellcolor[HTML]{FCA183}} \color[HTML]{000000} 0.66 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FB7757}} \color[HTML]{F1F1F1} 0.92 \\
65 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_053\_01052 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 \\
66 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_057\_01130 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FEDCCD}} \color[HTML]{000000} 0.28 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 \\
67 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_080\_01570 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 \\
68 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_081\_01592 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 \\
69 & gpt-4o-2024-05-13\_\_synth\_bandit\_self\_score\_mixed\_iter\_091\_01776 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCC2AA}} \color[HTML]{000000} 0.46 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 \\
70 & handcrafted\_02 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 \\
71 & handcrafted\_03 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 \\
72 & handcrafted\_04 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 \\
73 & handcrafted\_05 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 \\
74 & handcrafted\_06\_persuasion & {\cellcolor[HTML]{FEDFD0}} \color[HTML]{000000} 0.26 & {\cellcolor[HTML]{FEDCCD}} \color[HTML]{000000} 0.28 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 \\
75 & identity & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
76 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_034\_00659 & {\cellcolor[HTML]{FCBEA5}} \color[HTML]{000000} 0.48 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
77 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_034\_00665 & {\cellcolor[HTML]{FCBEA5}} \color[HTML]{000000} 0.48 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
78 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_034\_00667 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FB7353}} \color[HTML]{F1F1F1} 0.94 \\
79 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_038\_00734 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 \\
80 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_039\_00747 & {\cellcolor[HTML]{FB7D5D}} \color[HTML]{F1F1F1} 0.88 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 \\
81 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_040\_00764 & {\cellcolor[HTML]{FB7A5A}} \color[HTML]{F1F1F1} 0.90 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 \\
82 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_044\_00837 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 & {\cellcolor[HTML]{FC8A6A}} \color[HTML]{F1F1F1} 0.80 \\
83 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_044\_00848 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 \\
84 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_052\_00984 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 \\
85 & meta-llama/Meta-Llama-3-70B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_067\_01249 & {\cellcolor[HTML]{FC8E6E}} \color[HTML]{000000} 0.78 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FDC7B2}} \color[HTML]{000000} 0.42 & {\cellcolor[HTML]{FC9B7C}} \color[HTML]{000000} 0.70 \\
86 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_028\_00536 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 \\
87 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_036\_00679 & {\cellcolor[HTML]{FC9777}} \color[HTML]{000000} 0.72 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
88 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_060\_01137 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FCB89E}} \color[HTML]{000000} 0.52 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 \\
89 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_066\_01252 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 & {\cellcolor[HTML]{FDD0BC}} \color[HTML]{000000} 0.36 \\
90 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_067\_01272 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FEE5D8}} \color[HTML]{000000} 0.20 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 & {\cellcolor[HTML]{FC8060}} \color[HTML]{F1F1F1} 0.86 \\
91 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_070\_01323 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FC9474}} \color[HTML]{000000} 0.74 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCB296}} \color[HTML]{000000} 0.56 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 \\
92 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_072\_01358 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FCAB8F}} \color[HTML]{000000} 0.60 & {\cellcolor[HTML]{FCAB8F}} \color[HTML]{000000} 0.60 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FCA183}} \color[HTML]{000000} 0.66 & {\cellcolor[HTML]{FCBBA1}} \color[HTML]{000000} 0.50 \\
93 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_073\_01371 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FCB89E}} \color[HTML]{000000} 0.52 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC9D7F}} \color[HTML]{000000} 0.68 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 \\
94 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_082\_01552 & {\cellcolor[HTML]{FCB89E}} \color[HTML]{000000} 0.52 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FCB499}} \color[HTML]{000000} 0.54 \\
95 & meta-llama/Meta-Llama-3-8B-Instruct\_\_synth\_bandit\_self\_score\_mixed\_iter\_091\_01713 & {\cellcolor[HTML]{FCC4AD}} \color[HTML]{000000} 0.44 & {\cellcolor[HTML]{FCA78B}} \color[HTML]{000000} 0.62 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FCAB8F}} \color[HTML]{000000} 0.60 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 \\
96 & sota\_AIM & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
97 & sota\_DAN & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
98 & sota\_PAP & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE3D6}} \color[HTML]{000000} 0.22 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 \\
99 & sota\_aff\_prfx\_inj & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FC8767}} \color[HTML]{F1F1F1} 0.82 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
100 & sota\_b64 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 \\
101 & sota\_cipher & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF0E9}} \color[HTML]{000000} 0.06 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC9070}} \color[HTML]{000000} 0.76 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 \\
102 & sota\_combination\_3 & {\cellcolor[HTML]{FCAE92}} \color[HTML]{000000} 0.58 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEDCCD}} \color[HTML]{000000} 0.28 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FED9C9}} \color[HTML]{000000} 0.30 \\
103 & sota\_cot & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
104 & sota\_few\_shots & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDCAB5}} \color[HTML]{000000} 0.40 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FCBEA5}} \color[HTML]{000000} 0.48 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
105 & sota\_lr\_translation & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 \\
106 & sota\_obf\_pyld\_splitting & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FEE6DA}} \color[HTML]{000000} 0.18 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FEE5D8}} \color[HTML]{000000} 0.20 \\
107 & sota\_sota\_ref\_suppr & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FEE1D4}} \color[HTML]{000000} 0.24 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDCDB9}} \color[HTML]{000000} 0.38 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 \\
108 & sota\_style\_short & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FCA588}} \color[HTML]{000000} 0.64 & {\cellcolor[HTML]{FEE8DD}} \color[HTML]{000000} 0.16 \\
109 & sota\_uta\_bard & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFEDE5}} \color[HTML]{000000} 0.10 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
110 & sota\_uta\_gpt & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FEEAE0}} \color[HTML]{000000} 0.14 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FC8464}} \color[HTML]{F1F1F1} 0.84 & {\cellcolor[HTML]{FFEBE2}} \color[HTML]{000000} 0.12 \\
111 & sota\_uta\_llama & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FDD3C1}} \color[HTML]{000000} 0.34 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 \\
112 & sota\_wikipedia & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF4EE}} \color[HTML]{000000} 0.02 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF5F0}} \color[HTML]{000000} 0.00 & {\cellcolor[HTML]{FFF2EB}} \color[HTML]{000000} 0.04 & {\cellcolor[HTML]{FFEEE7}} \color[HTML]{000000} 0.08 \\
\bottomrule
\end{tabular}
\end{table}
