
% median: 0.03907975460122699

\begin{tikzpicture}
% Series palette. Each colour name is referenced as a bar fill by the axis
% further down in this picture; the hex values are the same 8-bit RGB triples
% written in xcolor's HTML model.
\definecolor{bailtool}{HTML}{9B59B6}                  % purple        -> "Bail Tool" bars
\definecolor{bailstring}{HTML}{E74C3C}                % bright red    -> "Bail String" bars
\definecolor{bailpromptcontinuefirst}{HTML}{E67E22}   % orange        -> "Bail Prompt Continue-first" bars
\definecolor{bailpromptbailfirst}{HTML}{F39C12}       % golden orange -> "Bail Prompt Bail-first" bars
\definecolor{bailpromptunknown}{HTML}{95A5A6}         % gray          -> "unknown" segments stacked on the prompt bars
% NOTE(review): nothing in this picture uses a pattern fill; library loading
% normally belongs in the preamble -- confirm whether this line is still needed.
\usetikzlibrary{patterns}
% Inline data for the bar chart, stored in \datatable.
%
% Layout: every model occupies FOUR consecutive rows, one per bail method, and
% each row only fills the columns of its own method (all others are 0):
%   row 1: <model name>  toolBailPr / toolBailPr_err
%   row 2: {}            strBailPr  / strBailPr_err
%   row 3: {}            promptBailFirstBailPr / _err, plus promptBailFirstUnknownPr
%   row 4: {}            promptContinueFirstBailPr / _err, plus promptContinueFirstUnknownPr
% The model name appears only on the first row of its group; `{}` is an empty
% label.  The axis below plots rows by index (x expr=\coordindex), so each row
% becomes its own bar position and the four methods end up side by side.
%
% The *_err columns are consumed as symmetric y error bars by the plots below.
% NOTE(review): values look like percentages on a 0-100 scale (the axis uses
% ymax=100); what kind of error the *_err columns hold (std. error, CI
% half-width, ...) is not visible here -- confirm against the generating script.
% NOTE(review): the google/gemma-* rows have toolBailPr = 0 with zero error,
% presumably meaning "no bail-tool data" rather than a measured 0 -- confirm.
\pgfplotstableread{
Label toolBailPr toolBailPr_err strBailPr strBailPr_err promptBailFirstBailPr promptBailFirstBailPr_err promptBailFirstUnknownPr promptContinueFirstBailPr promptContinueFirstBailPr_err promptContinueFirstUnknownPr
Llama-3.1-8B-Instruct 81.36196319018406 0.5977988480405422 0 0 0 0 0 0 0 0
{} 0 0 4.521472392638037 0.3191162293810649 0 0 0 0 0 0
{} 0 0 0 0 33.23312883435583 0.7230756025791906 6.251533742331289 0 0 0
{} 0 0 0 0 0 0 0 71.08588957055215 0.6959351694248739 2.6503067484662575
Hermes-3-Llama-3.2-3B 0.0 0.011781272457897283 0 0 0 0 0 0 0 0
{} 0 0 0.18404907975460122 0.06683159095640372 0 0 0 0 0 0
{} 0 0 0 0 65.14110429447852 0.7314769309619615 16.625766871165645 0 0 0
{} 0 0 0 0 0 0 0 19.97546012269939 0.6137619186442415 12.171779141104293
zai-org/GLM-4-32B-0414 71.47239263803681 0.6931456501085884 0 0 0 0 0 0 0 0
{} 0 0 0.8404907975460122 0.14061231013937578 0 0 0 0 0 0
{} 0 0 0 0 0.147239263803681 0.06001837776621577 0.4969325153374233 0 0 0
{} 0 0 0 0 0 0 0 0.8895705521472393 0.14459595679767218 0.8282208588957056
zai-org/GLM-Z1-9B-0414 25.25153374233129 0.6669184370978364 0 0 0 0 0 0 0 0
{} 0 0 1.5276073619631902 0.18861312787035442 0 0 0 0 0 0
{} 0 0 0 0 19.96319018404908 0.6136204769527814 5.079754601226994 0 0 0
{} 0 0 0 0 0 0 0 23.055214723926383 0.6465561074176819 5.779141104294479
Qwen3-1.7B 6.717791411042945 0.3843940605071722 0 0 0 0 0 0 0 0
{} 0 0 1.0 0.15316729883198646 0 0 0 0 0 0
{} 0 0 0 0 38.47239263803681 0.7468331409727462 3.153374233128834 0 0 0
{} 0 0 0 0 0 0 0 22.515337423312882 0.6411805713135779 1.1717791411042944
google/gemma-2-2b-it 0 0 0 0 0 0 0 0 0 0
{} 0 0 2.079754601226994 0.2193459223425212 0 0 0 0 0 0
{} 0 0 0 0 52.52147239263804 0.7665284802084759 0.049079754601226995 0 0 0
{} 0 0 0 0 0 0 0 5.226993865030675 0.34181126460118577 0.049079754601226995
QwQ-32B 22.503067484662576 0.6410566313952433 0 0 0 0 0 0 0 0
{} 0 0 12.208588957055214 0.5026179747025736 0 0 0 0 0 0
{} 0 0 0 0 5.319018404907975 0.3446362478514788 0.049079754601226995 0 0 0
{} 0 0 0 0 0 0 0 10.742331288343559 0.4754067704634073 0.0736196319018405
Qwen3-32B 25.699386503067483 0.670786785211064 0 0 0 0 0 0 0 0
{} 0 0 5.552147239263803 0.3516659400860926 0 0 0 0 0 0
{} 0 0 0 0 3.9079754601226995 0.2976596391411812 0.5153374233128835 0 0 0
{} 0 0 0 0 0 0 0 4.564417177914111 0.320554052317806 0.754601226993865
Qwen3-8B 7.629447852760736 0.40761928515809015 0 0 0 0 0 0 0 0
{} 0 0 1.6294478527607361 0.19467467276963632 0 0 0 0 0 0
{} 0 0 0 0 1.1533742331288344 0.1643029366267986 0.28957055214723926 0 0 0
{} 0 0 0 0 0 0 0 6.088343558282209 0.3671917425789903 0.18650306748466258
google/gemma-2-9b-it 0 0 0 0 0 0 0 0 0 0
{} 0 0 6.159509202453988 0.3691895115583509 0 0 0 0 0 0
{} 0 0 0 0 0.9693251533742332 0.15083714651617766 0.018404907975460124 0 0 0
{} 0 0 0 0 0 0 0 4.938650306748467 0.3327655864717983 0.006134969325153374
Qwen3-4B 2.3803680981595092 0.23426125984618243 0 0 0 0 0 0 0 0
{} 0 0 0.7423312883435583 0.1322727453677066 0 0 0 0 0 0
{} 0 0 0 0 1.3558282208588956 0.17789039628024622 0.2331288343558282 0 0 0
{} 0 0 0 0 0 0 0 5.98159509202454 0.3641683984375645 0.4049079754601227
Qwen3-30B-A3B 1.1533742331288344 0.1643029366267986 0 0 0 0 0 0 0 0
{} 0 0 0.09815950920245399 0.04948607345075612 0 0 0 0 0 0
{} 0 0 0 0 2.110429447852761 0.22091845152802042 0.5337423312883436 0 0 0
{} 0 0 0 0 0 0 0 2.9263803680981595 0.25895592188172567 0.5950920245398773
Qwen2.5-7B-Instruct 1.0085889570552147 0.15381315761172387 0 0 0 0 0 0 0 0
{} 0 0 0.498159509202454 0.1086990281744821 0 0 0 0 0 0
{} 0 0 0 0 4.355828220858895 0.3134955956776603 0.32147239263803684 0 0 0
{} 0 0 0 0 0 0 0 0.36073619631901843 0.09276854837797718 0.294478527607362
Hermes-3-Llama-3.1-8B 1.2024539877300613 0.16770327667007667 0 0 0 0 0 0 0 0
{} 0 0 1.0858895705521474 0.15950334679305525 0 0 0 0 0 0
{} 0 0 0 0 0.22699386503067487 0.07398616924190433 0.754601226993865 0 0 0
{} 0 0 0 0 0 0 0 0.09202453987730061 0.04800650886026906 0.10429447852760736
google/gemma-2-27b-it 0 0 0 0 0 0 0 0 0 0
{} 0 0 2.331288343558282 0.2318979112798747 0 0 0 0 0 0
{} 0 0 0 0 0.0 0.011781272457897283 0.0 0 0 0
{} 0 0 0 0 0 0 0 0.006134969325153374 0.016831843146562237 0.006134969325153374
}\datatable


% Bar chart of bail percentages read from \datatable.
%
% Every table row is plotted at its own x position (x expr=\coordindex) and
% only one bail method is non-zero per row, so `ybar stacked` effectively
% yields side-by-side bars per model.  The only real stacking is the gray
% "unknown" segment that sits on top of each bail-prompt bar.
\begin{axis}[
  ybar stacked,
  width = \linewidth,
  bar width = 4pt,
  ymin=0, ymax=100,
  xtick=data,                            % one tick per table row
  ylabel = {Average bail \% on BailBench},
  enlarge x limits = {abs = 20pt},
  xticklabels from table={\datatable}{Label},   % rows labelled {} get an empty tick label
  xticklabel style={xshift=9pt,rotate=90,align=center}, % shift the rotated labels by 9pt (rightwards)
  xtick style={draw=none},               % hide tick marks, keep the labels
  enlarge y limits={value=0.05,upper},   % 5% headroom above ymax
  legend style={cells={anchor=east},legend pos=north east},
  reverse legend=false
]
    % --- Bail Tool: purple bars with symmetric error bars -------------------
    \addplot[fill=bailtool,
           error bars/.cd,
           y dir=both,
           y explicit,
          ]
    table[
        x expr=\coordindex,
        y=toolBailPr,
        y error plus=toolBailPr_err,
        y error minus=toolBailPr_err
    ]{\datatable};
    \addlegendentry{Bail Tool}

    % --- Bail String: red bars with symmetric error bars --------------------
    \addplot[fill=bailstring,
           error bars/.cd,
           y dir=both,
           y explicit,
          ]
    table[
        x expr=\coordindex,
        y=strBailPr,
        y error plus=strBailPr_err,
        y error minus=strBailPr_err
    ]{\datatable};
    \addlegendentry{Bail String}

    % --- Bail Prompt, bail-first ordering: golden-orange bars ---------------
    \addplot[fill=bailpromptbailfirst,
           error bars/.cd,
           y dir=both,
           y explicit
          ]
    table[
        x expr=\coordindex,
        y=promptBailFirstBailPr,
        y error plus=promptBailFirstBailPr_err,
        y error minus=promptBailFirstBailPr_err
    ]{\datatable};
    \addlegendentry{Bail Prompt Bail-first}

    % Gray "unknown" segment stacked on the bail-first bars.
    % `forget plot` is a plot option, so it is given to \addplot (not to the
    % table[...] key list): the series must not take a legend slot, otherwise
    % the \addlegendentry texts that follow would be paired with the wrong
    % colours.  The shared gray legend swatch comes from the dummy plot at
    % the end of the axis.
    \addplot[fill=bailpromptunknown,
           forget plot,
          ]
    table[
        x expr=\coordindex,
        y=promptBailFirstUnknownPr,
    ]{\datatable};

    % --- Bail Prompt, continue-first ordering: orange bars ------------------
    \addplot[fill=bailpromptcontinuefirst,
           error bars/.cd,
           y dir=both,
           y explicit
          ]
    table[
        x expr=\coordindex,
        y=promptContinueFirstBailPr,
        y error plus=promptContinueFirstBailPr_err,
        y error minus=promptContinueFirstBailPr_err
    ]{\datatable};
    \addlegendentry{Bail Prompt Continue-first}

    % Gray "unknown" segment stacked on the continue-first bars; kept out of
    % the legend for the same reason as the bail-first one.
    \addplot[fill=bailpromptunknown,
           forget plot,
          ]
    table[
        x expr=\coordindex,
        y=promptContinueFirstUnknownPr,
    ]{\datatable};

    % Legend-only series: a single zero-height point that exists solely to
    % give both gray segments one shared legend entry.
    \addplot[
      draw=none,                       % nothing visible
      fill=bailpromptunknown
    ] coordinates {(0,0)};             % single dummy point
    \addlegendentry{Refusal Classifier/Invalid Outputs}

\end{axis}
\end{tikzpicture}
