
% median: 0.022085889570552145

\begin{tikzpicture}
% TikZ pattern fills. No pattern key is used in the visible part of this
% picture; presumably kept for sibling figures -- TODO confirm before removing.
\usetikzlibrary{patterns}
% Fill colours, one per bar series of the axis in this picture.
% -- direct bail mechanisms
\definecolor{bailtool}{RGB}{155, 89, 182}                % purple, warm undertones ("Bail Tool")
\definecolor{bailstring}{RGB}{231, 76, 60}               % bright red ("Bail String")
% -- bail prompt, by option order
\definecolor{bailpromptbailfirst}{RGB}{243, 156, 18}     % golden orange ("Bail Prompt Bail-first")
\definecolor{bailpromptcontinuefirst}{RGB}{230, 126, 34} % standard orange ("Bail Prompt Continue-first")
% -- segment stacked on top of the prompt bars and used for the last legend swatch
\definecolor{bailpromptunknown}{RGB}{149, 165, 166}      % gray
% Inline data for the bar chart, stored in \datatable.
%
% Layout:
%   * Label is the tick label. Each model occupies four consecutive rows and
%     only the first of them carries the model name; the other three use {}.
%   * The four rows of a model correspond, in order, to the tool, string,
%     prompt-bail-first and prompt-continue-first columns. All columns that
%     belong to the other three methods are 0 in that row, so the stacked bar
%     at each row shows a single method.
%   * <method>BailPr is the bar height and <method>BailPr_err is the value the
%     axis uses for the symmetric error bar of that method.
%   * prompt*UnknownPr exists only for the two prompt methods and is drawn as
%     the gray segment stacked on top of the corresponding prompt bar.
%   * Values look like percentages (the axis is labelled "bail %" and runs
%     0-100) -- TODO confirm against the script that generated this table.
\pgfplotstableread{
Label toolBailPr toolBailPr_err strBailPr strBailPr_err promptBailFirstBailPr promptBailFirstBailPr_err promptBailFirstUnknownPr promptContinueFirstBailPr promptContinueFirstBailPr_err promptContinueFirstUnknownPr
claude-3-haiku 2.2392638036809815 0.22739371412906362 0 0 0 0 0 0 0 0
{} 0 0 0.05644171779141104 0.038309856213244536 0 0 0 0 0 0
{} 0 0 0 0 31.435582822085887 0.7126551558442284 0.0 0 0 0
{} 0 0 0 0 0 0 0 81.5521472392638 0.5954366525481081 0.0
claude-3-5-haiku 0.0 0.011781272457897283 0 0 0 0 0 0 0 0
{} 0 0 0.0 0.011781272457897283 0 0 0 0 0 0
{} 0 0 0 0 10.521472392638037 0.471078690937039 0.018404907975460124 0 0 0
{} 0 0 0 0 0 0 0 13.754601226993865 0.5287613739197475 0.19018404907975459
claude-3-5-sonnet 1.2147239263803682 0.16854211694527796 0 0 0 0 0 0 0 0
{} 0 0 0.3067484662576687 0.08568957920169977 0 0 0 0 0 0
{} 0 0 0 0 1.687116564417178 0.19801937803768377 0.012269938650306749 0 0 0
{} 0 0 0 0 0 0 0 0.38650306748466257 0.09596071020842946 0.012269938650306749
claude-3-6-sonnet 17.03558282208589 0.5771316133293325 0 0 0 0 0 0 0 0
{} 0 0 13.271165644171779 0.5208436178494069 0 0 0 0 0 0
{} 0 0 0 0 20.80368098159509 0.6231034545421195 0.1411042944785276 0 0 0
{} 0 0 0 0 0 0 0 25.693251533742334 0.670734420735604 0.2638036809815951
claude-3-7-sonnet 2.1779141104294477 0.22433574084104094 0 0 0 0 0 0 0 0
{} 0 0 5.079754601226994 0.3372297693697766 0 0 0 0 0 0
{} 0 0 0 0 0.4171779141104294 0.0996255870453975 0.0 0 0 0
{} 0 0 0 0 0 0 0 0.12883435582822086 0.05630131737646633 0.0
claude-sonnet-4 0.03680981595092025 0.031711319245122424 0 0 0 0 0 0 0 0
{} 0 0 0.3460122699386503 0.09089336337122124 0 0 0 0 0 0
{} 0 0 0 0 2.4171779141104297 0.23601666370888855 0.5828220858895705 0 0 0
{} 0 0 0 0 0 0 0 1.7975460122699387 0.20426064303093336 0.852760736196319
claude-3-opus 12.85276073619632 0.5138059764065814 0 0 0 0 0 0 0 0
{} 0 0 1.4184049079754601 0.18187417678690357 0 0 0 0 0 0
{} 0 0 0 0 12.94478527607362 0.515368953369933 0.0 0 0 0
{} 0 0 0 0 0 0 0 14.269938650306749 0.536960210353853 0.012269938650306749
claude-opus-4 0.18404907975460122 0.06683159095640372 0 0 0 0 0 0 0 0
{} 0 0 0.18159509202453988 0.06639927975291551 0 0 0 0 0 0
{} 0 0 0 0 8.49079754601227 0.42798757243439217 1.834355828220859 0 0 0
{} 0 0 0 0 0 0 0 2.8711656441717794 0.2565791980674253 1.9386503067484664
claude-opus-4-1 0.6134969325153374 0.1204254037153857 0 0 0 0 0 0 0 0
{} 0 0 0.11042944785276074 0.052319321600595686 0 0 0 0 0 0
{} 0 0 0 0 13.447852760736195 0.5237634846463091 2.0 0 0 0
{} 0 0 0 0 0 0 0 2.8098159509202456 0.25390898555515534 2.067484662576687
}\datatable


% Stacked bar chart of \datatable.
% Every table row becomes one x position (x expr=\coordindex). Each row holds
% non-zero values for one method only, so stacking all series at every
% position yields one coloured bar per row, with a gray "unknown" segment
% stacked on top of the two prompt bars.
\begin{axis}[
  ybar stacked,
  width=\linewidth,
  bar width=8pt,
  ymin=0, ymax=100,
  xtick=data,                                 % one tick per table row
  ylabel={Average bail \% on BailBench},
  enlarge x limits={abs=20pt},
  xticklabels from table={\datatable}{Label}, % only the first row of a model is labelled
  % The label sits on the first of a model's four rows; the 12pt shift
  % presumably nudges it towards the middle of that group of bars.
  xticklabel style={xshift=12pt, rotate=90, align=center},
  xtick style={draw=none},
  enlarge y limits={value=0.05, upper},       % head room above ymax
  legend style={cells={anchor=east}, legend pos=north east},
  reverse legend=false,
]
  % --- Bail tool ------------------------------------------------------------
  \addplot[
    fill=bailtool,
    error bars/.cd,      % must stay last: switches the key path for what follows
      y dir=both,
      y explicit,
  ] table[
    x expr=\coordindex,
    y=toolBailPr,
    y error=toolBailPr_err,                   % symmetric: same column up and down
  ]{\datatable};
  \addlegendentry{Bail Tool}

  % --- Bail string ----------------------------------------------------------
  \addplot[
    fill=bailstring,
    error bars/.cd,
      y dir=both,
      y explicit,
  ] table[
    x expr=\coordindex,
    y=strBailPr,
    y error=strBailPr_err,
  ]{\datatable};
  \addlegendentry{Bail String}

  % --- Bail prompt, bail option first ---------------------------------------
  \addplot[
    fill=bailpromptbailfirst,
    error bars/.cd,
      y dir=both,
      y explicit,
  ] table[
    x expr=\coordindex,
    y=promptBailFirstBailPr,
    y error=promptBailFirstBailPr_err,
  ]{\datatable};
  \addlegendentry{Bail Prompt Bail-first}

  % Gray segment on top of the bail-first bar. `forget plot' is an \addplot
  % option: it keeps this plot out of the legend, so the next
  % \addlegendentry is attached to the continue-first plot.
  \addplot[
    fill=bailpromptunknown,
    forget plot,
  ] table[
    x expr=\coordindex,
    y=promptBailFirstUnknownPr,
  ]{\datatable};

  % --- Bail prompt, continue option first -----------------------------------
  \addplot[
    fill=bailpromptcontinuefirst,
    error bars/.cd,
      y dir=both,
      y explicit,
  ] table[
    x expr=\coordindex,
    y=promptContinueFirstBailPr,
    y error=promptContinueFirstBailPr_err,
  ]{\datatable};
  \addlegendentry{Bail Prompt Continue-first}

  % Gray segment on top of the continue-first bar; also kept out of the legend.
  \addplot[
    fill=bailpromptunknown,
    forget plot,
  ] table[
    x expr=\coordindex,
    y=promptContinueFirstUnknownPr,
  ]{\datatable};

  % --- Legend swatch for the gray segments ----------------------------------
  % Zero-height dummy bar whose only purpose is to carry a single legend
  % entry shared by both forgotten gray plots above.
  \addplot[
    draw=none,                                % nothing visible in the plot area
    fill=bailpromptunknown,
  ] coordinates {(0,0)};
  \addlegendentry{Refusal Classifier/Invalid Outputs}
\end{axis}
\end{tikzpicture}
