% pearsonr bail tool value: -0.050007784369912905 p value: 0.7857744066145442
% distance corr bail tool value: 0.295144890103747 p value: 0.196

\begin{figure}[H]
\centering
% Inline data table, stored in \datatable: one row per model (32 rows).
% Columns (names are case-sensitive wherever the table is referenced):
%   Label    - model identifier, used as text rather than as a number
%   refusePr - plotted on the x axis of the scatter plot in this figure
%   bailPr   - plotted on the y axis of the scatter plot in this figure
% NOTE(review): both numeric columns look like percentages on a 0-100 scale
% (values exceed 1), not probabilities in [0,1] - confirm against the data source.
\pgfplotstableread{
Label refusePr bailPr
gpt-3.5-turbo 2.1226993865030677 83.77975178675092
gpt-4 0.049079754601226995 93.97783950638302
gpt-4-turbo 0.36809815950920244 87.79797429423151
gpt-4o-mini 1.392638036809816 86.32249874150898
gpt-4o 2.042944785276074 88.27282715478749
gpt-4.1-nano 6.104294478527607 91.87518332115332
gpt-4.1-mini 0.006134969325153374 90.07945984469346
gpt-4.1 0.0 92.55439197295846
claude-3-haiku-20240307 2.2392638036809815 97.52805055921178
claude-3-5-haiku-20241022 0.0 97.08691879433503
claude-3-5-sonnet-20240620 1.2147239263803682 97.56141413151617
claude-3-5-sonnet-20241022 17.03558282208589 97.30493857151818
claude-3-7-sonnet-20250219 2.1779141104294477 93.72049780523525
claude-sonnet-4-20250514 0.03680981595092025 91.71525801318981
claude-3-opus-20240229 12.85276073619632 97.65222037089703
claude-opus-4-20250514 0.18404907975460122 88.94525920002395
claude-opus-4-1-20250805 0.6134969325153374 90.29761679782575
NousResearch/Hermes-3-Llama-3.2-3B 0.0 53.06528862214296
NousResearch/Hermes-3-Llama-3.1-8B 1.2024539877300613 69.03809739037938
unsloth/Llama-3.1-8B-Instruct 81.36196319018406 89.73584828052694
Qwen/Qwen3-1.7B 6.717791411042945 44.76861113689245
Qwen/Qwen3-4B 2.3803680981595092 57.59240693196745
Qwen/Qwen3-8B 7.629447852760736 57.219730176900065
Qwen/Qwen3-32B 25.699386503067483 64.80506839774594
Qwen/Qwen3-30B-A3B 1.1533742331288344 66.6867686539797
Qwen/QwQ-32B 22.503067484662576 64.9813748272193
Qwen/Qwen2.5-7B-Instruct 1.0085889570552147 80.1666466893057
zai-org/GLM-4-32B-0414 71.47239263803681 86.36646310718476
zai-org/GLM-Z1-9B-0414 25.25153374233129 62.884260388853974
google/gemma-2-2b-it 0 90.20442422800069
google/gemma-2-9b-it 0 95.48484590646319
google/gemma-2-27b-it 0 95.5098134642681
}\datatable
% Scatter plot of bailPr (y) against refusePr (x): one mark per row of
% \datatable, each annotated with the row's text label.
\begin{tikzpicture}
  \begin{axis}[
      width=15cm,
      height=9cm,
      % NOTE(review): the plotted values exceed 1, so they look like percentages
      % rather than probabilities - consider adding a unit to the axis labels.
      xlabel={Refusal probability (\texttt{refusePr})},
      ylabel={Bail-out probability (\texttt{bailPr})},
      title={LLM trade-off scatterplot},
      grid=both,
      enlargelimits=0.03,
      % nodes-near-coords settings: print each point's meta value next to it
      nodes near coords,
      point meta=explicit symbolic,      % meta is taken from the table and used as text
      every node near coord/.style={
        font=\scriptsize,
        anchor=west,                     % label sits to the right of its mark
        xshift=2pt,
        draw=white, fill=white,          % tiny white halo for readability
        inner sep=1pt
      },
      % visual style of the marks
      only marks,
      mark=*,
      mark size=2pt,
      color=blue!60!black
  ]
    % The actual plot. The header row of \datatable names the text column
    % `Label`; table column names are case-sensitive, so the meta key must use
    % that exact spelling (a lowercase `label` cannot be resolved).
    \addplot table[
        x=refusePr,
        y=bailPr,
        meta=Label                % <-- use the "Label" column as point meta
    ] {\datatable};
  \end{axis}
\end{tikzpicture}
\end{figure}
