% pearsonr bail prompt continue first value: 0.08943359013826357 p value: 0.6264270379415546
% distance corr bail prompt continue first value: 0.2532423200508042 p value: 0.469

\begin{figure}[H]
\centering
% Inline data for the scatter plot below, stored in the macro \datatable.
% One whitespace-separated row per model; the first row is the header:
%   Label    - model identifier, printed next to each mark
%   refusePr - value plotted on the x axis
%   bailPr   - value plotted on the y axis
% Column names are case-sensitive: refer to them exactly as spelled in the header.
% NOTE(review): both numeric columns appear to be on a 0-100 (percent) scale
% rather than 0-1 -- confirm against the script that generated this table.
\pgfplotstableread{
Label refusePr bailPr
gpt-3.5-turbo 1.2515337423312882 83.77975178675092
gpt-4 11.662576687116564 93.97783950638302
gpt-4-turbo 6.269938650306749 87.79797429423151
gpt-4o-mini 18.374233128834355 86.32249874150898
gpt-4o 51.93251533742331 88.27282715478749
gpt-4.1-nano 7.828220858895706 91.87518332115332
gpt-4.1-mini 3.6871165644171775 90.07945984469346
gpt-4.1 6.809815950920245 92.55439197295846
claude-3-haiku-20240307 81.5521472392638 97.52805055921178
claude-3-5-haiku-20241022 13.754601226993865 97.08691879433503
claude-3-5-sonnet-20240620 0.38650306748466257 97.56141413151617
claude-3-5-sonnet-20241022 25.693251533742334 97.30493857151818
claude-3-7-sonnet-20250219 0.12883435582822086 93.72049780523525
claude-sonnet-4-20250514 1.7975460122699387 91.71525801318981
claude-3-opus-20240229 14.269938650306749 97.65222037089703
claude-opus-4-20250514 2.8711656441717794 88.94525920002395
claude-opus-4-1-20250805 2.8098159509202456 90.29761679782575
NousResearch/Hermes-3-Llama-3.2-3B 19.97546012269939 53.06528862214296
NousResearch/Hermes-3-Llama-3.1-8B 0.09202453987730061 69.03809739037938
unsloth/Llama-3.1-8B-Instruct 71.08588957055215 89.73584828052694
Qwen/Qwen3-1.7B 22.515337423312882 44.76861113689245
Qwen/Qwen3-4B 5.98159509202454 57.59240693196745
Qwen/Qwen3-8B 6.088343558282209 57.219730176900065
Qwen/Qwen3-32B 4.564417177914111 64.80506839774594
Qwen/Qwen3-30B-A3B 2.9263803680981595 66.6867686539797
Qwen/QwQ-32B 10.742331288343559 64.9813748272193
Qwen/Qwen2.5-7B-Instruct 0.36073619631901843 80.1666466893057
zai-org/GLM-4-32B-0414 0.8895705521472393 86.36646310718476
zai-org/GLM-Z1-9B-0414 23.055214723926383 62.884260388853974
google/gemma-2-2b-it 5.226993865030675 90.20442422800069
google/gemma-2-9b-it 4.938650306748467 95.48484590646319
google/gemma-2-27b-it 0.006134969325153374 95.5098134642681
}\datatable
% Scatter plot of bailPr (y) against refusePr (x) read from \datatable,
% with each mark annotated by the model name from the table's Label column.
\begin{tikzpicture}
  \begin{axis}[
      width=15cm,
      height=9cm,
      xlabel={Refusal probability (\texttt{refusePr})},
      ylabel={Bail-out probability (\texttt{bailPr})},
      title={LLM trade-off scatterplot},
      grid=both,
      enlargelimits=0.03,
      % Print the per-point meta value next to every mark.
      nodes near coords,
      % "explicit symbolic": meta is taken verbatim from the table column
      % named by meta=... below and typeset as text, not parsed as a number.
      point meta=explicit symbolic,
      every node near coord/.style={
        font=\scriptsize,
        anchor=west,             % label sits to the right of its mark
        xshift=2pt,
        draw=white, fill=white,  % tiny white halo for readability
        inner sep=1pt
      }
  ]
    % Mark styling is given on the plot itself rather than in the axis
    % options, so the colour applies to the marks (and their labels) only.
    % The explicit [...] also replaces the cycle-list style, which a bare
    % \addplot would otherwise use for its colour.
    \addplot[
        only marks,
        mark=*,
        mark size=2pt,
        color=blue!60!black
    ] table[
        x=refusePr,
        y=bailPr,
        meta=Label                % must match the header exactly (case-sensitive)
    ] {\datatable};
  \end{axis}
\end{tikzpicture}
\end{figure}
