{
  "categories": [
    {
      "id": "heil_tri_n8",
      "label": "N = 8"
    },
    {
      "id": "heil_tri_n13",
      "label": "N = 13"
    },
    {
      "id": "heil_tri_n14",
      "label": "N = 14"
    },
    {
      "id": "average",
      "label": "Average"
    }
  ],
  "baseline_reference": {
    "label": "Single-task 0 / 30 / 120",
    "values": [
      0.608747574245084,
      0.4144063384467872,
      0.3311618089347198,
      0.4514385738755303
    ],
    "source_setting_prefix": "s60-a15-b30",
    "aggregation": "For each holdout N, use the single-task post-hoc OOD score from the specified baseline reference setting so this figure mirrors the fixed-b30 comparison setup."
  },
  "adapt_by_budget": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "values": [
        0.6843043335427696,
        0.48025468917256725,
        0.38944649582688085,
        0.5180018395140726
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "values": [
        0.6746650379150843,
        0.5119043460702313,
        0.4231806890243751,
        0.5365833576698968
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "values": [
        0.701003997065258,
        0.5078798697398554,
        0.41174843482038337,
        0.5402107672084989
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "values": [
        0.6906960124284898,
        0.53764651063828,
        0.4432778789183347,
        0.5572068006617015
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    }
  ],
  "raw_budgets": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "heil_tri_n8",
          "label": "N = 8"
        },
        {
          "id": "heil_tri_n13",
          "label": "N = 13"
        },
        {
          "id": "heil_tri_n14",
          "label": "N = 14"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.6715010447410703,
          0.5191032965842902,
          0.41633820113262593,
          0.5356475141526622
        ],
        "adapt": [
          0.6843043335427696,
          0.48025468917256725,
          0.38944649582688085,
          0.5180018395140726
        ],
        "baseline": [
          0.6619993861963762,
          0.4581672953013082,
          0.36318111658223723,
          0.4944492660266405
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.5502161691457051,
            0.4182030976319102,
            0.36226535115143227,
            0.44356153930968256
          ],
          "adapt_mean": [
            0.6028204612884289,
            0.3965347078529812,
            0.3181735268056851,
            0.4391762319823651
          ],
          "baseline_mean": [
            0.5450567932889869,
            0.4144882904976425,
            0.2861559564527917,
            0.415233680079807
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.5814607792313946,
            0.4072498318701493,
            0.31512452973441957,
            0.43461171361198786
          ],
          "adapt_mean": [
            0.6584248254676892,
            0.36994665904744795,
            0.301896581024567,
            0.44342268851323474
          ],
          "baseline_mean": [
            0.7321921211925422,
            0.4112061687817377,
            0.3320627503220781,
            0.49182034676545267
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.874813568310014,
            0.7277968664031571,
            0.6176124322671876,
            0.7400742889934528
          ],
          "adapt_mean": [
            0.8398604889164067,
            0.592266806926237,
            0.5243858891761554,
            0.6521710616729331
          ],
          "baseline_mean": [
            0.8117012159050324,
            0.6088626289989398,
            0.4947359251997462,
            0.6384332567012395
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.5472340660283965,
            0.5237879661589969,
            0.4092410059606757,
            0.493421012716023
          ],
          "adapt_mean": [
            0.5358397235237394,
            0.4745030291096361,
            0.3458967316299909,
            0.4520798280877888
          ],
          "baseline_mean": [
            0.573652405645004,
            0.4229976839253129,
            0.29376520503658876,
            0.43013843153563525
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.8037806409898411,
            0.5184787208572378,
            0.3774476865494146,
            0.5665690161321646
          ],
          "adapt_mean": [
            0.7845761685175836,
            0.5680222429265337,
            0.45687975049800594,
            0.6031593873140411
          ],
          "baseline_mean": [
            0.6473943949503156,
            0.433281704302908,
            0.4091857458999814,
            0.49662061505106836
          ],
          "run_count": 5
        }
      }
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "heil_tri_n8",
          "label": "N = 8"
        },
        {
          "id": "heil_tri_n13",
          "label": "N = 13"
        },
        {
          "id": "heil_tri_n14",
          "label": "N = 14"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.6699158026552788,
          0.5228930156560628,
          0.4057550951220496,
          0.5328546378111304
        ],
        "adapt": [
          0.6746650379150843,
          0.5119043460702313,
          0.4231806890243751,
          0.5365833576698968
        ],
        "baseline": [
          0.593550341229766,
          0.40684670806647655,
          0.35972508353129046,
          0.4533740442758443
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.5935769791409555,
            0.5490810106959756,
            0.4536917775972835,
            0.5321165891447383
          ],
          "adapt_mean": [
            0.5555187667008417,
            0.5404314862864462,
            0.47553736489985143,
            0.5238292059623798
          ],
          "baseline_mean": [
            0.5084113747397968,
            0.3408920304723963,
            0.24779142844582536,
            0.3656982778860062
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.612562822402251,
            0.49609970199788656,
            0.3153352688359204,
            0.47466593107868604
          ],
          "adapt_mean": [
            0.6739853021736465,
            0.43675428602540867,
            0.32942737517006243,
            0.4800556544563726
          ],
          "baseline_mean": [
            0.5936922977794701,
            0.33666219886321813,
            0.31134484818096325,
            0.4138997816078838
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.8948320355715591,
            0.6739852650735746,
            0.6164133629011179,
            0.7284102211820839
          ],
          "adapt_mean": [
            0.8730877115650664,
            0.6580663731335633,
            0.5778557242303097,
            0.7030032696429798
          ],
          "baseline_mean": [
            0.7642758099585333,
            0.5360534505257722,
            0.4287082605486,
            0.5763458403443018
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.4081676344790064,
            0.35679686496544016,
            0.167400175319105,
            0.31078822492118385
          ],
          "adapt_mean": [
            0.4869896061584025,
            0.2580642479734495,
            0.1815100098123675,
            0.30885462131473984
          ],
          "baseline_mean": [
            0.5298938453479536,
            0.33501778911638797,
            0.33139788442211,
            0.3987698396288172
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.840439541682622,
            0.5385022355474373,
            0.4759348909568214,
            0.6182922227289602
          ],
          "adapt_mean": [
            0.7837438029774642,
            0.6662053369322891,
            0.5515729710092847,
            0.6671740369730126
          ],
          "baseline_mean": [
            0.5714783783230759,
            0.4856080713546083,
            0.4793829960589536,
            0.5121564819122126
          ],
          "run_count": 5
        }
      }
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "heil_tri_n8",
          "label": "N = 8"
        },
        {
          "id": "heil_tri_n13",
          "label": "N = 13"
        },
        {
          "id": "heil_tri_n14",
          "label": "N = 14"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.6617082352449093,
          0.5126946505964958,
          0.42754750481008585,
          0.533983463550497
        ],
        "adapt": [
          0.701003997065258,
          0.5078798697398554,
          0.41174843482038337,
          0.5402107672084989
        ],
        "baseline": [
          0.608747574245084,
          0.4144063384467872,
          0.3311618089347198,
          0.4514385738755303
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.46969103587696,
            0.4812441403711748,
            0.3162740171376662,
            0.42240306446193365
          ],
          "adapt_mean": [
            0.545424222196359,
            0.46939207909205,
            0.3638116107267243,
            0.4595426373383778
          ],
          "baseline_mean": [
            0.5208067164751784,
            0.26398088032482325,
            0.2351872363689449,
            0.3399916110563155
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.5286719793717802,
            0.4473181083513819,
            0.3792108996144499,
            0.4517336624458707
          ],
          "adapt_mean": [
            0.6627449155161005,
            0.4294733670602908,
            0.31917523909647694,
            0.47046450722428934
          ],
          "baseline_mean": [
            0.6457923903362948,
            0.39043787914825606,
            0.25094327332761396,
            0.4290578476040549
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.8257387552327689,
            0.7069753839940186,
            0.6086127565602519,
            0.713775631929013
          ],
          "adapt_mean": [
            0.8825723749867755,
            0.7210483239329317,
            0.6150609240406032,
            0.7395605409867702
          ],
          "baseline_mean": [
            0.7223988049470237,
            0.579899210348821,
            0.5433350962506319,
            0.6152110371821589
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.582940081123094,
            0.28115235006179756,
            0.2807066627707179,
            0.38159969798520316
          ],
          "adapt_mean": [
            0.5930079202685175,
            0.30154564055026156,
            0.23873802799474858,
            0.3777638629378425
          ],
          "baseline_mean": [
            0.5109446522260926,
            0.36776303446060166,
            0.2714607216668286,
            0.38338946945117425
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.9014993246199434,
            0.6467832702041061,
            0.5529331879673433,
            0.7004052609304643
          ],
          "adapt_mean": [
            0.8212705523585375,
            0.6179399380637435,
            0.5219563722433638,
            0.653722287555215
          ],
          "baseline_mean": [
            0.6437953072408306,
            0.4699506879514342,
            0.35488271705957974,
            0.4895429040839481
          ],
          "run_count": 5
        }
      }
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "heil_tri_n8",
          "label": "N = 8"
        },
        {
          "id": "heil_tri_n13",
          "label": "N = 13"
        },
        {
          "id": "heil_tri_n14",
          "label": "N = 14"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.7225544649583717,
          0.6061311837346769,
          0.46063633665401926,
          0.596440661782356
        ],
        "adapt": [
          0.6906960124284898,
          0.53764651063828,
          0.4432778789183347,
          0.5572068006617015
        ],
        "baseline": [
          0.6142384881936801,
          0.39508714685787705,
          0.3215779582343474,
          0.44363453109530154
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.5668138766847196,
            0.5043103900464928,
            0.4085520776480598,
            0.4932254481264241
          ],
          "adapt_mean": [
            0.5782035485395625,
            0.4733773189270167,
            0.3915163899317911,
            0.48103241913279005
          ],
          "baseline_mean": [
            0.4939714241903667,
            0.3804719222175924,
            0.31365673854884824,
            0.3960333616522691
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.7041864730431433,
            0.6151643389706558,
            0.43455068442123757,
            0.5846338321450122
          ],
          "adapt_mean": [
            0.6635796043484085,
            0.5216325372180143,
            0.41634041652254145,
            0.5338508526963215
          ],
          "baseline_mean": [
            0.6599431395530291,
            0.3284800531173323,
            0.25466387675914204,
            0.41436235647650116
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.8960475751976995,
            0.7238211464740558,
            0.5360905120903597,
            0.718653077920705
          ],
          "adapt_mean": [
            0.8546765091156916,
            0.6413318770102978,
            0.5615443022699557,
            0.6858508961319817
          ],
          "baseline_mean": [
            0.7549154925765617,
            0.4725420743695752,
            0.3932234050476302,
            0.540226990664589
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.5755547885515105,
            0.45871639714699963,
            0.381233811400108,
            0.47183499903287274
          ],
          "adapt_mean": [
            0.5238288012558199,
            0.39673556680331973,
            0.3278769361186709,
            0.4161471013926035
          ],
          "baseline_mean": [
            0.5022633341884611,
            0.27263790714231595,
            0.22544143580004242,
            0.33344755904360646
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.8701696113147855,
            0.7286436460351802,
            0.5427545977103312,
            0.7138559516867656
          ],
          "adapt_mean": [
            0.8331915988829666,
            0.6551552532327511,
            0.5191113497487144,
            0.6691527339548107
          ],
          "baseline_mean": [
            0.660099050459982,
            0.5213037774425693,
            0.42090433501607427,
            0.5341023876395419
          ],
          "run_count": 5
        }
      }
    }
  ],
  "family": "heilbronn_triangle"
}