{
  "categories": [
    {
      "id": "cp_rect_n19",
      "label": "N = 19"
    },
    {
      "id": "cp_rect_n24",
      "label": "N = 24"
    },
    {
      "id": "cp_rect_n25",
      "label": "N = 25"
    },
    {
      "id": "average",
      "label": "Average"
    }
  ],
  "baseline_reference": {
    "label": "Single-task 0 / 30 / 120",
    "values": [
      0.8196904819211144,
      0.8275346984154544,
      0.8466130658946309,
      0.8312794154104
    ],
    "source_setting_prefix": "s60-a15-b30",
    "aggregation": "For each holdout N, use the single-task post-hoc OOD score from the specified baseline reference setting so this figure mirrors the fixed-b30 comparison setup."
  },
  "adapt_by_budget": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "values": [
        0.8636693819411201,
        0.8778524159550976,
        0.8863422491962465,
        0.8759546823641546
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "values": [
        0.8633415131296485,
        0.8911130905065292,
        0.9065066761396515,
        0.8869870932586098
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "values": [
        0.8447054734717098,
        0.8662300086696539,
        0.8954286487662377,
        0.8687880436358671
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "values": [
        0.8476740592441466,
        0.8766008779921991,
        0.897979884227575,
        0.8740849404879736
      ],
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ]
    }
  ],
  "raw_budgets": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "cp_rect_n19",
          "label": "N = 19"
        },
        {
          "id": "cp_rect_n24",
          "label": "N = 24"
        },
        {
          "id": "cp_rect_n25",
          "label": "N = 25"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.886872649088898,
          0.9034570419997424,
          0.8995742563582645,
          0.8966346491489684
        ],
        "adapt": [
          0.8636693819411201,
          0.8778524159550976,
          0.8863422491962465,
          0.8759546823641546
        ],
        "baseline": [
          0.8293718230189777,
          0.841708743938268,
          0.8823114410312936,
          0.8511306693295131
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.7952555121488615,
            0.8459967842668252,
            0.8542611800935216,
            0.8318378255030694
          ],
          "adapt_mean": [
            0.6557444273411396,
            0.7197834376333353,
            0.7653704644349186,
            0.7136327764697978
          ],
          "baseline_mean": [
            0.5513425555534447,
            0.5923975962965546,
            0.7653806636207212,
            0.6363736051569069
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.9062066073546393,
            0.9186275526296603,
            0.9073635518623565,
            0.9107325706155521
          ],
          "adapt_mean": [
            0.9303507505354596,
            0.9331763152529893,
            0.9252907169193436,
            0.9296059275692642
          ],
          "baseline_mean": [
            0.8603315671469296,
            0.8783447987902454,
            0.9275077698060393,
            0.8887280452477381
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.9436285722056666,
            0.9370473328158073,
            0.9360344749814157,
            0.9389034600009631
          ],
          "adapt_mean": [
            0.9499286213371448,
            0.9469818205733066,
            0.9371530705743657,
            0.944687837494939
          ],
          "baseline_mean": [
            0.9483120139883464,
            0.9494609939139578,
            0.9325340415612727,
            0.9434356831545255
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.8166491932177782,
            0.8449879938445459,
            0.8290806945059275,
            0.8302392938560839
          ],
          "adapt_mean": [
            0.7970104151037999,
            0.808106937208195,
            0.8236730866725639,
            0.809596812994853
          ],
          "baseline_mean": [
            0.8189972712923994,
            0.8231614332182271,
            0.8189453959130243,
            0.8203680334745502
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.9726233605175446,
            0.9706255464418735,
            0.971131380348101,
            0.9714600957691731
          ],
          "adapt_mean": [
            0.9853126953880569,
            0.9812135691076614,
            0.9802239073800407,
            0.9822500572919196
          ],
          "baseline_mean": [
            0.9678757071137685,
            0.9651788974723547,
            0.9671893342554101,
            0.9667479796138444
          ],
          "run_count": 5
        }
      }
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "cp_rect_n19",
          "label": "N = 19"
        },
        {
          "id": "cp_rect_n24",
          "label": "N = 24"
        },
        {
          "id": "cp_rect_n25",
          "label": "N = 25"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.8895164044000199,
          0.9035316601209681,
          0.9046078005045629,
          0.8992186216751836
        ],
        "adapt": [
          0.8633415131296485,
          0.8911130905065292,
          0.9065066761396515,
          0.8869870932586098
        ],
        "baseline": [
          0.8159505931205002,
          0.8502280399683088,
          0.8588902652451607,
          0.8416896327779899
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.780799393188966,
            0.8434978485373895,
            0.8499902291532726,
            0.8247624902932094
          ],
          "adapt_mean": [
            0.6300702085920474,
            0.7636246495680277,
            0.8533291234560023,
            0.7490079938720259
          ],
          "baseline_mean": [
            0.6313605338995749,
            0.7548736597239718,
            0.8436850739116961,
            0.7433064225117476
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.8994502177334658,
            0.8922087113750339,
            0.916487495000809,
            0.9027154747031029
          ],
          "adapt_mean": [
            0.8824266091651495,
            0.8836755725650404,
            0.9028385506515771,
            0.8896469107939223
          ],
          "baseline_mean": [
            0.8208430172518645,
            0.8736370520212923,
            0.8307408466835426,
            0.8417403053188999
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.9361593644006447,
            0.9281469525372531,
            0.918838122979696,
            0.9277148133058646
          ],
          "adapt_mean": [
            0.9350814492953636,
            0.9409351522116423,
            0.9185121405389183,
            0.9315095806819749
          ],
          "baseline_mean": [
            0.9322876838246967,
            0.9351223716734747,
            0.9363291010506906,
            0.9345797188496207
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.8854367332052698,
            0.9049359982947237,
            0.8999052614107781,
            0.8967593309702572
          ],
          "adapt_mean": [
            0.9034045551010561,
            0.9020903215845217,
            0.8931236635996174,
            0.8995395134283983
          ],
          "baseline_mean": [
            0.758657466687855,
            0.7592065620862779,
            0.7532731003328534,
            0.7570457097023289
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.9457363134717532,
            0.9488687898604402,
            0.937817893978259,
            0.9441409991034841
          ],
          "adapt_mean": [
            0.9657247434946259,
            0.9652397566034144,
            0.9647299024521427,
            0.9652314675167277
          ],
          "baseline_mean": [
            0.9366042639385099,
            0.9283005543365274,
            0.9304232042470204,
            0.9317760075073526
          ],
          "run_count": 5
        }
      }
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "cp_rect_n19",
          "label": "N = 19"
        },
        {
          "id": "cp_rect_n24",
          "label": "N = 24"
        },
        {
          "id": "cp_rect_n25",
          "label": "N = 25"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.8880434404265738,
          0.9063802540691294,
          0.897321949711014,
          0.8972485480689057
        ],
        "adapt": [
          0.8447054734717098,
          0.8662300086696539,
          0.8954286487662377,
          0.8687880436358671
        ],
        "baseline": [
          0.8196904819211144,
          0.8275346984154544,
          0.8466130658946309,
          0.8312794154104
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.7640773340070123,
            0.8299964300656797,
            0.807293307355869,
            0.800455690476187
          ],
          "adapt_mean": [
            0.6050519419944023,
            0.6868145433109272,
            0.8103792531690335,
            0.7007485794914543
          ],
          "baseline_mean": [
            0.5565428124700731,
            0.6317186469722278,
            0.7297943383466546,
            0.6393519325963185
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.9334533649546944,
            0.949121957826873,
            0.9431129198057423,
            0.9418960808624366
          ],
          "adapt_mean": [
            0.9304543444695851,
            0.9483494390379889,
            0.9315271975393058,
            0.9367769936822933
          ],
          "baseline_mean": [
            0.9147923924928538,
            0.9257885474667982,
            0.9333949120269225,
            0.924658617328858
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.9530484645252676,
            0.9480327876788885,
            0.9433974198458872,
            0.9481595573500146
          ],
          "adapt_mean": [
            0.9063045349974377,
            0.8994489963904974,
            0.9446090146498486,
            0.916787515345928
          ],
          "baseline_mean": [
            0.9408605241565542,
            0.939304614935639,
            0.9357321485316739,
            0.9386324292079558
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.8043996361436587,
            0.8208169564889312,
            0.8163707681957266,
            0.8138624536094389
          ],
          "adapt_mean": [
            0.797772432466533,
            0.8145704421209675,
            0.8128324405789797,
            0.80839177172216
          ],
          "baseline_mean": [
            0.7189513876456475,
            0.6790120755860549,
            0.6738098129184953,
            0.690591092050066
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.985238402502236,
            0.9839331382852741,
            0.9764353333518445,
            0.9818689580464515
          ],
          "adapt_mean": [
            0.9839441134305906,
            0.9819666224878892,
            0.9777953378940216,
            0.9812353579375005
          ],
          "baseline_mean": [
            0.9673052928404433,
            0.9618496071165522,
            0.9603341176494086,
            0.9631630058688012
          ],
          "run_count": 5
        }
      }
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "model_count": 5,
      "models": [
        "claude-haiku-4-5",
        "claude-opus-4-5",
        "claude-opus-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4-6"
      ],
      "categories": [
        {
          "id": "cp_rect_n19",
          "label": "N = 19"
        },
        {
          "id": "cp_rect_n24",
          "label": "N = 24"
        },
        {
          "id": "cp_rect_n25",
          "label": "N = 25"
        },
        {
          "id": "average",
          "label": "Average"
        }
      ],
      "series": {
        "shared": [
          0.8593340245482363,
          0.8949798869938114,
          0.9016761330992018,
          0.8853300148804166
        ],
        "adapt": [
          0.8476740592441466,
          0.8766008779921991,
          0.897979884227575,
          0.8740849404879736
        ],
        "baseline": [
          0.8433610195410692,
          0.8505236978350602,
          0.8674301865438846,
          0.8537716346400048
        ]
      },
      "per_model": {
        "claude-haiku-4-5": {
          "shared_mean": [
            0.6642601766794902,
            0.8386537958179275,
            0.8571935954150863,
            0.7867025226375014
          ],
          "adapt_mean": [
            0.6185986244228504,
            0.7555017038096731,
            0.8562361720387381,
            0.7434455000904205
          ],
          "baseline_mean": [
            0.6598212707200707,
            0.7017580032275311,
            0.7553973814987025,
            0.7056588851487681
          ],
          "run_count": 5
        },
        "claude-opus-4-5": {
          "shared_mean": [
            0.9438122434346313,
            0.945670218688471,
            0.9483134573409225,
            0.9459319731546749
          ],
          "adapt_mean": [
            0.9450979424824654,
            0.9468567621442403,
            0.9409043616482098,
            0.9442863554249719
          ],
          "baseline_mean": [
            0.9149049677792129,
            0.9239995732649146,
            0.9309414914134748,
            0.9232820108192008
          ],
          "run_count": 5
        },
        "claude-opus-4-6": {
          "shared_mean": [
            0.9563122284332952,
            0.9486296598068478,
            0.953077043704282,
            0.9526729773148084
          ],
          "adapt_mean": [
            0.9533125109463395,
            0.9504454623578485,
            0.9519059674865226,
            0.9518879802635702
          ],
          "baseline_mean": [
            0.9476201044302647,
            0.9469405994050593,
            0.936823711133345,
            0.9437948049895564
          ],
          "run_count": 5
        },
        "claude-sonnet-4-5": {
          "shared_mean": [
            0.7560639761399555,
            0.7667073558902037,
            0.774908058150779,
            0.7658931300603127
          ],
          "adapt_mean": [
            0.7423353907306727,
            0.7500545337193707,
            0.7605182203565406,
            0.7509693816021946
          ],
          "baseline_mean": [
            0.7432904347886262,
            0.7233597479082057,
            0.7591317547227805,
            0.7419273124732042
          ],
          "run_count": 5
        },
        "claude-sonnet-4-6": {
          "shared_mean": [
            0.9762214980538093,
            0.9752384047656072,
            0.9748885108849397,
            0.9754494712347853
          ],
          "adapt_mean": [
            0.9790258276384047,
            0.9801459279298627,
            0.9803346996078638,
            0.9798354850587104
          ],
          "baseline_mean": [
            0.9511683199871717,
            0.956560565369591,
            0.9548565939511198,
            0.9541951597692941
          ],
          "run_count": 5
        }
      }
    }
  ],
  "family": "circle_packing_rectangle"
}