{
  "family": "heilbronn_triangle",
  "manifest": "multi_task_shared_then_adapt/heilbronn_triangle_mt_sts.yaml",
  "results_dir": "multi_task_shared_then_adapt/results/heilbronn_triangle",
  "baseline_budget": 30,
  "baseline_reference_prefix": "s60-a15-b30",
  "task_count": 4,
  "categories": [
    {
      "id": "heil_tri_n8",
      "label": "N = 8"
    },
    {
      "id": "heil_tri_n13",
      "label": "N = 13"
    },
    {
      "id": "heil_tri_n14",
      "label": "N = 14"
    },
    {
      "id": "average",
      "label": "Average"
    }
  ],
  "methods": [
    {
      "id": "baseline",
      "label": "Single-task",
      "hatch": "",
      "legend_hatch": ""
    },
    {
      "id": "adapt",
      "label": "STA Warmstart",
      "hatch": "//",
      "legend_hatch": "//"
    },
    {
      "id": "best_task_seed",
      "label": "STA Best-Local Program",
      "hatch": "",
      "legend_hatch": ""
    },
    {
      "id": "best_shared_seed",
      "label": "STA Best-Shared Program",
      "hatch": "xx",
      "legend_hatch": "xx"
    }
  ],
  "baseline_reference": {
    "setting_prefix": "s60-a15-b30",
    "budget": {
      "shared": 60,
      "adapt": 15,
      "baseline": 30,
      "task_count": 4,
      "total": 120,
      "label": "60 / 15 / 120"
    },
    "label": "0 / 30 / 120",
    "values": [
      0.608747574245084,
      0.4144063384467872,
      0.3311618089347198,
      0.4514385738755303
    ]
  },
  "budgets": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5450567932889869,
              0.4144882904976425,
              0.2861559564527917,
              0.415233680079807
            ],
            "adapt": [
              0.6028204612884289,
              0.3965347078529812,
              0.3181735268056851,
              0.4391762319823651
            ],
            "best_task_seed": [
              0.5613476297368845,
              0.37112151292070567,
              0.37999095594058574,
              0.43748669953272523
            ],
            "best_shared_seed": [
              0.5716165914902931,
              0.45841637598042206,
              0.35301074307419034,
              0.46101457018163516
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.573652405645004,
              0.4229976839253129,
              0.29376520503658876,
              0.43013843153563525
            ],
            "adapt": [
              0.5358397235237394,
              0.4745030291096361,
              0.3458967316299909,
              0.4520798280877888
            ],
            "best_task_seed": [
              0.5546104853215447,
              0.5049400979048461,
              0.3916076816457529,
              0.4837194216240478
            ],
            "best_shared_seed": [
              0.5663824443021859,
              0.517705564703764,
              0.3952576417876814,
              0.49311521693121046
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.6473943949503156,
              0.433281704302908,
              0.4091857458999814,
              0.49662061505106836
            ],
            "adapt": [
              0.7845761685175836,
              0.5680222429265337,
              0.45687975049800594,
              0.6031593873140411
            ],
            "best_task_seed": [
              0.8121773147419153,
              0.5536109215520322,
              0.442778216622712,
              0.6028554843055531
            ],
            "best_shared_seed": [
              0.8154402928218589,
              0.5482439511653826,
              0.42300918553818667,
              0.595564476508476
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7321921211925422,
              0.4112061687817377,
              0.3320627503220781,
              0.49182034676545267
            ],
            "adapt": [
              0.6584248254676892,
              0.36994665904744795,
              0.301896581024567,
              0.44342268851323474
            ],
            "best_task_seed": [
              0.6822438273837336,
              0.44837083253591636,
              0.355035366575429,
              0.4952166754983597
            ],
            "best_shared_seed": [
              0.5666553491022065,
              0.3489637489836621,
              0.2902284951707593,
              0.4019491977522093
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8117012159050324,
              0.6088626289989398,
              0.4947359251997462,
              0.6384332567012395
            ],
            "adapt": [
              0.8398604889164067,
              0.592266806926237,
              0.5243858891761554,
              0.6521710616729331
            ],
            "best_task_seed": [
              0.825555387590349,
              0.5584717245039894,
              0.49221051468658034,
              0.6254125422603062
            ],
            "best_shared_seed": [
              0.8807308729489961,
              0.6959918980137683,
              0.5984453038464392,
              0.7250560249364011
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.6619993861963762,
          0.4581672953013082,
          0.36318111658223723,
          0.4944492660266405
        ],
        "adapt": [
          0.6843043335427696,
          0.48025468917256725,
          0.38944649582688085,
          0.5180018395140726
        ],
        "best_task_seed": [
          0.6871869289548854,
          0.48730301788349795,
          0.41232454709421196,
          0.5289381646441984
        ],
        "best_shared_seed": [
          0.6801651101331081,
          0.5138643077693998,
          0.41199027388345144,
          0.5353398972619864
        ]
      }
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5084113747397968,
              0.3408920304723963,
              0.24779142844582536,
              0.3656982778860062
            ],
            "adapt": [
              0.5555187667008417,
              0.5404314862864462,
              0.47553736489985143,
              0.5238292059623798
            ],
            "best_task_seed": [
              0.54890380688624,
              0.5280156664861595,
              0.4352150537983908,
              0.5040448423902634
            ],
            "best_shared_seed": [
              0.6063604509084659,
              0.5789339802037194,
              0.4989056225068878,
              0.5614000178730244
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5298938453479536,
              0.33501778911638797,
              0.33139788442211,
              0.3987698396288172
            ],
            "adapt": [
              0.4869896061584025,
              0.2580642479734495,
              0.1815100098123675,
              0.30885462131473984
            ],
            "best_task_seed": [
              0.5519971180284753,
              0.3599963247778186,
              0.2823931576040384,
              0.39812886680344406
            ],
            "best_shared_seed": [
              0.469655904092277,
              0.4244644379796556,
              0.30139055955485083,
              0.3985036338755944
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5714783783230759,
              0.4856080713546083,
              0.4793829960589536,
              0.5121564819122126
            ],
            "adapt": [
              0.7837438029774642,
              0.6662053369322891,
              0.5515729710092847,
              0.6671740369730126
            ],
            "best_task_seed": [
              0.7727887365263445,
              0.7290450725999026,
              0.5242158537276759,
              0.6753498876179743
            ],
            "best_shared_seed": [
              0.8667178949849085,
              0.6185853122908072,
              0.4741144606635402,
              0.6531392226464185
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5936922977794701,
              0.33666219886321813,
              0.31134484818096325,
              0.4138997816078838
            ],
            "adapt": [
              0.6739853021736465,
              0.43675428602540867,
              0.32942737517006243,
              0.4800556544563726
            ],
            "best_task_seed": [
              0.6508608691990452,
              0.37609962413054177,
              0.2897087175030434,
              0.4388897369442101
            ],
            "best_shared_seed": [
              0.6980441042137413,
              0.5081650852466064,
              0.39405800592895074,
              0.5334223984630995
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7642758099585333,
              0.5360534505257722,
              0.4287082605486,
              0.5763458403443018
            ],
            "adapt": [
              0.8730877115650664,
              0.6580663731335633,
              0.5778557242303097,
              0.7030032696429798
            ],
            "best_task_seed": [
              0.8429022763264987,
              0.687632832309213,
              0.5901391393243737,
              0.7068914159866951
            ],
            "best_shared_seed": [
              0.9143473695306751,
              0.7190303720210289,
              0.6339030090312127,
              0.7557602501943056
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.593550341229766,
          0.40684670806647655,
          0.35972508353129046,
          0.4533740442758443
        ],
        "adapt": [
          0.6746650379150843,
          0.5119043460702313,
          0.4231806890243751,
          0.5365833576698968
        ],
        "best_task_seed": [
          0.6734905613933208,
          0.5361579040607272,
          0.4243343843915044,
          0.5446609499485173
        ],
        "best_shared_seed": [
          0.7110251447460135,
          0.5698358375483635,
          0.4604743315370884,
          0.5804451046104885
        ]
      }
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5208067164751784,
              0.26398088032482325,
              0.2351872363689449,
              0.3399916110563155
            ],
            "adapt": [
              0.545424222196359,
              0.46939207909205,
              0.3638116107267243,
              0.4595426373383778
            ],
            "best_task_seed": [
              0.5639984445009003,
              0.44743626661865293,
              0.34338066589799093,
              0.4516051256725147
            ],
            "best_shared_seed": [
              0.5106828843790653,
              0.453743065442321,
              0.36432134224058654,
              0.4429157640206576
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5109446522260926,
              0.36776303446060166,
              0.2714607216668286,
              0.38338946945117425
            ],
            "adapt": [
              0.5930079202685175,
              0.30154564055026156,
              0.23873802799474858,
              0.3777638629378425
            ],
            "best_task_seed": [
              0.5814869405082637,
              0.3593114425091747,
              0.2940141001581626,
              0.4116041610585337
            ],
            "best_shared_seed": [
              0.6052339298942155,
              0.4639902006034021,
              0.37039432455825894,
              0.47987281835195883
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.6437953072408306,
              0.4699506879514342,
              0.35488271705957974,
              0.4895429040839481
            ],
            "adapt": [
              0.8212705523585375,
              0.6179399380637435,
              0.5219563722433638,
              0.653722287555215
            ],
            "best_task_seed": [
              0.664485204935037,
              0.6523480085093676,
              0.5699230595235646,
              0.6289187576559898
            ],
            "best_shared_seed": [
              0.9253619098292549,
              0.6905155140999228,
              0.5782906797169187,
              0.7313893678820322
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.6457923903362948,
              0.39043787914825606,
              0.25094327332761396,
              0.4290578476040549
            ],
            "adapt": [
              0.6627449155161005,
              0.4294733670602908,
              0.31917523909647694,
              0.47046450722428934
            ],
            "best_task_seed": [
              0.6669338446918014,
              0.43234554464047575,
              0.3533642585158036,
              0.48421454928269353
            ],
            "best_shared_seed": [
              0.6325525922018765,
              0.4648274394736184,
              0.3987811401694619,
              0.49872039061498563
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7223988049470237,
              0.579899210348821,
              0.5433350962506319,
              0.6152110371821589
            ],
            "adapt": [
              0.8825723749867755,
              0.7210483239329317,
              0.6150609240406032,
              0.7395605409867702
            ],
            "best_task_seed": [
              0.8833574243626254,
              0.6895424162893057,
              0.5706680984660728,
              0.7145226463726679
            ],
            "best_shared_seed": [
              0.9081207007702433,
              0.7423077051772163,
              0.6058183485524049,
              0.7520822514999549
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.608747574245084,
          0.4144063384467872,
          0.3311618089347198,
          0.4514385738755303
        ],
        "adapt": [
          0.701003997065258,
          0.5078798697398554,
          0.41174843482038337,
          0.5402107672084989
        ],
        "best_task_seed": [
          0.6720523717997255,
          0.5161967357133953,
          0.42627003651231893,
          0.53817304800848
        ],
        "best_shared_seed": [
          0.7163904034149311,
          0.5630767849592961,
          0.4635211670475262,
          0.5809961184739179
        ]
      }
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.4939714241903667,
              0.3804719222175924,
              0.31365673854884824,
              0.3960333616522691
            ],
            "adapt": [
              0.5782035485395625,
              0.4733773189270167,
              0.3915163899317911,
              0.48103241913279005
            ],
            "best_task_seed": [
              0.645852684118607,
              0.4182096244435026,
              0.3429535230870969,
              0.4690052772164022
            ],
            "best_shared_seed": [
              0.5289305873422168,
              0.4695895827475115,
              0.40657893028311404,
              0.46836636679094745
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5022633341884611,
              0.27263790714231595,
              0.22544143580004242,
              0.33344755904360646
            ],
            "adapt": [
              0.5238288012558199,
              0.39673556680331973,
              0.3278769361186709,
              0.4161471013926035
            ],
            "best_task_seed": [
              0.5418009864676796,
              0.40609244842151854,
              0.3464218823971112,
              0.43143843909543644
            ],
            "best_shared_seed": [
              0.5715274166972603,
              0.50012141278178,
              0.4169342303390223,
              0.49619435327268757
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.660099050459982,
              0.5213037774425693,
              0.42090433501607427,
              0.5341023876395419
            ],
            "adapt": [
              0.8331915988829666,
              0.6551552532327511,
              0.5191113497487144,
              0.6691527339548107
            ],
            "best_task_seed": [
              0.8096983238031459,
              0.6367483467325752,
              0.5112841982927325,
              0.6525769562761512
            ],
            "best_shared_seed": [
              0.8778896450718966,
              0.6524171088684485,
              0.5144858317614525,
              0.6815975285672659
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.6599431395530291,
              0.3284800531173323,
              0.25466387675914204,
              0.41436235647650116
            ],
            "adapt": [
              0.6635796043484085,
              0.5216325372180143,
              0.41634041652254145,
              0.5338508526963215
            ],
            "best_task_seed": [
              0.6562384785266242,
              0.5150355606208862,
              0.41456969556939915,
              0.5286145782389698
            ],
            "best_shared_seed": [
              0.693544355464071,
              0.6098632189627106,
              0.39817021801577596,
              0.5671925974808525
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7549154925765617,
              0.4725420743695752,
              0.3932234050476302,
              0.540226990664589
            ],
            "adapt": [
              0.8546765091156916,
              0.6413318770102978,
              0.5615443022699557,
              0.6858508961319817
            ],
            "best_task_seed": [
              0.8576872145505359,
              0.6025355792117115,
              0.5258082855271711,
              0.6620103597631395
            ],
            "best_shared_seed": [
              0.8917976862517867,
              0.7008205087891879,
              0.5489051873181212,
              0.7138411274530319
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.6142384881936801,
          0.39508714685787705,
          0.3215779582343474,
          0.44363453109530154
        ],
        "adapt": [
          0.6906960124284898,
          0.53764651063828,
          0.4432778789183347,
          0.5572068006617015
        ],
        "best_task_seed": [
          0.7022555374933186,
          0.5157243118860388,
          0.42820751697470216,
          0.5487291221180198
        ],
        "best_shared_seed": [
          0.7127379381654462,
          0.5865623664299278,
          0.4570148795434972,
          0.5854383947129571
        ]
      }
    }
  ],
  "aggregation": "Within a run, each method averages OOD scores across source tasks for each holdout N, except Shared, which uses the single persisted shared program directly. The figure keeps only runs where all requested methods have complete OOD results. It then averages comparable runs within each model and averages the resulting model means within each budget."
}