{
  "family": "circle_packing",
  "manifest": "multi_task_shared_then_adapt/circle_packing_mt_sts.yaml",
  "results_dir": "multi_task_shared_then_adapt/results/circle_packing",
  "baseline_budget": 30,
  "baseline_reference_prefix": "s60-a15-b30",
  "task_count": 4,
  "categories": [
    {
      "id": "cp_n21",
      "label": "N = 21"
    },
    {
      "id": "cp_n23",
      "label": "N = 23"
    },
    {
      "id": "cp_n25",
      "label": "N = 25"
    },
    {
      "id": "average",
      "label": "Average"
    }
  ],
  "methods": [
    {
      "id": "baseline",
      "label": "Single-task",
      "hatch": "",
      "legend_hatch": ""
    },
    {
      "id": "adapt",
      "label": "STA Warmstart",
      "hatch": "//",
      "legend_hatch": "//"
    },
    {
      "id": "best_task_seed",
      "label": "STA Best-Local Program",
      "hatch": "",
      "legend_hatch": ""
    },
    {
      "id": "best_shared_seed",
      "label": "STA Best-Shared Program",
      "hatch": "xx",
      "legend_hatch": "xx"
    }
  ],
  "baseline_reference": {
    "setting_prefix": "s60-a15-b30",
    "budget": {
      "shared": 60,
      "adapt": 15,
      "baseline": 30,
      "task_count": 4,
      "total": 120,
      "label": "60 / 15 / 120"
    },
    "label": "0 / 30 / 120",
    "values": [
      0.8926243773407293,
      0.893932499709486,
      0.8943583877877359,
      0.8936384216126504
    ]
  },
  "budgets": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7767525591585414,
              0.8009480084241316,
              0.7691113978476258,
              0.782270655143433
            ],
            "adapt": [
              0.8486734406490578,
              0.8602575065035344,
              0.8475174755358843,
              0.8521494742294922
            ],
            "best_task_seed": [
              0.8521148683440849,
              0.8599284509479551,
              0.8561556177560272,
              0.8560663123493557
            ],
            "best_shared_seed": [
              0.8782264663971802,
              0.8827598490728509,
              0.8155208343822448,
              0.8588357166174253
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9133917125239993,
              0.9107358724505661,
              0.8498930528093125,
              0.8913402125946259
            ],
            "adapt": [
              0.9523581948038407,
              0.9370546830145609,
              0.9353658937940693,
              0.9415929238708237
            ],
            "best_task_seed": [
              0.9469669248028133,
              0.9217123563982732,
              0.9381327688131987,
              0.9356040166714283
            ],
            "best_shared_seed": [
              0.9294294572662702,
              0.9363325855948812,
              0.9324609943553666,
              0.932741012405506
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9400989243997883,
              0.9677298791084535,
              0.9376594005076442,
              0.9484960680052954
            ],
            "adapt": [
              0.9888798689200005,
              0.9898972846569387,
              0.9910614631142461,
              0.9899462055637285
            ],
            "best_task_seed": [
              0.9751458351565263,
              0.9805758476472827,
              0.9791861499631977,
              0.9783026109223357
            ],
            "best_shared_seed": [
              0.9773124357365773,
              0.9807485159994036,
              0.9818953250321595,
              0.9799854255893802
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9001128876837022,
              0.907356237939166,
              0.9018937558805463,
              0.903120960501138
            ],
            "adapt": [
              0.9322725937549243,
              0.909393826259719,
              0.9126548086458799,
              0.9181070762201745
            ],
            "best_task_seed": [
              0.9181061698469225,
              0.9084185181594776,
              0.9098440268637502,
              0.9121229049567168
            ],
            "best_shared_seed": [
              0.9072389903042897,
              0.9036051898342807,
              0.9121417789597471,
              0.9076619863661058
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9568402844197909,
              0.9603964550759491,
              0.9617400801808458,
              0.9596589398921953
            ],
            "adapt": [
              0.962419526402168,
              0.9632240814283604,
              0.9717843487670977,
              0.9658093188658754
            ],
            "best_task_seed": [
              0.9579791719191755,
              0.9152531111416142,
              0.9211431486766692,
              0.9314584772458195
            ],
            "best_shared_seed": [
              0.960943598857358,
              0.9667471678546355,
              0.9692891181804967,
              0.96565996163083
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8974392736371645,
          0.9094332905996533,
          0.8840595374451949,
          0.8969773672273375
        ],
        "adapt": [
          0.9369207249059983,
          0.9319654763726227,
          0.9316767979714354,
          0.9335209997500188
        ],
        "best_task_seed": [
          0.9300625940139046,
          0.9171776568589205,
          0.9208923424145686,
          0.9227108644291313
        ],
        "best_shared_seed": [
          0.9306301897123351,
          0.9340386616712104,
          0.922261610182003,
          0.9289768205218495
        ]
      }
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8578911196037374,
              0.8445154023534898,
              0.8197878767822516,
              0.8407314662464931
            ],
            "adapt": [
              0.7547354813658683,
              0.7702609191818629,
              0.8181296821287702,
              0.7810420275588339
            ],
            "best_task_seed": [
              0.7914548867612744,
              0.8024576479061031,
              0.837214033962295,
              0.8103755228765575
            ],
            "best_shared_seed": [
              0.8276522864283476,
              0.8313496272860561,
              0.8279859231700119,
              0.8289959456281386
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8864866192226293,
              0.8479346178373856,
              0.8747356410092196,
              0.8697189593564115
            ],
            "adapt": [
              0.8837033171544307,
              0.8625836176965883,
              0.8246143076295935,
              0.8569670808268709
            ],
            "best_task_seed": [
              0.8674737489760773,
              0.8714708726752765,
              0.8740645182088812,
              0.8710030466200782
            ],
            "best_shared_seed": [
              0.9135307723785051,
              0.9334542561335069,
              0.9151815571411378,
              0.9207221952177166
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9512371028493505,
              0.9481382856437456,
              0.9456531785757887,
              0.9483428556896282
            ],
            "adapt": [
              0.9710890728328737,
              0.9763811652854532,
              0.9773842018111353,
              0.9749514799764875
            ],
            "best_task_seed": [
              0.9830800721978757,
              0.9852395684977713,
              0.9820681581791142,
              0.9834625996249204
            ],
            "best_shared_seed": [
              0.9713286943597119,
              0.972267853304305,
              0.9706220011765883,
              0.9714061829468683
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9062155192540452,
              0.8918312466752312,
              0.8916837796333162,
              0.8965768485208642
            ],
            "adapt": [
              0.9350602013712441,
              0.9414187208369666,
              0.8916523016225695,
              0.9227104079435934
            ],
            "best_task_seed": [
              0.9415936013085618,
              0.9445618778702272,
              0.8973081979620655,
              0.9278212257136182
            ],
            "best_shared_seed": [
              0.9395568308244574,
              0.948560955913722,
              0.9489616970452678,
              0.9456931612611491
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9296694708829204,
              0.9501202433369877,
              0.954540824007703,
              0.9447768460758704
            ],
            "adapt": [
              0.7956081941363498,
              0.9545615336889519,
              0.910663047830794,
              0.886944258552032
            ],
            "best_task_seed": [
              0.9148826344471,
              0.9744788290881876,
              0.9704873180831537,
              0.9532829272061472
            ],
            "best_shared_seed": [
              0.9569577957262284,
              0.9175575611825435,
              0.8640300894880081,
              0.9128484821322601
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.9062999663625366,
          0.896507959169368,
          0.8972802600016558,
          0.9000293951778534
        ],
        "adapt": [
          0.8680392533721534,
          0.9010411913379646,
          0.8844887082045725,
          0.8845230509715636
        ],
        "best_task_seed": [
          0.8996969887381778,
          0.9156417592075131,
          0.912228445279102,
          0.9091890644082643
        ],
        "best_shared_seed": [
          0.9218052759434501,
          0.9206380507640267,
          0.9053562536042028,
          0.9159331934372265
        ]
      }
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8567968384665209,
              0.8515830796335058,
              0.8433180129637534,
              0.8505659770212601
            ],
            "adapt": [
              0.920318536426916,
              0.9228596696800271,
              0.9065822418575227,
              0.9165868159881553
            ],
            "best_task_seed": [
              0.9188997424765303,
              0.9212317433356615,
              0.9029163704187452,
              0.9143492854103122
            ],
            "best_shared_seed": [
              0.9269255074639868,
              0.9231013763408109,
              0.9205762819179071,
              0.9235343885742349
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8343664926962218,
              0.8260882788408598,
              0.9102508213146996,
              0.856901864283927
            ],
            "adapt": [
              0.953570884557322,
              0.9208421243010074,
              0.904541211235102,
              0.9263180733644771
            ],
            "best_task_seed": [
              0.9041778550771926,
              0.8742079349222429,
              0.8777834453979587,
              0.8853897451324648
            ],
            "best_shared_seed": [
              0.900991224165842,
              0.9154991656345034,
              0.9008468098553815,
              0.905779066551909
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9548467531609639,
              0.9546926105936873,
              0.8873178268236744,
              0.9322857301927752
            ],
            "adapt": [
              0.9901918685016398,
              0.990729977358488,
              0.9911496687747057,
              0.9906905048782777
            ],
            "best_task_seed": [
              0.9923377018070386,
              0.994754486887967,
              0.9919546944549544,
              0.9930156277166533
            ],
            "best_shared_seed": [
              0.9927042622943049,
              0.9957388105520184,
              0.9941226265797225,
              0.9941885664753485
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8823800899906524,
              0.8855282849478033,
              0.8779522478040548,
              0.8819535409141702
            ],
            "adapt": [
              0.8993250692758823,
              0.9189755213835517,
              0.8148337986042454,
              0.877711463087893
            ],
            "best_task_seed": [
              0.9084567907755696,
              0.9255803107874463,
              0.9284014079315265,
              0.9208128364981807
            ],
            "best_shared_seed": [
              0.9178584235253121,
              0.917974011590043,
              0.9096024297295401,
              0.9151449549482983
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9347317123892879,
              0.9517702445315737,
              0.952953030032497,
              0.9464849956511194
            ],
            "adapt": [
              0.8504035487306527,
              0.8595254550627665,
              0.8593925745311223,
              0.8564405261081804
            ],
            "best_task_seed": [
              0.9687058550811765,
              0.9746512423530955,
              0.9766291318931779,
              0.97332874310915
            ],
            "best_shared_seed": [
              0.9707210713786403,
              0.9785689645950519,
              0.9763750425093176,
              0.97522169282767
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8926243773407293,
          0.893932499709486,
          0.8943583877877359,
          0.8936384216126504
        ],
        "adapt": [
          0.9227619814984825,
          0.9225865495571682,
          0.8952998990005396,
          0.9135494766853967
        ],
        "best_task_seed": [
          0.9385155890435015,
          0.9380851436572826,
          0.9355370100192726,
          0.9373792475733522
        ],
        "best_shared_seed": [
          0.9418400977656173,
          0.9461764657424855,
          0.9403046381183737,
          0.9427737338754921
        ]
      }
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7937438022578107,
              0.8090905364549998,
              0.766795857233079,
              0.7898767319819632
            ],
            "adapt": [
              0.887604177792993,
              0.8701797183131534,
              0.8812216168338172,
              0.8796685043133212
            ],
            "best_task_seed": [
              0.8954317250434976,
              0.8794058040477978,
              0.8865778936056902,
              0.8871384742323286
            ],
            "best_shared_seed": [
              0.8880374575766197,
              0.8934020858747516,
              0.8808607497426406,
              0.8874334310646705
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8851530144957251,
              0.9071602109326964,
              0.8768788169863951,
              0.889730680804939
            ],
            "adapt": [
              0.9722740664939847,
              0.9792869175002707,
              0.9786186783936094,
              0.9767265541292882
            ],
            "best_task_seed": [
              0.9719635253153738,
              0.9773836758109461,
              0.9806513089811713,
              0.9766661700358303
            ],
            "best_shared_seed": [
              0.9562289942099749,
              0.8806653946329236,
              0.913728618684071,
              0.9168743358423231
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9025781106781166,
              0.957392276051791,
              0.9079710251648413,
              0.9226471372982494
            ],
            "adapt": [
              0.9260616801969116,
              0.970933206124237,
              0.9658760659437015,
              0.9542903174216166
            ],
            "best_task_seed": [
              0.977276412045726,
              0.9692519368141491,
              0.9679734395695145,
              0.97150059614313
            ],
            "best_shared_seed": [
              0.9748308776158939,
              0.927485423685169,
              0.9791175329791686,
              0.9604779447600773
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9052052038703753,
              0.8999638212874927,
              0.8947963275721282,
              0.8999884509099987
            ],
            "adapt": [
              0.9313974294972274,
              0.9332722088138793,
              0.9301616705895464,
              0.9316104363002177
            ],
            "best_task_seed": [
              0.9286981919093117,
              0.8920439749198034,
              0.894187498216996,
              0.9049765550153704
            ],
            "best_shared_seed": [
              0.9293161743198407,
              0.9301836356450266,
              0.9268754258203394,
              0.9287917452617357
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.93770764463784,
              0.9549740431321823,
              0.953665467371507,
              0.9487823850471765
            ],
            "adapt": [
              0.9490892527353172,
              0.9149988030602018,
              0.9641290186269291,
              0.9427390248074825
            ],
            "best_task_seed": [
              0.9620452741394333,
              0.9180877739138278,
              0.9663405409039025,
              0.9488245296523878
            ],
            "best_shared_seed": [
              0.9496361915371117,
              0.9663622839198404,
              0.960382944012332,
              0.9587938064897614
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8848775551879735,
          0.9057161775718324,
          0.8800214988655901,
          0.8902050772084653
        ],
        "adapt": [
          0.9332853213432868,
          0.9337341707623483,
          0.9440014100775207,
          0.9370069673943853
        ],
        "best_task_seed": [
          0.9470830256906684,
          0.9272346331013048,
          0.939146136255455,
          0.9378212650158094
        ],
        "best_shared_seed": [
          0.939609939051888,
          0.9196197647515423,
          0.9321930542477104,
          0.9304742526837136
        ]
      }
    }
  ],
  "aggregation": "Within a run, each method averages OOD scores across source tasks for each holdout N, except Shared, which uses the single persisted shared program directly. The figure keeps only runs where all requested methods have complete OOD results. It then averages comparable runs within each model and averages the resulting model means within each budget."
}