{
  "family": "circle_packing_rectangle",
  "manifest": "multi_task_shared_then_adapt/circle_packing_rectangle_mt_sts.yaml",
  "results_dir": "multi_task_shared_then_adapt/results/circle_packing_rectangle",
  "baseline_budget": 30,
  "baseline_reference_prefix": "s60-a15-b30",
  "task_count": 4,
  "categories": [
    {
      "id": "cp_rect_n19",
      "label": "N = 19"
    },
    {
      "id": "cp_rect_n24",
      "label": "N = 24"
    },
    {
      "id": "cp_rect_n25",
      "label": "N = 25"
    },
    {
      "id": "average",
      "label": "Average"
    }
  ],
  "methods": [
    {
      "id": "baseline",
      "label": "Single-task",
      "hatch": "",
      "legend_hatch": ""
    },
    {
      "id": "adapt",
      "label": "STA Warmstart",
      "hatch": "//",
      "legend_hatch": "//"
    },
    {
      "id": "best_task_seed",
      "label": "STA Best-Local Program",
      "hatch": "",
      "legend_hatch": ""
    },
    {
      "id": "best_shared_seed",
      "label": "STA Best-Shared Program",
      "hatch": "xx",
      "legend_hatch": "xx"
    }
  ],
  "baseline_reference": {
    "setting_prefix": "s60-a15-b30",
    "budget": {
      "shared": 60,
      "adapt": 15,
      "baseline": 30,
      "task_count": 4,
      "total": 120,
      "label": "60 / 15 / 120"
    },
    "label": "0 / 30 / 120",
    "values": [
      0.8196904819211144,
      0.8275346984154544,
      0.8466130658946309,
      0.8312794154104
    ]
  },
  "budgets": [
    {
      "setting_prefix": "s20-a25-b30",
      "budget": {
        "shared": 20,
        "adapt": 25,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "20 / 25 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5513425555534447,
              0.5923975962965546,
              0.7653806636207212,
              0.6363736051569069
            ],
            "adapt": [
              0.6557444273411396,
              0.7197834376333353,
              0.7653704644349186,
              0.7136327764697978
            ],
            "best_task_seed": [
              0.6952242011247626,
              0.693993176694491,
              0.8733148397858196,
              0.7541774058683577
            ],
            "best_shared_seed": [
              0.717928312059627,
              0.8329703283150314,
              0.8384554326994923,
              0.7964513576913836
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 4,
          "runs": [
            "run_01_seed_42",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8189972712923994,
              0.8231614332182271,
              0.8189453959130243,
              0.8203680334745502
            ],
            "adapt": [
              0.7970104151037999,
              0.808106937208195,
              0.8236730866725639,
              0.809596812994853
            ],
            "best_task_seed": [
              0.9004225005045735,
              0.9033351274574568,
              0.9136570818643397,
              0.9058049032754567
            ],
            "best_shared_seed": [
              0.9107568707430889,
              0.9244149953345218,
              0.9298978019607045,
              0.921689889346105
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9678757071137685,
              0.9651788974723547,
              0.9671893342554101,
              0.9667479796138444
            ],
            "adapt": [
              0.9853126953880569,
              0.9812135691076614,
              0.9802239073800407,
              0.9822500572919196
            ],
            "best_task_seed": [
              0.9771488325922195,
              0.97331999826841,
              0.9701641752986914,
              0.9735443353864403
            ],
            "best_shared_seed": [
              0.9771760170932362,
              0.9641718122865257,
              0.9696301596119687,
              0.9703259963305768
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8603315671469296,
              0.8783447987902454,
              0.9275077698060393,
              0.8887280452477381
            ],
            "adapt": [
              0.9303507505354596,
              0.9331763152529893,
              0.9252907169193436,
              0.9296059275692642
            ],
            "best_task_seed": [
              0.9313017550165152,
              0.9353626097547478,
              0.9250084532187272,
              0.9305576059966635
            ],
            "best_shared_seed": [
              0.9210046007947424,
              0.9317691963380927,
              0.920940291984994,
              0.9245713630392765
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9483120139883464,
              0.9494609939139578,
              0.9325340415612727,
              0.9434356831545255
            ],
            "adapt": [
              0.9499286213371448,
              0.9469818205733066,
              0.9371530705743657,
              0.944687837494939
            ],
            "best_task_seed": [
              0.9485005460650422,
              0.9496192012346707,
              0.9361660431239989,
              0.9447619301412372
            ],
            "best_shared_seed": [
              0.9520373250119238,
              0.9491371232208824,
              0.9478853982678702,
              0.9496866155002255
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8293718230189777,
          0.841708743938268,
          0.8823114410312936,
          0.8511306693295131
        ],
        "adapt": [
          0.8636693819411201,
          0.8778524159550976,
          0.8863422491962465,
          0.8759546823641546
        ],
        "best_task_seed": [
          0.8905195670606225,
          0.8911260226819552,
          0.9236621186583154,
          0.901769236133631
        ],
        "best_shared_seed": [
          0.8957806251405238,
          0.9204926910990109,
          0.921361816905006,
          0.9125450443815135
        ]
      }
    },
    {
      "setting_prefix": "s40-a20-b30",
      "budget": {
        "shared": 40,
        "adapt": 20,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "40 / 20 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.6313605338995749,
              0.7548736597239718,
              0.8436850739116961,
              0.7433064225117476
            ],
            "adapt": [
              0.6300702085920474,
              0.7636246495680277,
              0.8533291234560023,
              0.7490079938720259
            ],
            "best_task_seed": [
              0.5233327516428485,
              0.7178613745404208,
              0.8574136439430813,
              0.6995359233754501
            ],
            "best_shared_seed": [
              0.5613366957103815,
              0.7701813818152692,
              0.8615408789367553,
              0.7310196521541353
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.758657466687855,
              0.7592065620862779,
              0.7532731003328534,
              0.7570457097023289
            ],
            "adapt": [
              0.9034045551010561,
              0.9020903215845217,
              0.8931236635996174,
              0.8995395134283983
            ],
            "best_task_seed": [
              0.9174348399251363,
              0.9007232578019903,
              0.892982256578723,
              0.9037134514352833
            ],
            "best_shared_seed": [
              0.8443353619504037,
              0.8530662012680679,
              0.8900246882976368,
              0.862475417172036
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9366042639385099,
              0.9283005543365274,
              0.9304232042470204,
              0.9317760075073526
            ],
            "adapt": [
              0.9657247434946259,
              0.9652397566034144,
              0.9647299024521427,
              0.9652314675167277
            ],
            "best_task_seed": [
              0.9267898808648354,
              0.9743325973897055,
              0.9718026965009198,
              0.9576417249184868
            ],
            "best_shared_seed": [
              0.9635884959649529,
              0.9625891369962669,
              0.9595160805392583,
              0.9618979045001593
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.8208430172518645,
              0.8736370520212923,
              0.8307408466835426,
              0.8417403053188999
            ],
            "adapt": [
              0.8824266091651495,
              0.8836755725650404,
              0.9028385506515771,
              0.8896469107939223
            ],
            "best_task_seed": [
              0.9415815447607845,
              0.9469373697900897,
              0.9507316080202928,
              0.9464168408570556
            ],
            "best_shared_seed": [
              0.9359363710699968,
              0.9404904250849235,
              0.9453256921336646,
              0.9405841627628616
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9322876838246967,
              0.9351223716734747,
              0.9363291010506906,
              0.9345797188496207
            ],
            "adapt": [
              0.9350814492953636,
              0.9409351522116423,
              0.9185121405389183,
              0.9315095806819749
            ],
            "best_task_seed": [
              0.9599540480967349,
              0.9546945744390763,
              0.941982787730427,
              0.952210470088746
            ],
            "best_shared_seed": [
              0.9517884156316804,
              0.9438471360033993,
              0.9400369287789244,
              0.9452241601380015
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8159505931205002,
          0.8502280399683088,
          0.8588902652451607,
          0.8416896327779899
        ],
        "adapt": [
          0.8633415131296485,
          0.8911130905065292,
          0.9065066761396515,
          0.8869870932586098
        ],
        "best_task_seed": [
          0.853818613058068,
          0.8989098347922566,
          0.9229825985546889,
          0.8919036821350044
        ],
        "best_shared_seed": [
          0.8513970680654831,
          0.8940348562335853,
          0.9192888537372479,
          0.8882402593454387
        ]
      }
    },
    {
      "setting_prefix": "s60-a15-b30",
      "budget": {
        "shared": 60,
        "adapt": 15,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "60 / 15 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.5565428124700731,
              0.6317186469722278,
              0.7297943383466546,
              0.6393519325963185
            ],
            "adapt": [
              0.6050519419944023,
              0.6868145433109272,
              0.8103792531690335,
              0.7007485794914543
            ],
            "best_task_seed": [
              0.5479846076003664,
              0.6472508471773561,
              0.8096838622018316,
              0.6683064389931846
            ],
            "best_shared_seed": [
              0.7241710656015372,
              0.8743860075251406,
              0.8584231049629949,
              0.8189933926965576
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7189513876456475,
              0.6790120755860549,
              0.6738098129184953,
              0.690591092050066
            ],
            "adapt": [
              0.797772432466533,
              0.8145704421209675,
              0.8128324405789797,
              0.80839177172216
            ],
            "best_task_seed": [
              0.8592986492995527,
              0.8429917369116771,
              0.8910022078765791,
              0.8644308646959363
            ],
            "best_shared_seed": [
              0.8243093788907032,
              0.9001129022203906,
              0.8746089412702135,
              0.8663437407937691
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9673052928404433,
              0.9618496071165522,
              0.9603341176494086,
              0.9631630058688012
            ],
            "adapt": [
              0.9839441134305906,
              0.9819666224878892,
              0.9777953378940216,
              0.9812353579375005
            ],
            "best_task_seed": [
              0.9338396495676587,
              0.9317269765357002,
              0.9777656315205517,
              0.9477774192079702
            ],
            "best_shared_seed": [
              0.9860101676878544,
              0.9357425151081407,
              0.9809587720421146,
              0.9675704849460367
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9147923924928538,
              0.9257885474667982,
              0.9333949120269225,
              0.924658617328858
            ],
            "adapt": [
              0.9304543444695851,
              0.9483494390379889,
              0.9315271975393058,
              0.9367769936822933
            ],
            "best_task_seed": [
              0.9433367283570668,
              0.9512500277612155,
              0.9394066226534367,
              0.9446644595905729
            ],
            "best_shared_seed": [
              0.9379818095716155,
              0.952965691195069,
              0.9423365028485075,
              0.9444280012050641
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9408605241565542,
              0.939304614935639,
              0.9357321485316739,
              0.9386324292079558
            ],
            "adapt": [
              0.9063045349974377,
              0.8994489963904974,
              0.9446090146498486,
              0.916787515345928
            ],
            "best_task_seed": [
              0.9563830459755355,
              0.9528852187510447,
              0.949242477385343,
              0.9528369140373076
            ],
            "best_shared_seed": [
              0.9568483296351056,
              0.9569797892314489,
              0.9526276403313952,
              0.9554852530659833
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8196904819211144,
          0.8275346984154544,
          0.8466130658946309,
          0.8312794154104
        ],
        "adapt": [
          0.8447054734717098,
          0.8662300086696539,
          0.8954286487662377,
          0.8687880436358671
        ],
        "best_task_seed": [
          0.8481685361600361,
          0.8652209614273987,
          0.9134201603275484,
          0.8756032193049943
        ],
        "best_shared_seed": [
          0.8858641502773631,
          0.9240373810560379,
          0.9217909922910451,
          0.9105641745414822
        ]
      }
    },
    {
      "setting_prefix": "s80-a10-b30",
      "budget": {
        "shared": 80,
        "adapt": 10,
        "baseline": 30,
        "task_count": 4,
        "total": 120,
        "label": "80 / 10 / 120"
      },
      "model_count": 5,
      "models": {
        "claude-haiku-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.6598212707200707,
              0.7017580032275311,
              0.7553973814987025,
              0.7056588851487681
            ],
            "adapt": [
              0.6185986244228504,
              0.7555017038096731,
              0.8562361720387381,
              0.7434455000904205
            ],
            "best_task_seed": [
              0.6897152369451882,
              0.795832212368523,
              0.8966177192066473,
              0.7940550561734528
            ],
            "best_shared_seed": [
              0.6948218314282703,
              0.884334681194777,
              0.9022610406210522,
              0.8271391844146997
            ]
          }
        },
        "claude-sonnet-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.7432904347886262,
              0.7233597479082057,
              0.7591317547227805,
              0.7419273124732042
            ],
            "adapt": [
              0.7423353907306727,
              0.7500545337193707,
              0.7605182203565406,
              0.7509693816021946
            ],
            "best_task_seed": [
              0.8113610106404652,
              0.8839116795643338,
              0.8970227987578727,
              0.8640984963208906
            ],
            "best_shared_seed": [
              0.8524436635805938,
              0.8752421463977168,
              0.8807175414437707,
              0.8694677838073603
            ]
          }
        },
        "claude-sonnet-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9511683199871717,
              0.956560565369591,
              0.9548565939511198,
              0.9541951597692941
            ],
            "adapt": [
              0.9790258276384047,
              0.9801459279298627,
              0.9803346996078638,
              0.9798354850587104
            ],
            "best_task_seed": [
              0.9824997242533376,
              0.9777403085492251,
              0.9807418100398339,
              0.9803272809474656
            ],
            "best_shared_seed": [
              0.9774470306806551,
              0.9284966049973287,
              0.9764320040157038,
              0.9607918798978959
            ]
          }
        },
        "claude-opus-4-5": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9149049677792129,
              0.9239995732649146,
              0.9309414914134748,
              0.9232820108192008
            ],
            "adapt": [
              0.9450979424824654,
              0.9468567621442403,
              0.9409043616482098,
              0.9442863554249719
            ],
            "best_task_seed": [
              0.9430453143800038,
              0.9419517348065497,
              0.9406780274411624,
              0.9418916922092386
            ],
            "best_shared_seed": [
              0.9494613647854842,
              0.9521187066095346,
              0.9457801884856327,
              0.9491200866268837
            ]
          }
        },
        "claude-opus-4-6": {
          "run_count": 5,
          "runs": [
            "run_01_seed_42",
            "run_02_seed_43",
            "run_03_seed_44",
            "run_04_seed_45",
            "run_05_seed_46"
          ],
          "series": {
            "baseline": [
              0.9476201044302647,
              0.9469405994050593,
              0.936823711133345,
              0.9437948049895564
            ],
            "adapt": [
              0.9533125109463395,
              0.9504454623578485,
              0.9519059674865226,
              0.9518879802635702
            ],
            "best_task_seed": [
              0.9539089095343984,
              0.9511364497416993,
              0.9523381215759736,
              0.9524611602840238
            ],
            "best_shared_seed": [
              0.955823839909737,
              0.9535185604459588,
              0.9566372226077327,
              0.9553265409878093
            ]
          }
        }
      },
      "series": {
        "baseline": [
          0.8433610195410692,
          0.8505236978350602,
          0.8674301865438846,
          0.8537716346400048
        ],
        "adapt": [
          0.8476740592441466,
          0.8766008779921991,
          0.897979884227575,
          0.8740849404879736
        ],
        "best_task_seed": [
          0.8761060391506786,
          0.9101144770060662,
          0.9334796954042981,
          0.9065667371870143
        ],
        "best_shared_seed": [
          0.8859995460769481,
          0.9187421399290632,
          0.9323655994347785,
          0.9123690951469298
        ]
      }
    }
  ],
  "aggregation": "Within a run, each method averages OOD scores across source tasks for each holdout N, except Shared, which uses the single persisted shared program directly. The figure keeps only runs where all requested methods have complete OOD results. It then averages comparable runs within each model and averages the resulting model means within each budget."
}