{
  "aggregate_metrics": {
    "train_mse": 7.748566128456837,
    "train_r2": -903.0621185052875,
    "train_pearson_r": -0.004435322480634275,
    "train_pearson_p": 0.8013005793442622,
    "val_mse": 6.266125107102048,
    "val_r2": -730.0986612000323,
    "val_pearson_r": 0.03637226992983513,
    "val_pearson_p": 0.4927030912361285
  },
  "per_fold_stats": {
    "train_mse_mean": 7.785679473939362,
    "train_mse_std": 27.991657859689575,
    "val_mse_mean": 6.264667772012818,
    "val_mse_std": 17.102180088520186,
    "train_r_mean": 0.10170009319010813,
    "train_r_std": 0.19678522423537795,
    "val_r_mean": 0.10072819546139247,
    "val_r_std": 0.21969355558474982
  },
  "average_coefficients": {
    "right_subspace_overlap_top_k": -0.13346895575523376,
    "right_subspace_overlap_bottom_k": 0.801952064037323,
    "interaction_matrix_overlap_top_k": 0.015142029151320457,
    "interaction_matrix_overlap_bottom_k": 0.1017412543296814,
    "effective_rank": -0.40794315934181213,
    "effective_rank_mergeability_score": -0.30881112813949585,
    "stable_rank": 0.2858622372150421,
    "spectral_gap": 0.5077435374259949,
    "singular_value_ratio": 0.7349931597709656,
    "layerwise_effective_rank": 0.3374524712562561,
    "layerwise_effective_rank_mergeability_score": -0.22803525626659393,
    "task_vector_cosine_similarity": -0.5163270235061646,
    "task_vector_l2_distance": -0.16178396344184875,
    "task_vector_dot_product": 0.005480554886162281,
    "weight_space_angle": -0.4533427655696869,
    "task_vector_magnitude_ratio": -0.25782716274261475,
    "singular_value_overlap": 0.47403138875961304,
    "subspace_overlap": -0.3630792796611786,
    "right_subspace_overlap": 0.42741769552230835,
    "activation_l2_distance": -0.2318987101316452,
    "activation_cosine_similarity": -0.17596180737018585,
    "activation_magnitude_ratio": 0.4634707570075989,
    "activation_dot_product": -0.32706981897354126,
    "encoder_gradient_cosine_similarity": 0.597573459148407,
    "encoder_gradient_l2_distance": -0.26550760865211487,
    "encoder_gradient_dot_product": 0.039665985852479935,
    "input_gradient_cosine_similarity": -0.2483637034893036,
    "input_gradient_l2_distance": 0.05654103681445122,
    "input_gradient_dot_product": 0.21809574961662292
  },
  "coefficient_std": {
    "right_subspace_overlap_top_k": 1.05595862865448,
    "right_subspace_overlap_bottom_k": 2.5282018184661865,
    "interaction_matrix_overlap_top_k": 0.5454081892967224,
    "interaction_matrix_overlap_bottom_k": 1.908506155014038,
    "effective_rank": 2.287868022918701,
    "effective_rank_mergeability_score": 1.3828524351119995,
    "stable_rank": 1.1957604885101318,
    "spectral_gap": 1.0807991027832031,
    "singular_value_ratio": 2.306943416595459,
    "layerwise_effective_rank": 1.036375641822815,
    "layerwise_effective_rank_mergeability_score": 2.0031468868255615,
    "task_vector_cosine_similarity": 2.061363697052002,
    "task_vector_l2_distance": 1.241172432899475,
    "task_vector_dot_product": 1.131712794303894,
    "weight_space_angle": 1.7113909721374512,
    "task_vector_magnitude_ratio": 1.2406337261199951,
    "singular_value_overlap": 1.7094738483428955,
    "subspace_overlap": 1.1407569646835327,
    "right_subspace_overlap": 1.1976432800292969,
    "activation_l2_distance": 1.0394514799118042,
    "activation_cosine_similarity": 0.8770217895507812,
    "activation_magnitude_ratio": 1.28190279006958,
    "activation_dot_product": 1.5811413526535034,
    "encoder_gradient_cosine_similarity": 1.9154295921325684,
    "encoder_gradient_l2_distance": 1.0226759910583496,
    "encoder_gradient_dot_product": 0.7070204019546509,
    "input_gradient_cosine_similarity": 1.1871641874313354,
    "input_gradient_l2_distance": 1.4546291828155518,
    "input_gradient_dot_product": 1.4257041215896606
  },
  "fold_results": [
    {
      "fold": 0,
      "held_out_task": "SUN397",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_mse": 4.317343478680121,
      "train_r2": -489.4973297823858,
      "train_pearson_r": -0.26956521251121945,
      "val_mse": 5.799067369983885,
      "val_r2": -1341.135300842326,
      "val_pearson_r": 0.0805822360905511,
      "n_iterations": 56,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.41842108964920044,
        "right_subspace_overlap_bottom_k": -0.7860199213027954,
        "interaction_matrix_overlap_top_k": -0.22450286149978638,
        "interaction_matrix_overlap_bottom_k": -0.27667611837387085,
        "effective_rank": -0.27836817502975464,
        "effective_rank_mergeability_score": 0.5995857119560242,
        "stable_rank": 0.05337550491094589,
        "spectral_gap": 0.5099117159843445,
        "singular_value_ratio": -0.5517200827598572,
        "layerwise_effective_rank": 1.3101770877838135,
        "layerwise_effective_rank_mergeability_score": 0.07760204374790192,
        "task_vector_cosine_similarity": -1.085546612739563,
        "task_vector_l2_distance": 2.1403555870056152,
        "task_vector_dot_product": 0.9818156957626343,
        "weight_space_angle": -1.787272572517395,
        "task_vector_magnitude_ratio": 1.2892879247665405,
        "singular_value_overlap": -1.586434006690979,
        "subspace_overlap": 0.6883716583251953,
        "right_subspace_overlap": -2.1882174015045166,
        "activation_l2_distance": 2.06469988822937,
        "activation_cosine_similarity": 1.02432382106781,
        "activation_magnitude_ratio": 2.4211857318878174,
        "activation_dot_product": -2.579026699066162,
        "encoder_gradient_cosine_similarity": 2.446728229522705,
        "encoder_gradient_l2_distance": -2.6195144653320312,
        "encoder_gradient_dot_product": -2.070119857788086,
        "input_gradient_cosine_similarity": -0.522734522819519,
        "input_gradient_l2_distance": 3.4419639110565186,
        "input_gradient_dot_product": -1.0928771495819092
      }
    },
    {
      "fold": 1,
      "held_out_task": "Cars",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 2.4812251349076058,
      "train_r2": -277.6096450086962,
      "train_pearson_r": 0.16018807449475667,
      "val_mse": 3.135318074130921,
      "val_r2": -708.1586544222575,
      "val_pearson_r": -0.08497188075226458,
      "n_iterations": 51,
      "coefficients": {
        "right_subspace_overlap_top_k": -1.2976070642471313,
        "right_subspace_overlap_bottom_k": 0.9106310606002808,
        "interaction_matrix_overlap_top_k": -0.18253958225250244,
        "interaction_matrix_overlap_bottom_k": 2.005849838256836,
        "effective_rank": -1.5946650505065918,
        "effective_rank_mergeability_score": -1.0627785921096802,
        "stable_rank": -0.09323051571846008,
        "spectral_gap": 0.2550690174102783,
        "singular_value_ratio": 0.1462738811969757,
        "layerwise_effective_rank": 1.325525164604187,
        "layerwise_effective_rank_mergeability_score": 0.23930422961711884,
        "task_vector_cosine_similarity": -0.7657480835914612,
        "task_vector_l2_distance": -0.6895484328269958,
        "task_vector_dot_product": -0.6190858483314514,
        "weight_space_angle": -1.0264662504196167,
        "task_vector_magnitude_ratio": -0.6978583335876465,
        "singular_value_overlap": 0.47594815492630005,
        "subspace_overlap": -2.2436318397521973,
        "right_subspace_overlap": 2.483961343765259,
        "activation_l2_distance": -0.29216697812080383,
        "activation_cosine_similarity": -0.7438372373580933,
        "activation_magnitude_ratio": 2.0042827129364014,
        "activation_dot_product": 0.5238903164863586,
        "encoder_gradient_cosine_similarity": 0.3218074142932892,
        "encoder_gradient_l2_distance": -0.03120957687497139,
        "encoder_gradient_dot_product": 0.4376228451728821,
        "input_gradient_cosine_similarity": 1.4968998432159424,
        "input_gradient_l2_distance": -1.4142149686813354,
        "input_gradient_dot_product": 1.100847601890564
      }
    },
    {
      "fold": 2,
      "held_out_task": "RESISC45",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_mse": 0.028166152940227694,
      "train_r2": -2.126878779373693,
      "train_pearson_r": 0.4299618465356089,
      "val_mse": 0.029534181907534925,
      "val_r2": -5.751681278618217,
      "val_pearson_r": 0.34588452531754416,
      "n_iterations": 714,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.5660187602043152,
        "right_subspace_overlap_bottom_k": 0.2873707711696625,
        "interaction_matrix_overlap_top_k": -0.8133692145347595,
        "interaction_matrix_overlap_bottom_k": -0.27986064553260803,
        "effective_rank": 0.36888185143470764,
        "effective_rank_mergeability_score": 0.18632367253303528,
        "stable_rank": 0.031132841482758522,
        "spectral_gap": -0.01393261831253767,
        "singular_value_ratio": -0.18417280912399292,
        "layerwise_effective_rank": 0.12424533069133759,
        "layerwise_effective_rank_mergeability_score": 0.26164525747299194,
        "task_vector_cosine_similarity": -0.02852298505604267,
        "task_vector_l2_distance": -0.07362305372953415,
        "task_vector_dot_product": 0.11559975147247314,
        "weight_space_angle": 0.2305494248867035,
        "task_vector_magnitude_ratio": 0.041404709219932556,
        "singular_value_overlap": 0.002875123405829072,
        "subspace_overlap": 0.07814729958772659,
        "right_subspace_overlap": 0.13307516276836395,
        "activation_l2_distance": 0.21153968572616577,
        "activation_cosine_similarity": 0.31804224848747253,
        "activation_magnitude_ratio": -0.016850624233484268,
        "activation_dot_product": -0.08725526183843613,
        "encoder_gradient_cosine_similarity": 0.01493415329605341,
        "encoder_gradient_l2_distance": -0.20786231756210327,
        "encoder_gradient_dot_product": 0.06084669753909111,
        "input_gradient_cosine_similarity": -0.12622423470020294,
        "input_gradient_l2_distance": -0.11475346982479095,
        "input_gradient_dot_product": -0.09521811455488205
      }
    },
    {
      "fold": 3,
      "held_out_task": "EuroSAT",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.05376414320438446,
      "train_r2": -5.205487264421823,
      "train_pearson_r": -0.24697325632954337,
      "val_mse": 0.01687261370015171,
      "val_r2": -2.9233581936602353,
      "val_pearson_r": 0.030873710943239586,
      "n_iterations": 59,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.08600408583879471,
        "right_subspace_overlap_bottom_k": 0.09765946120023727,
        "interaction_matrix_overlap_top_k": -0.2306084930896759,
        "interaction_matrix_overlap_bottom_k": -0.028628528118133545,
        "effective_rank": -0.14057856798171997,
        "effective_rank_mergeability_score": 0.2019900232553482,
        "stable_rank": 0.7413424253463745,
        "spectral_gap": -0.10037045925855637,
        "singular_value_ratio": -0.27289506793022156,
        "layerwise_effective_rank": -0.4178771674633026,
        "layerwise_effective_rank_mergeability_score": -0.13090001046657562,
        "task_vector_cosine_similarity": 0.20426128804683685,
        "task_vector_l2_distance": 0.18569286167621613,
        "task_vector_dot_product": -0.03772622346878052,
        "weight_space_angle": 0.3237472474575043,
        "task_vector_magnitude_ratio": 0.028044989332556725,
        "singular_value_overlap": 0.2234000712633133,
        "subspace_overlap": -0.05911234766244888,
        "right_subspace_overlap": -0.3303649127483368,
        "activation_l2_distance": 0.11053276062011719,
        "activation_cosine_similarity": 0.21524077653884888,
        "activation_magnitude_ratio": 0.13557887077331543,
        "activation_dot_product": 0.03231358900666237,
        "encoder_gradient_cosine_similarity": 0.2530689835548401,
        "encoder_gradient_l2_distance": -0.11460348963737488,
        "encoder_gradient_dot_product": -0.053831346333026886,
        "input_gradient_cosine_similarity": -0.22005335986614227,
        "input_gradient_l2_distance": 0.0739578977227211,
        "input_gradient_dot_product": 0.21996326744556427
      }
    },
    {
      "fold": 4,
      "held_out_task": "SVHN",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.018177952243161304,
      "train_r2": -1.0646374617231547,
      "train_pearson_r": 0.22362658196414809,
      "val_mse": 0.020212861489873606,
      "val_r2": -3.4116930031328936,
      "val_pearson_r": -0.013833465217357885,
      "n_iterations": 309,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.13213986158370972,
        "right_subspace_overlap_bottom_k": 0.2064487189054489,
        "interaction_matrix_overlap_top_k": -0.14026200771331787,
        "interaction_matrix_overlap_bottom_k": -0.252236932516098,
        "effective_rank": 0.01786554791033268,
        "effective_rank_mergeability_score": 0.694832444190979,
        "stable_rank": 0.8143946528434753,
        "spectral_gap": 0.7167618274688721,
        "singular_value_ratio": -0.18334685266017914,
        "layerwise_effective_rank": 0.10409419238567352,
        "layerwise_effective_rank_mergeability_score": -0.014093405567109585,
        "task_vector_cosine_similarity": -0.011943642050027847,
        "task_vector_l2_distance": 0.059089962393045425,
        "task_vector_dot_product": -0.3446301519870758,
        "weight_space_angle": -0.009867863729596138,
        "task_vector_magnitude_ratio": 0.6280083060264587,
        "singular_value_overlap": 0.12538890540599823,
        "subspace_overlap": -0.3300522565841675,
        "right_subspace_overlap": 0.17266981303691864,
        "activation_l2_distance": -0.5940287709236145,
        "activation_cosine_similarity": -0.5923134684562683,
        "activation_magnitude_ratio": 0.03817326948046684,
        "activation_dot_product": 0.26038500666618347,
        "encoder_gradient_cosine_similarity": 0.03818816319108009,
        "encoder_gradient_l2_distance": -0.4543135464191437,
        "encoder_gradient_dot_product": 0.09837622940540314,
        "input_gradient_cosine_similarity": -0.048445433378219604,
        "input_gradient_l2_distance": 0.060275766998529434,
        "input_gradient_dot_product": -0.19327735900878906
      }
    },
    {
      "fold": 5,
      "held_out_task": "GTSRB",
      "n_train_pairs": 163,
      "n_val_pairs": 16,
      "train_mse": 0.03298891324071599,
      "train_r2": -2.6513770950748556,
      "train_pearson_r": 0.20100487471457115,
      "val_mse": 0.016723572441040058,
      "val_r2": -4.640173760876944,
      "val_pearson_r": 0.5121024896699936,
      "n_iterations": 80,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.41642332077026367,
        "right_subspace_overlap_bottom_k": 0.47172409296035767,
        "interaction_matrix_overlap_top_k": -0.49041110277175903,
        "interaction_matrix_overlap_bottom_k": -0.14860893785953522,
        "effective_rank": 0.21776802837848663,
        "effective_rank_mergeability_score": 0.25498074293136597,
        "stable_rank": -0.05286429449915886,
        "spectral_gap": -0.05064934492111206,
        "singular_value_ratio": 0.04487953335046768,
        "layerwise_effective_rank": 0.10129202902317047,
        "layerwise_effective_rank_mergeability_score": -0.046617697924375534,
        "task_vector_cosine_similarity": -0.3494229316711426,
        "task_vector_l2_distance": -0.211480513215065,
        "task_vector_dot_product": -0.131384015083313,
        "weight_space_angle": -0.3431922495365143,
        "task_vector_magnitude_ratio": -0.18981309235095978,
        "singular_value_overlap": 0.19061145186424255,
        "subspace_overlap": 0.21941255033016205,
        "right_subspace_overlap": 0.18180036544799805,
        "activation_l2_distance": 0.8107679486274719,
        "activation_cosine_similarity": 0.6599052548408508,
        "activation_magnitude_ratio": 0.12343764305114746,
        "activation_dot_product": 0.1691121906042099,
        "encoder_gradient_cosine_similarity": 0.08392070233821869,
        "encoder_gradient_l2_distance": -0.19615232944488525,
        "encoder_gradient_dot_product": 0.18240804970264435,
        "input_gradient_cosine_similarity": 0.1923581063747406,
        "input_gradient_l2_distance": -0.26539814472198486,
        "input_gradient_dot_product": -0.016138067469000816
      }
    },
    {
      "fold": 6,
      "held_out_task": "MNIST",
      "n_train_pairs": 164,
      "n_val_pairs": 15,
      "train_mse": 1.1513756593794764,
      "train_r2": -137.2252345126865,
      "train_pearson_r": -0.03025893683993378,
      "val_mse": 1.241270224049991,
      "val_r2": -290.09953717751,
      "val_pearson_r": 0.12069244414172982,
      "n_iterations": 206,
      "coefficients": {
        "right_subspace_overlap_top_k": 2.5502896308898926,
        "right_subspace_overlap_bottom_k": 0.5849870443344116,
        "interaction_matrix_overlap_top_k": -0.31319597363471985,
        "interaction_matrix_overlap_bottom_k": -1.5693198442459106,
        "effective_rank": 3.944392204284668,
        "effective_rank_mergeability_score": -1.5911939144134521,
        "stable_rank": 0.7671626806259155,
        "spectral_gap": 0.1016959697008133,
        "singular_value_ratio": -0.5944372415542603,
        "layerwise_effective_rank": -1.4904135465621948,
        "layerwise_effective_rank_mergeability_score": 3.304548978805542,
        "task_vector_cosine_similarity": -0.19412408769130707,
        "task_vector_l2_distance": 0.9801937937736511,
        "task_vector_dot_product": -2.978447198867798,
        "weight_space_angle": -1.6957229375839233,
        "task_vector_magnitude_ratio": -0.8222857117652893,
        "singular_value_overlap": -0.0035353354178369045,
        "subspace_overlap": 0.9361830353736877,
        "right_subspace_overlap": 0.15110310912132263,
        "activation_l2_distance": -1.426888346672058,
        "activation_cosine_similarity": -1.976518154144287,
        "activation_magnitude_ratio": -0.6579588651657104,
        "activation_dot_product": 0.5647348761558533,
        "encoder_gradient_cosine_similarity": 2.5720226764678955,
        "encoder_gradient_l2_distance": 1.1532189846038818,
        "encoder_gradient_dot_product": 0.7173742651939392,
        "input_gradient_cosine_similarity": -0.871402382850647,
        "input_gradient_l2_distance": -0.03598248213529587,
        "input_gradient_dot_product": -1.1127115488052368
      }
    },
    {
      "fold": 7,
      "held_out_task": "DTD",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.49753891099959313,
      "train_r2": -61.91214124675254,
      "train_pearson_r": -0.12056235290485327,
      "val_mse": 0.618738279708686,
      "val_r2": -178.00530386054322,
      "val_pearson_r": -0.2759210545093806,
      "n_iterations": 149,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.6304349303245544,
        "right_subspace_overlap_bottom_k": 0.5048612952232361,
        "interaction_matrix_overlap_top_k": 0.9499167203903198,
        "interaction_matrix_overlap_bottom_k": -0.8613185286521912,
        "effective_rank": 0.1582455337047577,
        "effective_rank_mergeability_score": -0.37386733293533325,
        "stable_rank": 0.731697678565979,
        "spectral_gap": -0.5648202896118164,
        "singular_value_ratio": 0.15731768310070038,
        "layerwise_effective_rank": -0.950439453125,
        "layerwise_effective_rank_mergeability_score": 0.3105989098548889,
        "task_vector_cosine_similarity": -0.7244613170623779,
        "task_vector_l2_distance": 0.8638401031494141,
        "task_vector_dot_product": -0.5704941153526306,
        "weight_space_angle": 0.815262496471405,
        "task_vector_magnitude_ratio": -0.6953144669532776,
        "singular_value_overlap": -0.35085099935531616,
        "subspace_overlap": 0.6887421011924744,
        "right_subspace_overlap": 0.010146185755729675,
        "activation_l2_distance": -0.9911388754844666,
        "activation_cosine_similarity": 0.6988699436187744,
        "activation_magnitude_ratio": -0.12800750136375427,
        "activation_dot_product": -0.4880423843860626,
        "encoder_gradient_cosine_similarity": 0.958825409412384,
        "encoder_gradient_l2_distance": 1.3497878313064575,
        "encoder_gradient_dot_product": -0.4674294590950012,
        "input_gradient_cosine_similarity": 0.843874990940094,
        "input_gradient_l2_distance": -0.6618340611457825,
        "input_gradient_dot_product": 0.4132261872291565
      }
    },
    {
      "fold": 8,
      "held_out_task": "Flowers102",
      "n_train_pairs": 163,
      "n_val_pairs": 16,
      "train_mse": 12.950171585128563,
      "train_r2": -1452.9803689977957,
      "train_pearson_r": -0.06343750309580443,
      "val_mse": 41.987894925319566,
      "val_r2": -9548.335032231493,
      "val_pearson_r": 0.35594841714429376,
      "n_iterations": 63,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.014436899684369564,
        "right_subspace_overlap_bottom_k": 0.3163350522518158,
        "interaction_matrix_overlap_top_k": 0.1707332581281662,
        "interaction_matrix_overlap_bottom_k": -1.0626534223556519,
        "effective_rank": -0.45589256286621094,
        "effective_rank_mergeability_score": -0.0176740363240242,
        "stable_rank": 3.9233410358428955,
        "spectral_gap": 0.7201052904129028,
        "singular_value_ratio": 1.2178759574890137,
        "layerwise_effective_rank": 2.1323704719543457,
        "layerwise_effective_rank_mergeability_score": -2.7220802307128906,
        "task_vector_cosine_similarity": 3.2582666873931885,
        "task_vector_l2_distance": -0.9953839182853699,
        "task_vector_dot_product": -1.832664966583252,
        "weight_space_angle": -0.21217532455921173,
        "task_vector_magnitude_ratio": -0.8658031225204468,
        "singular_value_overlap": 0.7611960768699646,
        "subspace_overlap": -0.8659749031066895,
        "right_subspace_overlap": 2.975724458694458,
        "activation_l2_distance": -2.2554304599761963,
        "activation_cosine_similarity": -0.1744387298822403,
        "activation_magnitude_ratio": -0.9056522846221924,
        "activation_dot_product": -3.046515941619873,
        "encoder_gradient_cosine_similarity": -1.2990785837173462,
        "encoder_gradient_l2_distance": 1.9561480283737183,
        "encoder_gradient_dot_product": 0.9673145413398743,
        "input_gradient_cosine_similarity": 1.233271837234497,
        "input_gradient_l2_distance": -2.3193483352661133,
        "input_gradient_dot_product": 0.3733043968677521
      }
    },
    {
      "fold": 9,
      "held_out_task": "PCAM",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_mse": 129.15289385590236,
      "train_r2": -14657.794712449748,
      "train_pearson_r": -0.052074453201239324,
      "val_mse": 69.45558093847903,
      "val_r2": -12694.45703577888,
      "val_pearson_r": -0.10921420468525318,
      "n_iterations": 61,
      "coefficients": {
        "right_subspace_overlap_top_k": -1.5904872417449951,
        "right_subspace_overlap_bottom_k": 11.642648696899414,
        "interaction_matrix_overlap_top_k": 0.29719147086143494,
        "interaction_matrix_overlap_bottom_k": 7.720083713531494,
        "effective_rank": -9.165810585021973,
        "effective_rank_mergeability_score": -5.11166524887085,
        "stable_rank": -1.7710902690887451,
        "spectral_gap": 2.1171836853027344,
        "singular_value_ratio": 10.208305358886719,
        "layerwise_effective_rank": 2.640192747116089,
        "layerwise_effective_rank_mergeability_score": -7.662883281707764,
        "task_vector_cosine_similarity": -8.411931037902832,
        "task_vector_l2_distance": -4.628629207611084,
        "task_vector_dot_product": 1.952284336090088,
        "weight_space_angle": -6.973988056182861,
        "task_vector_magnitude_ratio": -4.80656099319458,
        "singular_value_overlap": 7.0379533767700195,
        "subspace_overlap": -4.175378799438477,
        "right_subspace_overlap": 2.670466899871826,
        "activation_l2_distance": -0.1597590148448944,
        "activation_cosine_similarity": -1.4940816164016724,
        "activation_magnitude_ratio": 4.803842544555664,
        "activation_dot_product": -5.444324016571045,
        "encoder_gradient_cosine_similarity": 7.9018168449401855,
        "encoder_gradient_l2_distance": -0.3589640259742737,
        "encoder_gradient_dot_product": -0.9982295632362366,
        "input_gradient_cosine_similarity": -4.663367748260498,
        "input_gradient_l2_distance": 3.6243600845336914,
        "input_gradient_dot_product": 5.712497711181641
      }
    },
    {
      "fold": 10,
      "held_out_task": "FER2013",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 2.6690058884898127,
      "train_r2": -320.61083676842634,
      "train_pearson_r": -0.043235400462969475,
      "val_mse": 1.2480185212554264,
      "val_r2": -339.4952784398235,
      "val_pearson_r": -0.25130685826127325,
      "n_iterations": 75,
      "coefficients": {
        "right_subspace_overlap_top_k": -2.50140380859375,
        "right_subspace_overlap_bottom_k": -0.5798665285110474,
        "interaction_matrix_overlap_top_k": -0.6784877777099609,
        "interaction_matrix_overlap_bottom_k": -1.0597987174987793,
        "effective_rank": -0.8104969263076782,
        "effective_rank_mergeability_score": -0.8499479293823242,
        "stable_rank": -2.146829843521118,
        "spectral_gap": -1.9765623807907104,
        "singular_value_ratio": -0.49744713306427,
        "layerwise_effective_rank": 1.4951201677322388,
        "layerwise_effective_rank_mergeability_score": 0.7636597156524658,
        "task_vector_cosine_similarity": 1.5221542119979858,
        "task_vector_l2_distance": 0.8060214519500732,
        "task_vector_dot_product": 2.4713194370269775,
        "weight_space_angle": -1.752139687538147,
        "task_vector_magnitude_ratio": -0.31196603178977966,
        "singular_value_overlap": 3.0649759769439697,
        "subspace_overlap": -0.9722149968147278,
        "right_subspace_overlap": 0.20870515704154968,
        "activation_l2_distance": 1.2712204456329346,
        "activation_cosine_similarity": 1.2236367464065552,
        "activation_magnitude_ratio": 1.1150524616241455,
        "activation_dot_product": 1.9687656164169312,
        "encoder_gradient_cosine_similarity": -1.3617290258407593,
        "encoder_gradient_l2_distance": -1.7881715297698975,
        "encoder_gradient_dot_product": 1.0752928256988525,
        "input_gradient_cosine_similarity": -0.30209001898765564,
        "input_gradient_l2_distance": 1.3840097188949585,
        "input_gradient_dot_product": 0.2064773440361023
      }
    },
    {
      "fold": 11,
      "held_out_task": "OxfordIIITPet",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_mse": 0.6282989375073559,
      "train_r2": -70.95024908161945,
      "train_pearson_r": 0.3738622815918228,
      "val_mse": 0.6165485964885452,
      "val_r2": -138.35795872321586,
      "val_pearson_r": 0.31459427908972176,
      "n_iterations": 87,
      "coefficients": {
        "right_subspace_overlap_top_k": -1.315001368522644,
        "right_subspace_overlap_bottom_k": 1.3763978481292725,
        "interaction_matrix_overlap_top_k": 0.8569925427436829,
        "interaction_matrix_overlap_bottom_k": -0.6160089373588562,
        "effective_rank": 1.223780632019043,
        "effective_rank_mergeability_score": -1.5491119623184204,
        "stable_rank": 1.1705701351165771,
        "spectral_gap": 3.326742172241211,
        "singular_value_ratio": 0.9097999334335327,
        "layerwise_effective_rank": -0.8132448792457581,
        "layerwise_effective_rank_mergeability_score": 1.2627577781677246,
        "task_vector_cosine_similarity": -1.3569707870483398,
        "task_vector_l2_distance": -0.6106877326965332,
        "task_vector_dot_product": 0.02832013927400112,
        "weight_space_angle": -0.15663570165634155,
        "task_vector_magnitude_ratio": 0.27344414591789246,
        "singular_value_overlap": -0.12875863909721375,
        "subspace_overlap": 0.472516268491745,
        "right_subspace_overlap": 1.5037988424301147,
        "activation_l2_distance": -2.2344648838043213,
        "activation_cosine_similarity": -1.5965847969055176,
        "activation_magnitude_ratio": -0.954463541507721,
        "activation_dot_product": 0.6061603426933289,
        "encoder_gradient_cosine_similarity": -0.6170859932899475,
        "encoder_gradient_l2_distance": -1.2262178659439087,
        "encoder_gradient_dot_product": 0.9152623414993286,
        "input_gradient_cosine_similarity": 0.16438953578472137,
        "input_gradient_l2_distance": -0.8857584595680237,
        "input_gradient_dot_product": 0.9521542191505432
      }
    },
    {
      "fold": 12,
      "held_out_task": "STL10",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_mse": 0.0904091103398261,
      "train_r2": -9.843834619278965,
      "train_pearson_r": 0.14791731507309872,
      "val_mse": 0.14793653035236384,
      "val_r2": -31.832277778690617,
      "val_pearson_r": -0.09117910105308148,
      "n_iterations": 78,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.28447410464286804,
        "right_subspace_overlap_bottom_k": 0.37607190012931824,
        "interaction_matrix_overlap_top_k": 0.6372925043106079,
        "interaction_matrix_overlap_bottom_k": 0.030611146241426468,
        "effective_rank": 0.2973686456680298,
        "effective_rank_mergeability_score": 0.8983745574951172,
        "stable_rank": -0.5122472047805786,
        "spectral_gap": 0.8805619478225708,
        "singular_value_ratio": 1.6252599954605103,
        "layerwise_effective_rank": -0.0736253559589386,
        "layerwise_effective_rank_mergeability_score": -0.29119908809661865,
        "task_vector_cosine_similarity": -1.1193583011627197,
        "task_vector_l2_distance": -0.5041283965110779,
        "task_vector_dot_product": 0.6420590281486511,
        "weight_space_angle": 0.0042143091559410095,
        "task_vector_magnitude_ratio": -0.06180283799767494,
        "singular_value_overlap": 0.04813191667199135,
        "subspace_overlap": -1.1441785097122192,
        "right_subspace_overlap": -0.19800005853176117,
        "activation_l2_distance": 0.050080444663763046,
        "activation_cosine_similarity": 0.11612047255039215,
        "activation_magnitude_ratio": -0.13010187447071075,
        "activation_dot_product": 0.08857373148202896,
        "encoder_gradient_cosine_similarity": -0.2070336490869522,
        "encoder_gradient_l2_distance": -0.2025892436504364,
        "encoder_gradient_dot_product": 0.5173092484474182,
        "input_gradient_cosine_similarity": -0.4406539797782898,
        "input_gradient_l2_distance": -0.32013511657714844,
        "input_gradient_dot_product": -0.2934861481189728
      }
    },
    {
      "fold": 13,
      "held_out_task": "CIFAR100",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.04864447994443028,
      "train_r2": -4.4219103428336055,
      "train_pearson_r": 0.23422002307141468,
      "val_mse": 0.037214017359573165,
      "val_r2": -6.515963827523398,
      "val_pearson_r": 0.4043282157661675,
      "n_iterations": 93,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.3816080391407013,
        "right_subspace_overlap_bottom_k": -0.0734051764011383,
        "interaction_matrix_overlap_top_k": -0.19777026772499084,
        "interaction_matrix_overlap_bottom_k": -0.20187021791934967,
        "effective_rank": 0.044559743255376816,
        "effective_rank_mergeability_score": -0.07137752324342728,
        "stable_rank": -0.13622617721557617,
        "spectral_gap": 0.4958967864513397,
        "singular_value_ratio": 0.24550095200538635,
        "layerwise_effective_rank": 0.24445851147174835,
        "layerwise_effective_rank_mergeability_score": 0.250678151845932,
        "task_vector_cosine_similarity": -0.760383665561676,
        "task_vector_l2_distance": -0.010225560516119003,
        "task_vector_dot_product": -0.04633937031030655,
        "weight_space_angle": 0.32240912318229675,
        "task_vector_magnitude_ratio": 0.19124853610992432,
        "singular_value_overlap": -0.07350420206785202,
        "subspace_overlap": 0.3234560787677765,
        "right_subspace_overlap": 0.39245858788490295,
        "activation_l2_distance": 0.07360031455755234,
        "activation_cosine_similarity": 0.04187604784965515,
        "activation_magnitude_ratio": 0.13750821352005005,
        "activation_dot_product": 0.17401988804340363,
        "encoder_gradient_cosine_similarity": 0.24301695823669434,
        "encoder_gradient_l2_distance": -0.7339674830436707,
        "encoder_gradient_dot_product": -0.31068679690361023,
        "input_gradient_cosine_similarity": -0.0035382481291890144,
        "input_gradient_l2_distance": 0.30387449264526367,
        "input_gradient_dot_product": -0.2492884248495102
      }
    },
    {
      "fold": 14,
      "held_out_task": "CIFAR10",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.48109693049863156,
      "train_r2": -56.09433643133025,
      "train_pearson_r": 0.09865435633212535,
      "val_mse": 0.19141116876572362,
      "val_r2": -32.39778489517878,
      "val_pearson_r": 0.16249381003485996,
      "n_iterations": 148,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.5058146715164185,
        "right_subspace_overlap_bottom_k": 0.30316370725631714,
        "interaction_matrix_overlap_top_k": -0.6701672077178955,
        "interaction_matrix_overlap_bottom_k": -1.6767505407333374,
        "effective_rank": -1.0418986082077026,
        "effective_rank_mergeability_score": -1.1249085664749146,
        "stable_rank": 1.5329595804214478,
        "spectral_gap": 1.8673521280288696,
        "singular_value_ratio": 0.322973370552063,
        "layerwise_effective_rank": -0.6533414721488953,
        "layerwise_effective_rank_mergeability_score": -0.0036418696399778128,
        "task_vector_cosine_similarity": 0.34141284227371216,
        "task_vector_l2_distance": 0.21577681601047516,
        "task_vector_dot_product": 0.6589002013206482,
        "weight_space_angle": 1.2171396017074585,
        "task_vector_magnitude_ratio": 0.2129986435174942,
        "singular_value_overlap": 0.05986422672867775,
        "subspace_overlap": 0.18912246823310852,
        "right_subspace_overlap": 1.4497933387756348,
        "activation_l2_distance": -1.051953673362732,
        "activation_cosine_similarity": -1.5842688083648682,
        "activation_magnitude_ratio": 0.4754161238670349,
        "activation_dot_product": 1.0739834308624268,
        "encoder_gradient_cosine_similarity": 0.5308530330657959,
        "encoder_gradient_l2_distance": -1.6170066595077515,
        "encoder_gradient_dot_product": -0.5736780762672424,
        "input_gradient_cosine_similarity": 0.31797248125076294,
        "input_gradient_l2_distance": 1.3990367650985718,
        "input_gradient_dot_product": -1.6803231239318848
      }
    },
    {
      "fold": 15,
      "held_out_task": "Food101",
      "n_train_pairs": 162,
      "n_val_pairs": 17,
      "train_mse": 0.8013472947283955,
      "train_r2": -90.3019340128508,
      "train_pearson_r": 0.3895280779641247,
      "val_mse": 0.3337533483637299,
      "val_r2": -64.03464062085165,
      "val_pearson_r": 0.16367048744319096,
      "n_iterations": 138,
      "coefficients": {
        "right_subspace_overlap_top_k": 1.4834988117218018,
        "right_subspace_overlap_bottom_k": -0.1798345446586609,
        "interaction_matrix_overlap_top_k": 0.9859234094619751,
        "interaction_matrix_overlap_bottom_k": 0.6523208022117615,
        "effective_rank": -0.02228233590722084,
        "effective_rank_mergeability_score": 1.7359191179275513,
        "stable_rank": -0.1487327367067337,
        "spectral_gap": -0.8201059699058533,
        "singular_value_ratio": 2.40718674659729,
        "layerwise_effective_rank": 0.18030652403831482,
        "layerwise_effective_rank_mergeability_score": 0.6149301528930664,
        "task_vector_cosine_similarity": -0.4914792776107788,
        "task_vector_l2_distance": -0.8592410087585449,
        "task_vector_dot_product": -0.576869785785675,
        "weight_space_angle": 0.5715564489364624,
        "task_vector_magnitude_ratio": -1.3886194229125977,
        "singular_value_overlap": -0.05007007718086243,
        "subspace_overlap": -0.41944190859794617,
        "right_subspace_overlap": -0.5467223525047302,
        "activation_l2_distance": 0.8541761040687561,
        "activation_cosine_similarity": 0.31001025438308716,
        "activation_magnitude_ratio": 0.6643768548965454,
        "activation_dot_product": 0.005785558372735977,
        "encoder_gradient_cosine_similarity": -0.17310959100723267,
        "encoder_gradient_l2_distance": 0.10446333885192871,
        "encoder_gradient_dot_product": 0.02450977824628353,
        "input_gradient_cosine_similarity": -1.2171050310134888,
        "input_gradient_l2_distance": -2.0441055297851562,
        "input_gradient_dot_product": -0.6601070165634155
      }
    },
    {
      "fold": 16,
      "held_out_task": "FashionMNIST",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_mse": 0.012798093226570426,
      "train_r2": -0.44771663799536676,
      "train_pearson_r": 0.3422840943855713,
      "val_mse": 0.01053885515317851,
      "val_r2": -0.8337247473613099,
      "val_pearson_r": 0.22505471364409702,
      "n_iterations": 1000,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.10291800647974014,
        "right_subspace_overlap_bottom_k": -0.011196163482964039,
        "interaction_matrix_overlap_top_k": 0.07038921117782593,
        "interaction_matrix_overlap_bottom_k": -0.040953923016786575,
        "effective_rank": 0.37952721118927,
        "effective_rank_mergeability_score": -0.10672205686569214,
        "stable_rank": -0.07791873812675476,
        "spectral_gap": 0.46182337403297424,
        "singular_value_ratio": 0.10448933392763138,
        "layerwise_effective_rank": -0.35045599937438965,
        "layerwise_effective_rank_mergeability_score": -0.020919060334563255,
        "task_vector_cosine_similarity": -0.020466333255171776,
        "task_vector_l2_distance": 0.024946006014943123,
        "task_vector_dot_product": -0.05595295503735542,
        "weight_space_angle": 0.498322993516922,
        "task_vector_magnitude_ratio": 0.2790937125682831,
        "singular_value_overlap": 0.032229043543338776,
        "subspace_overlap": 0.26530277729034424,
        "right_subspace_overlap": 0.018046321347355843,
        "activation_l2_distance": 0.043780580163002014,
        "activation_cosine_similarity": 0.2647574543952942,
        "activation_magnitude_ratio": -0.005755880381911993,
        "activation_dot_product": -0.13662073016166687,
        "encoder_gradient_cosine_similarity": 0.05733524262905121,
        "encoder_gradient_l2_distance": -0.2441592961549759,
        "encoder_gradient_dot_product": 0.0722896009683609,
        "input_gradient_cosine_similarity": -0.16251322627067566,
        "input_gradient_l2_distance": -0.12464579939842224,
        "input_gradient_dot_product": -0.11080383509397507
      }
    },
    {
      "fold": 17,
      "held_out_task": "EMNIST",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.03578682764451973,
      "train_r2": -3.288264610317599,
      "train_pearson_r": 0.02730707226568575,
      "val_mse": 0.1763068972808706,
      "val_r2": -33.55152019505325,
      "val_pearson_r": -0.20714838354291354,
      "n_iterations": 58,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.010690414346754551,
        "right_subspace_overlap_bottom_k": 0.12489054352045059,
        "interaction_matrix_overlap_top_k": -0.5725926160812378,
        "interaction_matrix_overlap_bottom_k": 0.004905520938336849,
        "effective_rank": 0.03826458752155304,
        "effective_rank_mergeability_score": -0.25639525055885315,
        "stable_rank": -0.1418972909450531,
        "spectral_gap": 0.3307674825191498,
        "singular_value_ratio": 0.4345223307609558,
        "layerwise_effective_rank": -0.036205142736434937,
        "layerwise_effective_rank_mergeability_score": 0.5438504815101624,
        "task_vector_cosine_similarity": -0.15018188953399658,
        "task_vector_l2_distance": 0.020171741023659706,
        "task_vector_dot_product": 0.19447161257266998,
        "weight_space_angle": 0.22015704214572906,
        "task_vector_magnitude_ratio": 0.23959892988204956,
        "singular_value_overlap": 0.08571982383728027,
        "subspace_overlap": 0.08948265761137009,
        "right_subspace_overlap": -0.18515732884407043,
        "activation_l2_distance": -0.19809415936470032,
        "activation_cosine_similarity": -0.1276421844959259,
        "activation_magnitude_ratio": 0.19616670906543732,
        "activation_dot_product": 0.21160471439361572,
        "encoder_gradient_cosine_similarity": -0.05482815206050873,
        "encoder_gradient_l2_distance": 0.09522203356027603,
        "encoder_gradient_dot_product": 0.35039326548576355,
        "input_gradient_cosine_similarity": -0.15093985199928284,
        "input_gradient_l2_distance": -0.15505646169185638,
        "input_gradient_dot_product": -0.1407725065946579
      }
    },
    {
      "fold": 18,
      "held_out_task": "KMNIST",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.08283167953641475,
      "train_r2": -13.50497108510949,
      "train_pearson_r": 0.11643061892149215,
      "val_mse": 0.058690768198862445,
      "val_r2": -20.657873303289566,
      "val_pearson_r": 0.22254226743531835,
      "n_iterations": 275,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.04627827927470207,
        "right_subspace_overlap_bottom_k": 0.13383996486663818,
        "interaction_matrix_overlap_top_k": 0.3155333399772644,
        "interaction_matrix_overlap_bottom_k": -0.03545962646603584,
        "effective_rank": -1.3818845748901367,
        "effective_rank_mergeability_score": 0.5992591977119446,
        "stable_rank": 0.7977613210678101,
        "spectral_gap": 0.7771854400634766,
        "singular_value_ratio": -0.9074245691299438,
        "layerwise_effective_rank": 1.1886340379714966,
        "layerwise_effective_rank_mergeability_score": -0.37273573875427246,
        "task_vector_cosine_similarity": 0.08239645510911942,
        "task_vector_l2_distance": 0.06364385783672333,
        "task_vector_dot_product": -0.3109329044818878,
        "weight_space_angle": -0.1050109788775444,
        "task_vector_magnitude_ratio": 1.4821373224258423,
        "singular_value_overlap": 0.1903519481420517,
        "subspace_overlap": -0.700501024723053,
        "right_subspace_overlap": -0.48305609822273254,
        "activation_l2_distance": -0.13290195167064667,
        "activation_cosine_similarity": 0.1057942733168602,
        "activation_magnitude_ratio": 0.01746051199734211,
        "activation_dot_product": -0.08742859959602356,
        "encoder_gradient_cosine_similarity": 0.046080220490694046,
        "encoder_gradient_l2_distance": 0.0939038097858429,
        "encoder_gradient_dot_product": -0.14647765457630157,
        "input_gradient_cosine_similarity": -0.2201589196920395,
        "input_gradient_l2_distance": -0.6147937774658203,
        "input_gradient_dot_product": 0.5594912767410278
      }
    },
    {
      "fold": 19,
      "held_out_task": "RenderedSST2",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_mse": 0.17972445024506042,
      "train_r2": -19.44259636234976,
      "train_pearson_r": 0.11512376183330579,
      "val_mse": 0.15172369582742895,
      "val_r2": -30.499069291999355,
      "val_pearson_r": 0.10937126052866603,
      "n_iterations": 60,
      "coefficients": {
        "right_subspace_overlap_top_k": -0.4076809585094452,
        "right_subspace_overlap_bottom_k": 0.3323339521884918,
        "interaction_matrix_overlap_top_k": 0.5327751636505127,
        "interaction_matrix_overlap_bottom_k": -0.26880088448524475,
        "effective_rank": 0.04236004501581192,
        "effective_rank_mergeability_score": 0.7681547403335571,
        "stable_rank": 0.23454323410987854,
        "spectral_gap": 1.1202551126480103,
        "singular_value_ratio": 0.06692349910736084,
        "layerwise_effective_rank": 0.6882368922233582,
        "layerwise_effective_rank_mergeability_score": -0.9252102375030518,
        "task_vector_cosine_similarity": -0.26449090242385864,
        "task_vector_l2_distance": -0.01246357336640358,
        "task_vector_dot_product": 0.5693683624267578,
        "weight_space_angle": 0.7922592759132385,
        "task_vector_magnitude_ratio": 0.018213743343949318,
        "singular_value_overlap": -0.6248651742935181,
        "subspace_overlap": -0.3018362820148468,
        "right_subspace_overlap": 0.12812311947345734,
        "activation_l2_distance": -0.791545569896698,
        "activation_cosine_similarity": -0.2081284075975418,
        "activation_magnitude_ratio": -0.06427750736474991,
        "activation_dot_product": -0.35151174664497375,
        "encoder_gradient_cosine_similarity": 0.1957361400127411,
        "encoder_gradient_l2_distance": -0.26816436648368835,
        "encoder_gradient_dot_product": -0.005227243062108755,
        "input_gradient_cosine_similarity": -0.2668137848377228,
        "input_gradient_l2_distance": -0.20063166320323944,
        "input_gradient_dot_product": 0.4689571261405945
      }
    }
  ],
  "optimization_params": {
    "objective": "MSE",
    "n_iterations": 1000,
    "learning_rate": 0.01,
    "patience": 50,
    "convergence_threshold": 1e-06
  }
}