{
  "aggregate_metrics": {
    "train_r": 0.6160949380881315,
    "train_p": 0.0,
    "val_r": 0.39261808728019937,
    "val_p": 1.2129172197036777e-14
  },
  "per_fold_stats": {
    "train_r_mean": 0.7517868201672098,
    "train_r_std": 0.02884538250208843,
    "val_r_mean": 0.660020001724191,
    "val_r_std": 0.175968530047121,
    "n_nonzero_mean": 16.7,
    "n_nonzero_std": 3.4219877264537346
  },
  "average_coefficients": {
    "right_subspace_overlap_top_k": 0.006533559411764145,
    "right_subspace_overlap_bottom_k": -0.013745452277362347,
    "interaction_matrix_overlap_top_k": 0.0007507792324759066,
    "interaction_matrix_overlap_bottom_k": -0.0007551218150183558,
    "effective_rank": -0.00019390031229704618,
    "effective_rank_mergeability_score": 0.00017324488726444542,
    "stable_rank": -0.0003448409552220255,
    "spectral_gap": 5.678176967194304e-05,
    "singular_value_ratio": -0.00019888598762918264,
    "layerwise_effective_rank": -0.000934633775614202,
    "layerwise_effective_rank_mergeability_score": 0.0009657128830440342,
    "task_vector_cosine_similarity": 0.0008784830570220947,
    "task_vector_l2_distance": -0.001598607050254941,
    "task_vector_dot_product": -0.0008031550096347928,
    "weight_space_angle": 0.0007103706011548638,
    "task_vector_magnitude_ratio": -0.00014764565275982022,
    "singular_value_overlap": 0.004652106203138828,
    "subspace_overlap": -0.00015884413733147085,
    "right_subspace_overlap": 0.006524239666759968,
    "activation_l2_distance": -0.001410651602782309,
    "activation_cosine_similarity": 0.010578606277704239,
    "activation_magnitude_ratio": -0.001965178642421961,
    "activation_dot_product": 0.002552092308178544,
    "encoder_gradient_cosine_similarity": 0.0013743110466748476,
    "encoder_gradient_l2_distance": -0.020633619278669357,
    "encoder_gradient_dot_product": 0.00018909941718447953,
    "input_gradient_cosine_similarity": 0.0004466112586669624,
    "input_gradient_l2_distance": -0.01256854273378849,
    "input_gradient_dot_product": -0.0014428261201828718
  },
  "coefficient_std": {
    "right_subspace_overlap_top_k": 0.003112143138423562,
    "right_subspace_overlap_bottom_k": 0.002974230330437422,
    "interaction_matrix_overlap_top_k": 0.0014184248866513371,
    "interaction_matrix_overlap_bottom_k": 0.002221831353381276,
    "effective_rank": 0.0005951150669716299,
    "effective_rank_mergeability_score": 0.0006558449240401387,
    "stable_rank": 0.00061770313186571,
    "spectral_gap": 0.0006575153674930334,
    "singular_value_ratio": 0.0006621009088121355,
    "layerwise_effective_rank": 0.0011761042987927794,
    "layerwise_effective_rank_mergeability_score": 0.0013518346240743995,
    "task_vector_cosine_similarity": 0.0014307035598903894,
    "task_vector_l2_distance": 0.002546121599152684,
    "task_vector_dot_product": 0.0013121989322826266,
    "weight_space_angle": 0.0025951683055609465,
    "task_vector_magnitude_ratio": 0.00044738498399965465,
    "singular_value_overlap": 0.0028970432467758656,
    "subspace_overlap": 0.0006064486806280911,
    "right_subspace_overlap": 0.003317583119496703,
    "activation_l2_distance": 0.003036763286218047,
    "activation_cosine_similarity": 0.004192250315099955,
    "activation_magnitude_ratio": 0.0018638529581949115,
    "activation_dot_product": 0.005334638990461826,
    "encoder_gradient_cosine_similarity": 0.0013877105666324496,
    "encoder_gradient_l2_distance": 0.007479684427380562,
    "encoder_gradient_dot_product": 0.0011348134139552712,
    "input_gradient_cosine_similarity": 0.0010888047982007265,
    "input_gradient_l2_distance": 0.006578623317182064,
    "input_gradient_dot_product": 0.0024355826899409294
  },
  "nonzero_frequency": {
    "right_subspace_overlap_top_k": 0.9,
    "right_subspace_overlap_bottom_k": 1.0,
    "interaction_matrix_overlap_top_k": 0.7,
    "interaction_matrix_overlap_bottom_k": 0.4,
    "effective_rank": 0.1,
    "effective_rank_mergeability_score": 0.2,
    "stable_rank": 0.25,
    "spectral_gap": 0.15,
    "singular_value_ratio": 0.1,
    "layerwise_effective_rank": 0.45,
    "layerwise_effective_rank_mergeability_score": 0.45,
    "task_vector_cosine_similarity": 0.6,
    "task_vector_l2_distance": 0.9,
    "task_vector_dot_product": 0.45,
    "weight_space_angle": 0.7,
    "task_vector_magnitude_ratio": 0.1,
    "singular_value_overlap": 1.0,
    "subspace_overlap": 0.2,
    "right_subspace_overlap": 0.95,
    "activation_l2_distance": 0.6,
    "activation_cosine_similarity": 1.0,
    "activation_magnitude_ratio": 0.65,
    "activation_dot_product": 0.75,
    "encoder_gradient_cosine_similarity": 0.65,
    "encoder_gradient_l2_distance": 1.0,
    "encoder_gradient_dot_product": 0.3,
    "input_gradient_cosine_similarity": 0.5,
    "input_gradient_l2_distance": 1.0,
    "input_gradient_dot_product": 0.65
  },
  "fold_results": [
    {
      "fold": 0,
      "held_out_task": "SUN397",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7481909694068627,
      "val_r": 0.5992775650168041,
      "n_iterations": 249,
      "n_nonzero_coefficients": 15,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.012530717067420483,
        "right_subspace_overlap_bottom_k": -0.010567175224423409,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": -0.007265583146363497,
        "task_vector_dot_product": -0.002739576157182455,
        "weight_space_angle": 0.0,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0010759008582681417,
        "subspace_overlap": -0.0014187395572662354,
        "right_subspace_overlap": 0.004626449663192034,
        "activation_l2_distance": -0.005564774386584759,
        "activation_cosine_similarity": 0.013644526712596416,
        "activation_magnitude_ratio": -0.007078429684042931,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.03265593945980072,
        "encoder_gradient_dot_product": 0.003226625267416239,
        "input_gradient_cosine_similarity": 0.0011357793118804693,
        "input_gradient_l2_distance": -0.02408108487725258,
        "input_gradient_dot_product": -0.0031399428844451904
      }
    },
    {
      "fold": 1,
      "held_out_task": "Cars",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7431710504276792,
      "val_r": 0.773532513869468,
      "n_iterations": 246,
      "n_nonzero_coefficients": 14,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.006744374055415392,
        "right_subspace_overlap_bottom_k": -0.015159912407398224,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.001012285239994526,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0019757538102567196,
        "task_vector_l2_distance": -0.0021659620106220245,
        "task_vector_dot_product": -0.003647281788289547,
        "weight_space_angle": 0.0,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0041620731353759766,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.009679111652076244,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.0127859553322196,
        "activation_magnitude_ratio": -0.0037905664648860693,
        "activation_dot_product": 0.0034611658193171024,
        "encoder_gradient_cosine_similarity": 0.0012327973963692784,
        "encoder_gradient_l2_distance": -0.029647061601281166,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.020900029689073563,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 2,
      "held_out_task": "RESISC45",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.770284137633837,
      "val_r": 0.7799682407632963,
      "n_iterations": 254,
      "n_nonzero_coefficients": 12,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.004590072203427553,
        "right_subspace_overlap_bottom_k": -0.012331862933933735,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.0011516135418787599,
        "layerwise_effective_rank_mergeability_score": 0.0013954895548522472,
        "task_vector_cosine_similarity": 0.001803043414838612,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.003940535243600607,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.008041040040552616,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.010987753048539162,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.013902591541409492,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0016817966243252158,
        "input_gradient_l2_distance": -0.007669184356927872,
        "input_gradient_dot_product": -0.0019293531076982617
      }
    },
    {
      "fold": 3,
      "held_out_task": "EuroSAT",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7602516039096148,
      "val_r": 0.7217151797216658,
      "n_iterations": 215,
      "n_nonzero_coefficients": 17,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.0070540024898946285,
        "right_subspace_overlap_bottom_k": -0.013363191857933998,
        "interaction_matrix_overlap_top_k": 0.0026036137714982033,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.0011917052324861288,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.001363168703392148,
        "task_vector_l2_distance": 0.0014569316990673542,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0013206556905061007,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0036546364426612854,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.004652974661439657,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.01106884516775608,
        "activation_magnitude_ratio": -0.0030538314022123814,
        "activation_dot_product": -0.0012146863155066967,
        "encoder_gradient_cosine_similarity": 0.0034809429198503494,
        "encoder_gradient_l2_distance": -0.014749893918633461,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": -0.001192962285131216,
        "input_gradient_l2_distance": -0.004073450341820717,
        "input_gradient_dot_product": -0.004213355947285891
      }
    },
    {
      "fold": 4,
      "held_out_task": "SVHN",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7603276410851849,
      "val_r": 0.7539258253000004,
      "n_iterations": 408,
      "n_nonzero_coefficients": 15,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.0045522586442530155,
        "right_subspace_overlap_bottom_k": -0.013780750334262848,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": -0.0010795542038977146,
        "task_vector_dot_product": -0.001647025696001947,
        "weight_space_angle": 0.0,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0018149936804547906,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.008705674670636654,
        "activation_l2_distance": -0.001159735256806016,
        "activation_cosine_similarity": 0.009042926132678986,
        "activation_magnitude_ratio": -0.0018150354735553265,
        "activation_dot_product": -0.0010289426427334547,
        "encoder_gradient_cosine_similarity": 0.0019899229519069195,
        "encoder_gradient_l2_distance": -0.0187973715364933,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.002167662139981985,
        "input_gradient_l2_distance": -0.009704677388072014,
        "input_gradient_dot_product": -0.0018674260936677456
      }
    },
    {
      "fold": 5,
      "held_out_task": "GTSRB",
      "n_train_pairs": 163,
      "n_val_pairs": 16,
      "train_r": 0.7074396003256063,
      "val_r": 0.7357687453416482,
      "n_iterations": 189,
      "n_nonzero_coefficients": 26,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.009449849836528301,
        "right_subspace_overlap_bottom_k": -0.019527455791831017,
        "interaction_matrix_overlap_top_k": 0.002067153574898839,
        "interaction_matrix_overlap_bottom_k": -0.007974611595273018,
        "effective_rank": -0.0023363197688013315,
        "effective_rank_mergeability_score": 0.0015370349865406752,
        "stable_rank": -0.0019564335234463215,
        "spectral_gap": 0.0018869821215048432,
        "singular_value_ratio": -0.0028964770026504993,
        "layerwise_effective_rank": -0.0030851818155497313,
        "layerwise_effective_rank_mergeability_score": 0.005063203629106283,
        "task_vector_cosine_similarity": 0.0034783119335770607,
        "task_vector_l2_distance": -0.0046456605195999146,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.002119320910423994,
        "task_vector_magnitude_ratio": -0.0012774551287293434,
        "singular_value_overlap": 0.0055870614014565945,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.01035038661211729,
        "activation_l2_distance": -0.008249369449913502,
        "activation_cosine_similarity": 0.011532029137015343,
        "activation_magnitude_ratio": -0.003513963194563985,
        "activation_dot_product": 0.016417959704995155,
        "encoder_gradient_cosine_similarity": 0.0016414120327681303,
        "encoder_gradient_l2_distance": -0.026768654584884644,
        "encoder_gradient_dot_product": -0.002022180240601301,
        "input_gradient_cosine_similarity": 0.001455815159715712,
        "input_gradient_l2_distance": -0.022579330950975418,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 6,
      "held_out_task": "MNIST",
      "n_train_pairs": 164,
      "n_val_pairs": 15,
      "train_r": 0.7285658302726996,
      "val_r": 0.6176602237082307,
      "n_iterations": 432,
      "n_nonzero_coefficients": 15,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.006165440194308758,
        "right_subspace_overlap_bottom_k": -0.012949502095580101,
        "interaction_matrix_overlap_top_k": 0.004293820820748806,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0010859213070943952,
        "task_vector_cosine_similarity": 0.0016055600717663765,
        "task_vector_l2_distance": -0.0024656662717461586,
        "task_vector_dot_product": -0.0011569365160539746,
        "weight_space_angle": -0.0030230311676859856,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.006751133129000664,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.006804970558732748,
        "activation_l2_distance": -0.004413940478116274,
        "activation_cosine_similarity": 0.011327262036502361,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0037936631124466658,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.021336624398827553,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.01741921156644821,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 7,
      "held_out_task": "DTD",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.752156309008719,
      "val_r": 0.33574962345194354,
      "n_iterations": 255,
      "n_nonzero_coefficients": 20,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.009502003900706768,
        "right_subspace_overlap_bottom_k": -0.01262452732771635,
        "interaction_matrix_overlap_top_k": -0.0012667356058955193,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.002098728669807315,
        "layerwise_effective_rank_mergeability_score": 0.0017820419743657112,
        "task_vector_cosine_similarity": -0.0014584745513275266,
        "task_vector_l2_distance": -0.0018393568461760879,
        "task_vector_dot_product": -0.0014844076940789819,
        "weight_space_angle": -0.0010989318834617734,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0021742028184235096,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.00534145999699831,
        "activation_l2_distance": -0.0013329494977369905,
        "activation_cosine_similarity": 0.011269895359873772,
        "activation_magnitude_ratio": -0.002432728186249733,
        "activation_dot_product": -0.001976557308807969,
        "encoder_gradient_cosine_similarity": 0.0034162115771323442,
        "encoder_gradient_l2_distance": -0.013070221990346909,
        "encoder_gradient_dot_product": 0.0011020120000466704,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.008043471723794937,
        "input_gradient_dot_product": 0.0010765485931187868
      }
    },
    {
      "fold": 8,
      "held_out_task": "Flowers102",
      "n_train_pairs": 163,
      "n_val_pairs": 16,
      "train_r": 0.778457596729628,
      "val_r": 0.6210952972721285,
      "n_iterations": 190,
      "n_nonzero_coefficients": 16,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.011022589169442654,
        "right_subspace_overlap_bottom_k": -0.01359769981354475,
        "interaction_matrix_overlap_top_k": 0.0013250161428004503,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": -0.0019358096178621054,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.001214566407725215,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": 0.0021872848737984896,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0013818780425935984,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0019544889219105244,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.004404126666486263,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.01290079951286316,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": -0.0040403567254543304,
        "encoder_gradient_cosine_similarity": 0.002139739226549864,
        "encoder_gradient_l2_distance": -0.015986299142241478,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0016816130373626947,
        "input_gradient_l2_distance": -0.00897759199142456,
        "input_gradient_dot_product": -0.0021365275606513023
      }
    },
    {
      "fold": 9,
      "held_out_task": "PCAM",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7343395509953744,
      "val_r": 0.7056925699712334,
      "n_iterations": 227,
      "n_nonzero_coefficients": 17,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.004223972093313932,
        "right_subspace_overlap_bottom_k": -0.015559379942715168,
        "interaction_matrix_overlap_top_k": -0.0010706133907660842,
        "interaction_matrix_overlap_bottom_k": -0.0014661040622740984,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": -0.0011982677970081568,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0010640679392963648,
        "task_vector_l2_distance": -0.002369387773796916,
        "task_vector_dot_product": -0.0033723576925694942,
        "weight_space_angle": 0.0026097861118614674,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0015802818816155195,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.01186443492770195,
        "activation_l2_distance": 0.001234426163136959,
        "activation_cosine_similarity": -0.0012033339589834213,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": -0.0028877747245132923,
        "encoder_gradient_cosine_similarity": 0.0029054107144474983,
        "encoder_gradient_l2_distance": -0.023722149431705475,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.016218796372413635,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 10,
      "held_out_task": "FER2013",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7661608557842541,
      "val_r": 0.053362010209868194,
      "n_iterations": 361,
      "n_nonzero_coefficients": 14,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.0,
        "right_subspace_overlap_bottom_k": -0.012216389179229736,
        "interaction_matrix_overlap_top_k": -0.001155021833255887,
        "interaction_matrix_overlap_bottom_k": 0.0018325723940506577,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0043264091946184635,
        "task_vector_l2_distance": -0.0029646374750882387,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": -0.001866612583398819,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.013985290192067623,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.0,
        "activation_l2_distance": -0.009232636541128159,
        "activation_cosine_similarity": 0.017819851636886597,
        "activation_magnitude_ratio": -0.003793486161157489,
        "activation_dot_product": 0.0073711806908249855,
        "encoder_gradient_cosine_similarity": -0.0010827642399817705,
        "encoder_gradient_l2_distance": -0.04241575673222542,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.004192317835986614,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 11,
      "held_out_task": "OxfordIIITPet",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7263697658691181,
      "val_r": 0.6713162309422045,
      "n_iterations": 172,
      "n_nonzero_coefficients": 20,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.008591187186539173,
        "right_subspace_overlap_bottom_k": -0.015888188034296036,
        "interaction_matrix_overlap_top_k": 0.0012236040784046054,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0015380490804091096,
        "stable_rank": -0.0014365275856107473,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.002062915824353695,
        "layerwise_effective_rank_mergeability_score": 0.0022570728324353695,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": -0.0033128811046481133,
        "task_vector_dot_product": -0.002074360614642501,
        "weight_space_angle": 0.007544898893684149,
        "task_vector_magnitude_ratio": -0.001675457926467061,
        "singular_value_overlap": 0.006201879121363163,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.006583229172974825,
        "activation_l2_distance": -0.002736788708716631,
        "activation_cosine_similarity": 0.006717224605381489,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.009946642443537712,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.025368375703692436,
        "encoder_gradient_dot_product": 0.0017848419956862926,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.025809668004512787,
        "input_gradient_dot_product": 0.0010502231307327747
      }
    },
    {
      "fold": 12,
      "held_out_task": "STL10",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7141512215961582,
      "val_r": 0.7146388872654563,
      "n_iterations": 246,
      "n_nonzero_coefficients": 20,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.006477988790720701,
        "right_subspace_overlap_bottom_k": -0.01586957275867462,
        "interaction_matrix_overlap_top_k": 0.001439468003809452,
        "interaction_matrix_overlap_bottom_k": -0.005393837578594685,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.0034805366303771734,
        "layerwise_effective_rank_mergeability_score": 0.003298304509371519,
        "task_vector_cosine_similarity": 0.002154349349439144,
        "task_vector_l2_distance": 0.002402711659669876,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": -0.004816691391170025,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.006551518104970455,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.008152984082698822,
        "activation_l2_distance": -0.0012436762917786837,
        "activation_cosine_similarity": 0.012276883237063885,
        "activation_magnitude_ratio": -0.0034222682006657124,
        "activation_dot_product": 0.006220444105565548,
        "encoder_gradient_cosine_similarity": 0.0028482056222856045,
        "encoder_gradient_l2_distance": -0.016530562192201614,
        "encoder_gradient_dot_product": -0.002094108145684004,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.008952366188168526,
        "input_gradient_dot_product": -0.009576952084898949
      }
    },
    {
      "fold": 13,
      "held_out_task": "CIFAR100",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7635301634147592,
      "val_r": 0.7610913105785797,
      "n_iterations": 407,
      "n_nonzero_coefficients": 14,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.0050262585282325745,
        "right_subspace_overlap_bottom_k": -0.011115268804132938,
        "interaction_matrix_overlap_top_k": 0.001966975163668394,
        "interaction_matrix_overlap_bottom_k": 0.0011622421443462372,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": 0.0019292524084448814,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.005104102659970522,
        "subspace_overlap": -0.0016720357816666365,
        "right_subspace_overlap": 0.008358077146112919,
        "activation_l2_distance": 0.0013096367474645376,
        "activation_cosine_similarity": 0.01058622170239687,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.014159198850393295,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0015916272532194853,
        "input_gradient_l2_distance": -0.005630462896078825,
        "input_gradient_dot_product": -0.0031767101027071476
      }
    },
    {
      "fold": 14,
      "held_out_task": "CIFAR10",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.766218101209297,
      "val_r": 0.7390701149446024,
      "n_iterations": 207,
      "n_nonzero_coefficients": 16,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.005925776436924934,
        "right_subspace_overlap_bottom_k": -0.014833161607384682,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.0010800051968544722,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": -0.0014665044145658612,
        "task_vector_l2_distance": 0.001426997478120029,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0034932238049805164,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.002168960403650999,
        "subspace_overlap": -0.0012775567593052983,
        "right_subspace_overlap": 0.007271558977663517,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.014022769406437874,
        "activation_magnitude_ratio": -0.0013304221211001277,
        "activation_dot_product": -0.00487874262034893,
        "encoder_gradient_cosine_similarity": 0.0014415476471185684,
        "encoder_gradient_l2_distance": -0.015329321846365929,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.002707290928810835,
        "input_gradient_l2_distance": -0.004078666679561138,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 15,
      "held_out_task": "Food101",
      "n_train_pairs": 162,
      "n_val_pairs": 17,
      "train_r": 0.7956610216670765,
      "val_r": 0.8747010435115844,
      "n_iterations": 483,
      "n_nonzero_coefficients": 16,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.004295747727155685,
        "right_subspace_overlap_bottom_k": -0.016013644635677338,
        "interaction_matrix_overlap_top_k": 0.001422423985786736,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0010421816259622574,
        "task_vector_l2_distance": -0.0042541297152638435,
        "task_vector_dot_product": -0.0014868841972202063,
        "weight_space_angle": 0.0035336269065737724,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.008267699740827084,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.011223962530493736,
        "activation_l2_distance": 0.0017476719804108143,
        "activation_cosine_similarity": 0.01102382317185402,
        "activation_magnitude_ratio": -0.0037047727964818478,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.022395748645067215,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": -0.001226160442456603,
        "input_gradient_l2_distance": -0.011874107643961906,
        "input_gradient_dot_product": 0.001316157984547317
      }
    },
    {
      "fold": 16,
      "held_out_task": "FashionMNIST",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7547188768375466,
      "val_r": 0.7748558732854434,
      "n_iterations": 221,
      "n_nonzero_coefficients": 17,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.00760407792404294,
        "right_subspace_overlap_bottom_k": -0.014370042830705643,
        "interaction_matrix_overlap_top_k": 0.001574160996824503,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": -0.0010421441402286291,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": -0.0017165369354188442,
        "layerwise_effective_rank_mergeability_score": 0.0010441081831231713,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": -0.0025996132753789425,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0029484834522008896,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.005239992402493954,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.005312396679073572,
        "activation_l2_distance": 0.0014291038969531655,
        "activation_cosine_similarity": 0.008447161875665188,
        "activation_magnitude_ratio": -0.0018299937946721911,
        "activation_dot_product": 0.003018053714185953,
        "encoder_gradient_cosine_similarity": 0.003176088212057948,
        "encoder_gradient_l2_distance": -0.013487215153872967,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.012485723942518234,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 17,
      "held_out_task": "EMNIST",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7577255219804372,
      "val_r": 0.6633278739207144,
      "n_iterations": 181,
      "n_nonzero_coefficients": 13,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.00775822764262557,
        "right_subspace_overlap_bottom_k": -0.013725434429943562,
        "interaction_matrix_overlap_top_k": 0.0017220373265445232,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": 0.0,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0015457309782505035,
        "weight_space_angle": -0.0010860887123271823,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.004191776271909475,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.005399889312684536,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.01317095197737217,
        "activation_magnitude_ratio": -0.0014925316208973527,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0015883739106357098,
        "encoder_gradient_l2_distance": -0.019419198855757713,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.015111020766198635,
        "input_gradient_dot_product": -0.0016323423478752375
      }
    },
    {
      "fold": 18,
      "held_out_task": "KMNIST",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.6909546624300167,
      "val_r": 0.5730936455838425,
      "n_iterations": 228,
      "n_nonzero_coefficients": 23,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.009156647138297558,
        "right_subspace_overlap_bottom_k": -0.017052579671144485,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": -0.0016760316211730242,
        "effective_rank": -0.0015416864771395922,
        "effective_rank_mergeability_score": 0.0016779573634266853,
        "stable_rank": -0.0012634459417313337,
        "spectral_gap": 0.0011844629188999534,
        "singular_value_ratio": -0.0010812426917254925,
        "layerwise_effective_rank": -0.002825451549142599,
        "layerwise_effective_rank_mergeability_score": 0.0021735485643148422,
        "task_vector_cosine_similarity": 0.0,
        "task_vector_l2_distance": -0.0022471360862255096,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.005696613807231188,
        "subspace_overlap": 0.0011914493516087532,
        "right_subspace_overlap": 0.005685778800398111,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.01300010085105896,
        "activation_magnitude_ratio": -0.00204554945230484,
        "activation_dot_product": 0.00672561302781105,
        "encoder_gradient_cosine_similarity": 0.002708334242925048,
        "encoder_gradient_l2_distance": -0.01860889233648777,
        "encoder_gradient_dot_product": 0.001784797292202711,
        "input_gradient_cosine_similarity": -0.0010702371364459395,
        "input_gradient_l2_distance": -0.01274374034255743,
        "input_gradient_dot_product": -0.0013865700457245111
      }
    },
    {
      "fold": 19,
      "held_out_task": "RenderedSST2",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.8170619227603284,
      "val_r": 0.7305572598251051,
      "n_iterations": 292,
      "n_nonzero_coefficients": 14,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.0,
        "right_subspace_overlap_bottom_k": -0.00436329236254096,
        "interaction_matrix_overlap_top_k": -0.0011303190840408206,
        "interaction_matrix_overlap_bottom_k": -0.002598951105028391,
        "effective_rank": 0.0,
        "effective_rank_mergeability_score": -0.0012881439179182053,
        "stable_rank": 0.0,
        "spectral_gap": 0.0,
        "singular_value_ratio": 0.0,
        "layerwise_effective_rank": 0.0,
        "layerwise_effective_rank_mergeability_score": 0.0,
        "task_vector_cosine_similarity": 0.0016817942960187793,
        "task_vector_l2_distance": -0.004165748134255409,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0011468935990706086,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.002938978374004364,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": -0.0019737049005925655,
        "activation_l2_distance": 0.0,
        "activation_cosine_similarity": 0.0011504924623295665,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.010114185512065887,
        "encoder_gradient_cosine_similarity": 0.0,
        "encoder_gradient_l2_distance": -0.014321299269795418,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.01082594133913517,
        "input_gradient_dot_product": -0.0032402712386101484
      }
    }
  ],
  "optimization_params": {
    "lambda_l1": 1.0,
    "n_iterations": 2000,
    "learning_rate": 0.01,
    "patience": 100,
    "convergence_threshold": 1e-05
  }
}