{
  "aggregate_metrics": {
    "train_r": 0.636008860567922,
    "train_p": 0.0,
    "val_r": 0.5470177078810532,
    "val_p": 2.4987981320384807e-29
  },
  "per_fold_stats": {
    "train_r_mean": 0.7603918462942831,
    "train_r_std": 0.019579279887000322,
    "val_r_mean": 0.6028658615960845,
    "val_r_std": 0.18465412726026958,
    "n_nonzero_mean": 23.95,
    "n_nonzero_std": 2.178875856950093
  },
  "average_coefficients": {
    "right_subspace_overlap_top_k": 0.009797555394470692,
    "right_subspace_overlap_bottom_k": -0.0132050272077322,
    "interaction_matrix_overlap_top_k": 0.0029477975331246853,
    "interaction_matrix_overlap_bottom_k": 0.0003103134222328663,
    "effective_rank": 0.05435997247695923,
    "effective_rank_mergeability_score": 0.03510959818959236,
    "stable_rank": 0.0026604225859045982,
    "spectral_gap": 0.040813885629177094,
    "singular_value_ratio": 0.038240864872932434,
    "layerwise_effective_rank": 0.038767118006944656,
    "layerwise_effective_rank_mergeability_score": 0.06595613062381744,
    "task_vector_cosine_similarity": 0.07314308732748032,
    "task_vector_l2_distance": -0.0007742965826764703,
    "task_vector_dot_product": 0.0013161993119865656,
    "weight_space_angle": 0.09051511436700821,
    "task_vector_magnitude_ratio": 0.003563620848581195,
    "singular_value_overlap": 0.004900320433080196,
    "subspace_overlap": 0.0024844533763825893,
    "right_subspace_overlap": 0.007836291566491127,
    "activation_l2_distance": 0.019489126279950142,
    "activation_cosine_similarity": 0.02716594934463501,
    "activation_magnitude_ratio": -2.151332955691032e-05,
    "activation_dot_product": 0.0024511869996786118,
    "encoder_gradient_cosine_similarity": 0.0035849474370479584,
    "encoder_gradient_l2_distance": -0.012591141276061535,
    "encoder_gradient_dot_product": 0.0023450329899787903,
    "input_gradient_cosine_similarity": 0.0026455430779606104,
    "input_gradient_l2_distance": -0.009899117052555084,
    "input_gradient_dot_product": -0.0005513665964826941
  },
  "coefficient_std": {
    "right_subspace_overlap_top_k": 0.004132028203457594,
    "right_subspace_overlap_bottom_k": 0.0032481735106557608,
    "interaction_matrix_overlap_top_k": 0.002315203659236431,
    "interaction_matrix_overlap_bottom_k": 0.0011211164528504014,
    "effective_rank": 0.013905918225646019,
    "effective_rank_mergeability_score": 0.013895738869905472,
    "stable_rank": 0.005912187043577433,
    "spectral_gap": 0.014267207123339176,
    "singular_value_ratio": 0.014404688030481339,
    "layerwise_effective_rank": 0.011149565689265728,
    "layerwise_effective_rank_mergeability_score": 0.015470439568161964,
    "task_vector_cosine_similarity": 0.013256005942821503,
    "task_vector_l2_distance": 0.0018063061870634556,
    "task_vector_dot_product": 0.0019117887131869793,
    "weight_space_angle": 0.012224286794662476,
    "task_vector_magnitude_ratio": 0.003570543136447668,
    "singular_value_overlap": 0.0031920697074383497,
    "subspace_overlap": 0.0027462877333164215,
    "right_subspace_overlap": 0.0035147862508893013,
    "activation_l2_distance": 0.006328114308416843,
    "activation_cosine_similarity": 0.007913737557828426,
    "activation_magnitude_ratio": 0.0006367500172927976,
    "activation_dot_product": 0.0039245146326720715,
    "encoder_gradient_cosine_similarity": 0.0014898671070113778,
    "encoder_gradient_l2_distance": 0.003537653014063835,
    "encoder_gradient_dot_product": 0.002044718014076352,
    "input_gradient_cosine_similarity": 0.0020377766340970993,
    "input_gradient_l2_distance": 0.0044233533553779125,
    "input_gradient_dot_product": 0.0012834820663556457
  },
  "nonzero_frequency": {
    "right_subspace_overlap_top_k": 0.95,
    "right_subspace_overlap_bottom_k": 1.0,
    "interaction_matrix_overlap_top_k": 0.75,
    "interaction_matrix_overlap_bottom_k": 0.5,
    "effective_rank": 1.0,
    "effective_rank_mergeability_score": 1.0,
    "stable_rank": 0.7,
    "spectral_gap": 1.0,
    "singular_value_ratio": 1.0,
    "layerwise_effective_rank": 1.0,
    "layerwise_effective_rank_mergeability_score": 1.0,
    "task_vector_cosine_similarity": 1.0,
    "task_vector_l2_distance": 0.35,
    "task_vector_dot_product": 0.5,
    "weight_space_angle": 1.0,
    "task_vector_magnitude_ratio": 0.85,
    "singular_value_overlap": 0.95,
    "subspace_overlap": 0.65,
    "right_subspace_overlap": 0.95,
    "activation_l2_distance": 1.0,
    "activation_cosine_similarity": 0.95,
    "activation_magnitude_ratio": 0.25,
    "activation_dot_product": 0.65,
    "encoder_gradient_cosine_similarity": 1.0,
    "encoder_gradient_l2_distance": 1.0,
    "encoder_gradient_dot_product": 0.75,
    "input_gradient_cosine_similarity": 0.75,
    "input_gradient_l2_distance": 1.0,
    "input_gradient_dot_product": 0.45
  },
  "fold_results": [
    {
      "fold": 0,
      "held_out_task": "SUN397",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7602331798041058,
      "val_r": 0.6510539049952611,
      "n_iterations": 523,
      "n_nonzero_coefficients": 27,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.008445714600384235,
        "right_subspace_overlap_bottom_k": -0.011657841503620148,
        "interaction_matrix_overlap_top_k": 0.0029467083513736725,
        "interaction_matrix_overlap_bottom_k": 0.0013273607473820448,
        "effective_rank": 0.07485952973365784,
        "effective_rank_mergeability_score": 0.06353633850812912,
        "stable_rank": 0.0027368480805307627,
        "spectral_gap": 0.05328845977783203,
        "singular_value_ratio": 0.05123745650053024,
        "layerwise_effective_rank": 0.01572132110595703,
        "layerwise_effective_rank_mergeability_score": 0.031884338706731796,
        "task_vector_cosine_similarity": 0.06711762398481369,
        "task_vector_l2_distance": -0.0016059931367635727,
        "task_vector_dot_product": 0.0017015370540320873,
        "weight_space_angle": 0.08634822815656662,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0029228224884718657,
        "subspace_overlap": 0.0034175643231719732,
        "right_subspace_overlap": 0.006265781819820404,
        "activation_l2_distance": 0.021642444655299187,
        "activation_cosine_similarity": 0.030540233477950096,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": -0.003029260551556945,
        "encoder_gradient_cosine_similarity": 0.003368286183103919,
        "encoder_gradient_l2_distance": -0.01123195979744196,
        "encoder_gradient_dot_product": 0.001770024886354804,
        "input_gradient_cosine_similarity": 0.003527234308421612,
        "input_gradient_l2_distance": -0.006709341891109943,
        "input_gradient_dot_product": -0.0028918557800352573
      }
    },
    {
      "fold": 1,
      "held_out_task": "Cars",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7176167085603844,
      "val_r": 0.41177178957196153,
      "n_iterations": 343,
      "n_nonzero_coefficients": 24,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.004669573158025742,
        "right_subspace_overlap_bottom_k": -0.01099216565489769,
        "interaction_matrix_overlap_top_k": 0.004512439481914043,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.05276953801512718,
        "effective_rank_mergeability_score": 0.02957974746823311,
        "stable_rank": 0.0020070949103683233,
        "spectral_gap": 0.027874086052179337,
        "singular_value_ratio": 0.02671845071017742,
        "layerwise_effective_rank": 0.04104198142886162,
        "layerwise_effective_rank_mergeability_score": 0.06661196053028107,
        "task_vector_cosine_similarity": 0.08815452456474304,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.00182082480750978,
        "weight_space_angle": 0.10480973869562149,
        "task_vector_magnitude_ratio": 0.0022487209644168615,
        "singular_value_overlap": 0.0050592366605997086,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.009742332622408867,
        "activation_l2_distance": 0.025159010663628578,
        "activation_cosine_similarity": 0.028295285999774933,
        "activation_magnitude_ratio": -0.0010510036954656243,
        "activation_dot_product": 0.0019169820006936789,
        "encoder_gradient_cosine_similarity": 0.002828196156769991,
        "encoder_gradient_l2_distance": -0.013365596532821655,
        "encoder_gradient_dot_product": 0.007661711424589157,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.010095737874507904,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 2,
      "held_out_task": "RESISC45",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7880162645958032,
      "val_r": 0.7843400544464499,
      "n_iterations": 501,
      "n_nonzero_coefficients": 23,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.010806366801261902,
        "right_subspace_overlap_bottom_k": -0.010189277119934559,
        "interaction_matrix_overlap_top_k": 0.0014522207202389836,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.050217628479003906,
        "effective_rank_mergeability_score": 0.02394859492778778,
        "stable_rank": 0.0011691163526847959,
        "spectral_gap": 0.04896235093474388,
        "singular_value_ratio": 0.04874503239989281,
        "layerwise_effective_rank": 0.050663094967603683,
        "layerwise_effective_rank_mergeability_score": 0.08308206498622894,
        "task_vector_cosine_similarity": 0.0639258325099945,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.08094679564237595,
        "task_vector_magnitude_ratio": 0.002564789727330208,
        "singular_value_overlap": 0.001143230707384646,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.010127956047654152,
        "activation_l2_distance": 0.017061220481991768,
        "activation_cosine_similarity": 0.023953117430210114,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0014457179931923747,
        "encoder_gradient_l2_distance": -0.012768558226525784,
        "encoder_gradient_dot_product": 0.003079058602452278,
        "input_gradient_cosine_similarity": 0.002100772690027952,
        "input_gradient_l2_distance": -0.010221216827630997,
        "input_gradient_dot_product": -0.00204808684065938
      }
    },
    {
      "fold": 3,
      "held_out_task": "EuroSAT",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7308945973824823,
      "val_r": 0.717824996492944,
      "n_iterations": 537,
      "n_nonzero_coefficients": 21,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.01427371520549059,
        "right_subspace_overlap_bottom_k": -0.011593079194426537,
        "interaction_matrix_overlap_top_k": 0.002092500915750861,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.03450019657611847,
        "effective_rank_mergeability_score": 0.015147550962865353,
        "stable_rank": 0.0,
        "spectral_gap": 0.061613913625478745,
        "singular_value_ratio": 0.06101638078689575,
        "layerwise_effective_rank": 0.05300329253077507,
        "layerwise_effective_rank_mergeability_score": 0.07978519797325134,
        "task_vector_cosine_similarity": 0.06182216480374336,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.07762698084115982,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0016471717972308397,
        "subspace_overlap": 0.0018850343767553568,
        "right_subspace_overlap": 0.0032013989984989166,
        "activation_l2_distance": 0.019633445888757706,
        "activation_cosine_similarity": 0.026796258985996246,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": -0.0013788886135444045,
        "encoder_gradient_cosine_similarity": 0.0025795770343393087,
        "encoder_gradient_l2_distance": -0.009534932672977448,
        "encoder_gradient_dot_product": 0.0014999577542766929,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.0063939462415874004,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 4,
      "held_out_task": "SVHN",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7542514380757174,
      "val_r": 0.6816973872052975,
      "n_iterations": 456,
      "n_nonzero_coefficients": 25,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.007547907996922731,
        "right_subspace_overlap_bottom_k": -0.011715313419699669,
        "interaction_matrix_overlap_top_k": 0.002972938120365143,
        "interaction_matrix_overlap_bottom_k": 0.0015358394011855125,
        "effective_rank": 0.04620683193206787,
        "effective_rank_mergeability_score": 0.023261424154043198,
        "stable_rank": 0.0020054399501532316,
        "spectral_gap": 0.03964792937040329,
        "singular_value_ratio": 0.03640588000416756,
        "layerwise_effective_rank": 0.0451372005045414,
        "layerwise_effective_rank_mergeability_score": 0.06830679625272751,
        "task_vector_cosine_similarity": 0.08609943836927414,
        "task_vector_l2_distance": -0.0016628848388791084,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.10176257789134979,
        "task_vector_magnitude_ratio": 0.001983778551220894,
        "singular_value_overlap": 0.003393472172319889,
        "subspace_overlap": 0.0028154542669653893,
        "right_subspace_overlap": 0.010908430442214012,
        "activation_l2_distance": 0.017116308212280273,
        "activation_cosine_similarity": 0.02477278560400009,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.00302702933549881,
        "encoder_gradient_l2_distance": -0.012803186662495136,
        "encoder_gradient_dot_product": 0.0049484046176075935,
        "input_gradient_cosine_similarity": 0.0024083571042865515,
        "input_gradient_l2_distance": -0.010994812473654747,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 5,
      "held_out_task": "GTSRB",
      "n_train_pairs": 163,
      "n_val_pairs": 16,
      "train_r": 0.7768527625112625,
      "val_r": 0.7182396860478898,
      "n_iterations": 385,
      "n_nonzero_coefficients": 23,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.015918772667646408,
        "right_subspace_overlap_bottom_k": -0.014724748209118843,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": -0.0011686945799738169,
        "effective_rank": 0.05053598806262016,
        "effective_rank_mergeability_score": 0.02877831645309925,
        "stable_rank": 0.0018399882828816772,
        "spectral_gap": 0.03171686455607414,
        "singular_value_ratio": 0.030749991536140442,
        "layerwise_effective_rank": 0.057869888842105865,
        "layerwise_effective_rank_mergeability_score": 0.08553779870271683,
        "task_vector_cosine_similarity": 0.07362072169780731,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.004612022079527378,
        "weight_space_angle": 0.0951140895485878,
        "task_vector_magnitude_ratio": 0.0017794822342693806,
        "singular_value_overlap": 0.003051162464544177,
        "subspace_overlap": 0.0018479579593986273,
        "right_subspace_overlap": 0.005559207871556282,
        "activation_l2_distance": 0.014529400505125523,
        "activation_cosine_similarity": 0.025716286152601242,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0056084864772856236,
        "encoder_gradient_l2_distance": -0.017393313348293304,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0017913318006321788,
        "input_gradient_l2_distance": -0.0026296330615878105,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 6,
      "held_out_task": "MNIST",
      "n_train_pairs": 164,
      "n_val_pairs": 15,
      "train_r": 0.7450440524931728,
      "val_r": 0.6375791075446408,
      "n_iterations": 544,
      "n_nonzero_coefficients": 26,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.009240454994142056,
        "right_subspace_overlap_bottom_k": -0.01750115491449833,
        "interaction_matrix_overlap_top_k": 0.004205784760415554,
        "interaction_matrix_overlap_bottom_k": 0.0018196039600297809,
        "effective_rank": 0.043952323496341705,
        "effective_rank_mergeability_score": 0.017774850130081177,
        "stable_rank": 0.0016658131498843431,
        "spectral_gap": 0.04199517145752907,
        "singular_value_ratio": 0.04218101501464844,
        "layerwise_effective_rank": 0.04131656140089035,
        "layerwise_effective_rank_mergeability_score": 0.0722721740603447,
        "task_vector_cosine_similarity": 0.0936817154288292,
        "task_vector_l2_distance": 0.0024831797927618027,
        "task_vector_dot_product": 0.0016784629551693797,
        "weight_space_angle": 0.0999901220202446,
        "task_vector_magnitude_ratio": 0.001234646188095212,
        "singular_value_overlap": 0.004972153343260288,
        "subspace_overlap": 0.0017155504319816828,
        "right_subspace_overlap": 0.005970028229057789,
        "activation_l2_distance": 0.018871041014790535,
        "activation_cosine_similarity": 0.03291245549917221,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0041092014871537685,
        "encoder_gradient_l2_distance": -0.013008715584874153,
        "encoder_gradient_dot_product": 0.001793603296391666,
        "input_gradient_cosine_similarity": 0.001990805845707655,
        "input_gradient_l2_distance": -0.007849989458918571,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 7,
      "held_out_task": "DTD",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7483677923629198,
      "val_r": 0.5284533106735473,
      "n_iterations": 300,
      "n_nonzero_coefficients": 26,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.012204858474433422,
        "right_subspace_overlap_bottom_k": -0.01629316806793213,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.001702398993074894,
        "effective_rank": 0.07184580713510513,
        "effective_rank_mergeability_score": 0.0458490364253521,
        "stable_rank": 0.0010523421224206686,
        "spectral_gap": 0.04726209118962288,
        "singular_value_ratio": 0.04656081646680832,
        "layerwise_effective_rank": 0.03552635759115219,
        "layerwise_effective_rank_mergeability_score": 0.06738368421792984,
        "task_vector_cosine_similarity": 0.04849442094564438,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.003226952161639929,
        "weight_space_angle": 0.06847326457500458,
        "task_vector_magnitude_ratio": 0.004120031371712685,
        "singular_value_overlap": 0.006038963329046965,
        "subspace_overlap": 0.003647236153483391,
        "right_subspace_overlap": 0.00689297029748559,
        "activation_l2_distance": 0.015720492228865623,
        "activation_cosine_similarity": 0.025808341801166534,
        "activation_magnitude_ratio": -0.0010020805057138205,
        "activation_dot_product": 0.002048204652965069,
        "encoder_gradient_cosine_similarity": 0.00337672489695251,
        "encoder_gradient_l2_distance": -0.007687402423471212,
        "encoder_gradient_dot_product": 0.004930244758725166,
        "input_gradient_cosine_similarity": 0.004970982670783997,
        "input_gradient_l2_distance": -0.00799369066953659,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 8,
      "held_out_task": "Flowers102",
      "n_train_pairs": 163,
      "n_val_pairs": 16,
      "train_r": 0.781148710643519,
      "val_r": 0.020921508523550236,
      "n_iterations": 238,
      "n_nonzero_coefficients": 27,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.01875588297843933,
        "right_subspace_overlap_bottom_k": -0.01857205480337143,
        "interaction_matrix_overlap_top_k": 0.0060261269100010395,
        "interaction_matrix_overlap_bottom_k": 0.002129815286025405,
        "effective_rank": 0.030437249690294266,
        "effective_rank_mergeability_score": 0.04226686805486679,
        "stable_rank": 0.028010854497551918,
        "spectral_gap": 0.03271938115358353,
        "singular_value_ratio": 0.029988471418619156,
        "layerwise_effective_rank": 0.03759116679430008,
        "layerwise_effective_rank_mergeability_score": 0.06672268360853195,
        "task_vector_cosine_similarity": 0.06423065066337585,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.08351156115531921,
        "task_vector_magnitude_ratio": 0.0064734951592981815,
        "singular_value_overlap": 0.004665628541260958,
        "subspace_overlap": 0.0010492530418559909,
        "right_subspace_overlap": 0.008152930065989494,
        "activation_l2_distance": 0.026264404878020287,
        "activation_cosine_similarity": 0.03209364414215088,
        "activation_magnitude_ratio": 0.0014579070266336203,
        "activation_dot_product": 0.0015622942009940743,
        "encoder_gradient_cosine_similarity": 0.007643657736480236,
        "encoder_gradient_l2_distance": -0.005689941346645355,
        "encoder_gradient_dot_product": 0.00439385324716568,
        "input_gradient_cosine_similarity": 0.0017851796001195908,
        "input_gradient_l2_distance": -0.010177492164075375,
        "input_gradient_dot_product": -0.003280606120824814
      }
    },
    {
      "fold": 9,
      "held_out_task": "PCAM",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7546796740047407,
      "val_r": 0.45917723731961163,
      "n_iterations": 434,
      "n_nonzero_coefficients": 19,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.01425003819167614,
        "right_subspace_overlap_bottom_k": -0.015362954698503017,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.08198992162942886,
        "effective_rank_mergeability_score": 0.061709463596343994,
        "stable_rank": 0.0,
        "spectral_gap": 0.04505809023976326,
        "singular_value_ratio": 0.0459529347717762,
        "layerwise_effective_rank": 0.03322852402925491,
        "layerwise_effective_rank_mergeability_score": 0.06126109138131142,
        "task_vector_cosine_similarity": 0.0701519101858139,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.001033987384289503,
        "weight_space_angle": 0.0885695293545723,
        "task_vector_magnitude_ratio": 0.002652094466611743,
        "singular_value_overlap": 0.001201228005811572,
        "subspace_overlap": 0.0070610661059618,
        "right_subspace_overlap": 0.006154593545943499,
        "activation_l2_distance": 0.003571803215891123,
        "activation_cosine_similarity": 0.0,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.002797609195113182,
        "encoder_gradient_l2_distance": -0.009565269574522972,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.008328192867338657,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 10,
      "held_out_task": "FER2013",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7774505769074354,
      "val_r": 0.262260509731293,
      "n_iterations": 351,
      "n_nonzero_coefficients": 24,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.005421049427241087,
        "right_subspace_overlap_bottom_k": -0.01392168179154396,
        "interaction_matrix_overlap_top_k": 0.007122265174984932,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.04429490864276886,
        "effective_rank_mergeability_score": 0.05026128143072128,
        "stable_rank": 0.002363264560699463,
        "spectral_gap": 0.03064604662358761,
        "singular_value_ratio": 0.03148643672466278,
        "layerwise_effective_rank": 0.05644480139017105,
        "layerwise_effective_rank_mergeability_score": 0.06181076169013977,
        "task_vector_cosine_similarity": 0.06786061078310013,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.003692965256050229,
        "weight_space_angle": 0.09246567636728287,
        "task_vector_magnitude_ratio": 0.0031891963444650173,
        "singular_value_overlap": 0.010895101353526115,
        "subspace_overlap": 0.0026315962895751,
        "right_subspace_overlap": 0.004787693731486797,
        "activation_l2_distance": 0.014825192280113697,
        "activation_cosine_similarity": 0.025519942864775658,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.010320421308279037,
        "encoder_gradient_cosine_similarity": 0.003907721024006605,
        "encoder_gradient_l2_distance": -0.020473694428801537,
        "encoder_gradient_dot_product": 0.002385206986218691,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.002684214850887656,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 11,
      "held_out_task": "OxfordIIITPet",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7482341179013767,
      "val_r": 0.6881970991990278,
      "n_iterations": 553,
      "n_nonzero_coefficients": 25,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.007779944222420454,
        "right_subspace_overlap_bottom_k": -0.011396615765988827,
        "interaction_matrix_overlap_top_k": 0.006889307405799627,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.03443816304206848,
        "effective_rank_mergeability_score": 0.026144709438085556,
        "stable_rank": 0.003338388167321682,
        "spectral_gap": 0.08523096144199371,
        "singular_value_ratio": 0.08157100528478622,
        "layerwise_effective_rank": 0.016497613862156868,
        "layerwise_effective_rank_mergeability_score": 0.027957962825894356,
        "task_vector_cosine_similarity": 0.047195613384246826,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.07377175241708755,
        "task_vector_magnitude_ratio": 0.005237981211394072,
        "singular_value_overlap": 0.008419957011938095,
        "subspace_overlap": 0.004767949227243662,
        "right_subspace_overlap": 0.011899339966475964,
        "activation_l2_distance": 0.029148584231734276,
        "activation_cosine_similarity": 0.03544263169169426,
        "activation_magnitude_ratio": 0.0014735704753547907,
        "activation_dot_product": 0.0035611253697425127,
        "encoder_gradient_cosine_similarity": 0.004923287313431501,
        "encoder_gradient_l2_distance": -0.012985944747924805,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.006510498002171516,
        "input_gradient_l2_distance": -0.01349292416125536,
        "input_gradient_dot_product": -0.0017649729270488024
      }
    },
    {
      "fold": 12,
      "held_out_task": "STL10",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.7671894062912067,
      "val_r": 0.6816614576531593,
      "n_iterations": 263,
      "n_nonzero_coefficients": 21,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.006454101298004389,
        "right_subspace_overlap_bottom_k": -0.011690374463796616,
        "interaction_matrix_overlap_top_k": 0.0024222172796726227,
        "interaction_matrix_overlap_bottom_k": -0.0011829647701233625,
        "effective_rank": 0.05568665266036987,
        "effective_rank_mergeability_score": 0.02348557487130165,
        "stable_rank": 0.0,
        "spectral_gap": 0.0364292711019516,
        "singular_value_ratio": 0.03643253818154335,
        "layerwise_effective_rank": 0.03481423854827881,
        "layerwise_effective_rank_mergeability_score": 0.07133252918720245,
        "task_vector_cosine_similarity": 0.07468349486589432,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.0810864269733429,
        "task_vector_magnitude_ratio": 0.0,
        "singular_value_overlap": 0.0032288958318531513,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.012098615989089012,
        "activation_l2_distance": 0.02853785827755928,
        "activation_cosine_similarity": 0.03428764268755913,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0027372438926249743,
        "encoder_gradient_l2_distance": -0.008756374940276146,
        "encoder_gradient_dot_product": 0.0017024684930220246,
        "input_gradient_cosine_similarity": 0.002892098855227232,
        "input_gradient_l2_distance": -0.008916477672755718,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 13,
      "held_out_task": "CIFAR100",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7581867424399589,
      "val_r": 0.7533070496886474,
      "n_iterations": 341,
      "n_nonzero_coefficients": 24,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.011465783230960369,
        "right_subspace_overlap_bottom_k": -0.013728480786085129,
        "interaction_matrix_overlap_top_k": 0.004899340681731701,
        "interaction_matrix_overlap_bottom_k": 0.0014261486940085888,
        "effective_rank": 0.05322251841425896,
        "effective_rank_mergeability_score": 0.03790004178881645,
        "stable_rank": 0.0,
        "spectral_gap": 0.04080292955040932,
        "singular_value_ratio": 0.04126583784818649,
        "layerwise_effective_rank": 0.031084634363651276,
        "layerwise_effective_rank_mergeability_score": 0.05987150967121124,
        "task_vector_cosine_similarity": 0.07971974462270737,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.10800260305404663,
        "task_vector_magnitude_ratio": 0.004927343688905239,
        "singular_value_overlap": 0.009436339139938354,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.012137345969676971,
        "activation_l2_distance": 0.012607118114829063,
        "activation_cosine_similarity": 0.023040518164634705,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.011923897080123425,
        "encoder_gradient_cosine_similarity": 0.003225516527891159,
        "encoder_gradient_l2_distance": -0.017860673367977142,
        "encoder_gradient_dot_product": 0.0020174451638013124,
        "input_gradient_cosine_similarity": 0.005515174940228462,
        "input_gradient_l2_distance": -0.014717232435941696,
        "input_gradient_dot_product": 0.0019280363339930773
      }
    },
    {
      "fold": 14,
      "held_out_task": "CIFAR10",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7478595387275223,
      "val_r": 0.7298390483952464,
      "n_iterations": 263,
      "n_nonzero_coefficients": 24,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.009577536024153233,
        "right_subspace_overlap_bottom_k": -0.01370878517627716,
        "interaction_matrix_overlap_top_k": 0.0029236427508294582,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.04374336078763008,
        "effective_rank_mergeability_score": 0.01661696285009384,
        "stable_rank": 0.0031623567920178175,
        "spectral_gap": 0.04042035713791847,
        "singular_value_ratio": 0.029365137219429016,
        "layerwise_effective_rank": 0.04092630743980408,
        "layerwise_effective_rank_mergeability_score": 0.07115034759044647,
        "task_vector_cosine_similarity": 0.08090327680110931,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.10043668001890182,
        "task_vector_magnitude_ratio": 0.012022781185805798,
        "singular_value_overlap": 0.0034160849172621965,
        "subspace_overlap": 0.010266268625855446,
        "right_subspace_overlap": 0.006299299653619528,
        "activation_l2_distance": 0.013228144496679306,
        "activation_cosine_similarity": 0.02295910194516182,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0030703824013471603,
        "encoder_gradient_cosine_similarity": 0.004511023405939341,
        "encoder_gradient_l2_distance": -0.013674253597855568,
        "encoder_gradient_dot_product": 0.0022138801869004965,
        "input_gradient_cosine_similarity": 0.0,
        "input_gradient_l2_distance": -0.013520793057978153,
        "input_gradient_dot_product": -0.001087153097614646
      }
    },
    {
      "fold": 15,
      "held_out_task": "Food101",
      "n_train_pairs": 162,
      "n_val_pairs": 17,
      "train_r": 0.8002974180075895,
      "val_r": 0.6050794943750459,
      "n_iterations": 263,
      "n_nonzero_coefficients": 22,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.008655274286866188,
        "right_subspace_overlap_bottom_k": -0.013375569134950638,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.05402281880378723,
        "effective_rank_mergeability_score": 0.04060268774628639,
        "stable_rank": 0.001027920050546527,
        "spectral_gap": 0.023249223828315735,
        "singular_value_ratio": 0.0207726638764143,
        "layerwise_effective_rank": 0.02935105748474598,
        "layerwise_effective_rank_mergeability_score": 0.048227034509181976,
        "task_vector_cosine_similarity": 0.09922850131988525,
        "task_vector_l2_distance": -0.004671430215239525,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.11345893889665604,
        "task_vector_magnitude_ratio": 0.0017736094305291772,
        "singular_value_overlap": 0.00865877140313387,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.013255054131150246,
        "activation_l2_distance": 0.029413189738988876,
        "activation_cosine_similarity": 0.03714824095368385,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0,
        "encoder_gradient_cosine_similarity": 0.0013598236255347729,
        "encoder_gradient_l2_distance": -0.013986648991703987,
        "encoder_gradient_dot_product": 0.0010415377328172326,
        "input_gradient_cosine_similarity": 0.005942468531429768,
        "input_gradient_l2_distance": -0.010748865082859993,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 16,
      "held_out_task": "FashionMNIST",
      "n_train_pairs": 160,
      "n_val_pairs": 19,
      "train_r": 0.769711971300589,
      "val_r": 0.7567865532489317,
      "n_iterations": 320,
      "n_nonzero_coefficients": 23,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.008392978459596634,
        "right_subspace_overlap_bottom_k": -0.013712020590901375,
        "interaction_matrix_overlap_top_k": 0.005573976784944534,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.06935754418373108,
        "effective_rank_mergeability_score": 0.044140227138996124,
        "stable_rank": 0.0,
        "spectral_gap": 0.032377131283283234,
        "singular_value_ratio": 0.02941538766026497,
        "layerwise_effective_rank": 0.03393636271357536,
        "layerwise_effective_rank_mergeability_score": 0.06426579505205154,
        "task_vector_cosine_similarity": 0.06937575340270996,
        "task_vector_l2_distance": -0.001691895187832415,
        "task_vector_dot_product": 0.0,
        "weight_space_angle": 0.084381602704525,
        "task_vector_magnitude_ratio": 0.0012477264972403646,
        "singular_value_overlap": 0.005465805996209383,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.008707348257303238,
        "activation_l2_distance": 0.02093588188290596,
        "activation_cosine_similarity": 0.0296732559800148,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0017937201773747802,
        "encoder_gradient_cosine_similarity": 0.004042437300086021,
        "encoder_gradient_l2_distance": -0.013344700448215008,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.0035255993716418743,
        "input_gradient_l2_distance": -0.01508785504847765,
        "input_gradient_dot_product": -0.0010589327430352569
      }
    },
    {
      "fold": 17,
      "held_out_task": "EMNIST",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7432388133180653,
      "val_r": 0.7082440313059885,
      "n_iterations": 443,
      "n_nonzero_coefficients": 27,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.009637675248086452,
        "right_subspace_overlap_bottom_k": -0.015366344712674618,
        "interaction_matrix_overlap_top_k": 0.0036534336395561695,
        "interaction_matrix_overlap_bottom_k": -0.0024863588623702526,
        "effective_rank": 0.06343119591474533,
        "effective_rank_mergeability_score": 0.0312346164137125,
        "stable_rank": 0.0015931872185319662,
        "spectral_gap": 0.026381472125649452,
        "singular_value_ratio": 0.02404179237782955,
        "layerwise_effective_rank": 0.036310821771621704,
        "layerwise_effective_rank_mergeability_score": 0.0838649794459343,
        "task_vector_cosine_similarity": 0.07798988372087479,
        "task_vector_l2_distance": -0.00239388644695282,
        "task_vector_dot_product": 0.006045778747648001,
        "weight_space_angle": 0.0884164422750473,
        "task_vector_magnitude_ratio": 0.001813146285712719,
        "singular_value_overlap": 0.011165876872837543,
        "subspace_overlap": 0.002216784516349435,
        "right_subspace_overlap": 0.003078971989452839,
        "activation_l2_distance": 0.023887399584054947,
        "activation_cosine_similarity": 0.037242159247398376,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.007796066347509623,
        "encoder_gradient_cosine_similarity": 0.004506764933466911,
        "encoder_gradient_l2_distance": -0.01661876030266285,
        "encoder_gradient_dot_product": 0.004786347039043903,
        "input_gradient_cosine_similarity": 0.004541096743196249,
        "input_gradient_l2_distance": -0.02242879383265972,
        "input_gradient_dot_product": 0.0
      }
    },
    {
      "fold": 18,
      "held_out_task": "KMNIST",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7809695131399639,
      "val_r": 0.5721181922635667,
      "n_iterations": 393,
      "n_nonzero_coefficients": 26,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.012453481554985046,
        "right_subspace_overlap_bottom_k": -0.015779249370098114,
        "interaction_matrix_overlap_top_k": 0.0012630483834072948,
        "interaction_matrix_overlap_bottom_k": 0.0011031199246644974,
        "effective_rank": 0.06878064572811127,
        "effective_rank_mergeability_score": 0.04936038330197334,
        "stable_rank": 0.0012358356034383178,
        "spectral_gap": 0.023178022354841232,
        "singular_value_ratio": 0.022180868312716484,
        "layerwise_effective_rank": 0.03474090248346329,
        "layerwise_effective_rank_mergeability_score": 0.06030777841806412,
        "task_vector_cosine_similarity": 0.08493194729089737,
        "task_vector_l2_distance": 0.0,
        "task_vector_dot_product": 0.003767920657992363,
        "weight_space_angle": 0.1044832319021225,
        "task_vector_magnitude_ratio": 0.004190361592918634,
        "singular_value_overlap": 0.0032245106995105743,
        "subspace_overlap": 0.0,
        "right_subspace_overlap": 0.011486532166600227,
        "activation_l2_distance": 0.02102701924741268,
        "activation_cosine_similarity": 0.027701159939169884,
        "activation_magnitude_ratio": 0.0,
        "activation_dot_product": 0.0010130873415619135,
        "encoder_gradient_cosine_similarity": 0.004535524640232325,
        "encoder_gradient_l2_distance": -0.011328227818012238,
        "encoder_gradient_dot_product": 0.002676913980394602,
        "input_gradient_cosine_similarity": 0.001949070836417377,
        "input_gradient_l2_distance": -0.005787009373307228,
        "input_gradient_dot_product": -0.002177067566663027
      }
    },
    {
      "fold": 19,
      "held_out_task": "RenderedSST2",
      "n_train_pairs": 161,
      "n_val_pairs": 18,
      "train_r": 0.7575936474178476,
      "val_r": 0.68876481323963,
      "n_iterations": 243,
      "n_nonzero_coefficients": 22,
      "coefficients": {
        "right_subspace_overlap_top_k": 0.0,
        "right_subspace_overlap_bottom_k": -0.0028196878265589476,
        "interaction_matrix_overlap_top_k": 0.0,
        "interaction_matrix_overlap_bottom_k": 0.0,
        "effective_rank": 0.06290663033723831,
        "effective_rank_mergeability_score": 0.030593276023864746,
        "stable_rank": 0.0,
        "spectral_gap": 0.04742387309670448,
        "singular_value_ratio": 0.028729286044836044,
        "layerwise_effective_rank": 0.050136078149080276,
        "layerwise_effective_rank_mergeability_score": 0.0874861404299736,
        "task_vector_cosine_similarity": 0.06367392092943192,
        "task_vector_l2_distance": -0.005943020805716515,
        "task_vector_dot_product": -0.0012564667267724872,
        "weight_space_angle": 0.07664602994918823,
        "task_vector_magnitude_ratio": 0.013813228346407413,
        "singular_value_overlap": 0.0,
        "subspace_overlap": 0.006367349531501532,
        "right_subspace_overlap": 0.0,
        "activation_l2_distance": 0.01660252921283245,
        "activation_cosine_similarity": 0.0194159634411335,
        "activation_magnitude_ratio": -0.001308659790083766,
        "activation_dot_product": 0.008425704203546047,
        "encoder_gradient_cosine_similarity": 0.0011651107342913747,
        "encoder_gradient_l2_distance": -0.009744662791490555,
        "encoder_gradient_dot_product": 0.0,
        "input_gradient_cosine_similarity": 0.003460187930613756,
        "input_gradient_l2_distance": -0.009204111993312836,
        "input_gradient_dot_product": 0.0013533078599721193
      }
    }
  ],
  "optimization_params": {
    "lambda_l1": 1,
    "n_iterations": 2000,
    "learning_rate": 0.01,
    "patience": 100,
    "convergence_threshold": 1e-05
  }
}