{
  "bert-base-uncased": {
    "robustness": [
      {
        "noise_type": "baseline",
        "noise_level": 0.0,
        "mean_robustness": 0.9999999976158143,
        "std_robustness": 9.081870674204765e-08,
        "ci_95_lower": 0.9999999795954125,
        "ci_95_upper": 1.000000015636216,
        "p_value_uncorrected": 1.0,
        "p_value_bonferroni": 1.0,
        "p_value_fdr": 1.0,
        "effect_size": 0.0,
        "significant_uncorrected": false,
        "significant_bonferroni": false,
        "significant_fdr": false,
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.05,
        "mean_robustness": 0.7965516692399979,
        "std_robustness": 0.07670952254258519,
        "ci_95_lower": 0.7813308357428844,
        "ci_95_upper": 0.8117725027371113,
        "p_value_uncorrected": 4.352679017856285e-67,
        "p_value_bonferroni": 1.3058037053568853e-65,
        "p_value_fdr": 4.663584661988876e-67,
        "effect_size": -2240158.83592871,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.1,
        "mean_robustness": 0.6559821963310242,
        "std_robustness": 0.1101535886766133,
        "ci_95_lower": 0.6341253345388591,
        "ci_95_upper": 0.6778390581231892,
        "p_value_uncorrected": 1.9451074503039028e-78,
        "p_value_bonferroni": 5.835322350911708e-77,
        "p_value_fdr": 8.336174787016726e-78,
        "effect_size": -3787961.9037287524,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.2,
        "mean_robustness": 0.5202380892634392,
        "std_robustness": 0.09990993179099918,
        "ci_95_lower": 0.5004137912350615,
        "ci_95_upper": 0.5400623872918169,
        "p_value_uncorrected": 4.765468286699495e-111,
        "p_value_bonferroni": 1.4296404860098484e-109,
        "p_value_fdr": 3.574101215024621e-110,
        "effect_size": -5282633.122216139,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.05,
        "mean_robustness": 0.9400493943691254,
        "std_robustness": 0.03588363930239303,
        "ci_95_lower": 0.9329293018305622,
        "ci_95_upper": 0.9471694869076885,
        "p_value_uncorrected": 1.1310557586300771e-39,
        "p_value_bonferroni": 3.393167275890231e-38,
        "p_value_fdr": 3.393167275890231e-38,
        "effect_size": -660112.9370512461,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.1,
        "mean_robustness": 0.9445330762863159,
        "std_robustness": 0.03749340258545619,
        "ci_95_lower": 0.9370935717883357,
        "ci_95_upper": 0.951972580784296,
        "p_value_uncorrected": 7.625466758969469e-34,
        "p_value_bonferroni": 2.2876400276908406e-32,
        "p_value_fdr": 2.2876400276908405e-33,
        "effect_size": -610743.3514445549,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.2,
        "mean_robustness": 0.9408951044082642,
        "std_robustness": 0.038111897868602834,
        "ci_95_lower": 0.9333328770277592,
        "ci_95_upper": 0.9484573317887691,
        "p_value_uncorrected": 4.965922864405924e-36,
        "p_value_bonferroni": 1.4897768593217773e-34,
        "p_value_fdr": 8.763393290128102e-36,
        "effect_size": -650800.8683213877,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.05,
        "mean_robustness": 0.9949606329202652,
        "std_robustness": 0.02168361960960856,
        "ci_95_lower": 0.99065813236032,
        "ci_95_upper": 0.9992631334802103,
        "p_value_uncorrected": 0.021137652066227874,
        "p_value_bonferroni": 0.6341295619868362,
        "p_value_fdr": 0.04227530413245575,
        "effect_size": -55488.17943270669,
        "significant_uncorrected": "True",
        "significant_bonferroni": "False",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.1,
        "mean_robustness": 0.9894623190164566,
        "std_robustness": 0.027209425456101485,
        "ci_95_lower": 0.9840633786933758,
        "ci_95_upper": 0.9948612593395375,
        "p_value_uncorrected": 0.00014612848103037336,
        "p_value_bonferroni": 0.004383854430911201,
        "p_value_fdr": 0.00016860978580427697,
        "effect_size": -116029.82444231208,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.2,
        "mean_robustness": 0.9823726111650467,
        "std_robustness": 0.043402689859191546,
        "ci_95_lower": 0.9737605758690785,
        "ci_95_upper": 0.9909846464610149,
        "p_value_uncorrected": 7.026099127066304e-05,
        "p_value_bonferroni": 0.002107829738119891,
        "p_value_fdr": 9.164477122260396e-05,
        "effect_size": -194094.22445129743,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999976158143
      }
    ],
    "causal_circuits": [
      {
        "layer": 0,
        "head": 0,
        "baseline_robustness": 0.6758764004707336,
        "intervention_robustness": 0.7236315762996673,
        "causal_effect": 0.04775517582893374,
        "p_value": 0.0018517939454978421,
        "significant": "True"
      },
      {
        "layer": 0,
        "head": 1,
        "baseline_robustness": 0.6606343346834183,
        "intervention_robustness": 0.6440551716089249,
        "causal_effect": -0.016579163074493364,
        "p_value": 0.45909193892266964,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 2,
        "baseline_robustness": 0.6584893447160721,
        "intervention_robustness": 0.6660674315690994,
        "causal_effect": 0.007578086853027299,
        "p_value": 0.7284863692506958,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 3,
        "baseline_robustness": 0.6506820982694625,
        "intervention_robustness": 0.6545390832424164,
        "causal_effect": 0.0038569849729538364,
        "p_value": 0.8331760731819016,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 4,
        "baseline_robustness": 0.657493736743927,
        "intervention_robustness": 0.662382378578186,
        "causal_effect": 0.004888641834259078,
        "p_value": 0.7898221815612894,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 5,
        "baseline_robustness": 0.6366811597347259,
        "intervention_robustness": 0.643965774178505,
        "causal_effect": 0.007284614443779036,
        "p_value": 0.6702792143963734,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 6,
        "baseline_robustness": 0.6584146595001221,
        "intervention_robustness": 0.6577956074476242,
        "causal_effect": -0.0006190520524979037,
        "p_value": 0.9701139697503499,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 7,
        "baseline_robustness": 0.6385667204856873,
        "intervention_robustness": 0.6712802010774612,
        "causal_effect": 0.032713480591773925,
        "p_value": 0.11890287662728836,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 8,
        "baseline_robustness": 0.6445358580350876,
        "intervention_robustness": 0.6624607157707214,
        "causal_effect": 0.017924857735633792,
        "p_value": 0.2533372493436381,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 9,
        "baseline_robustness": 0.6659842336177826,
        "intervention_robustness": 0.653669918179512,
        "causal_effect": -0.012314315438270551,
        "p_value": 0.46241580063482357,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 10,
        "baseline_robustness": 0.6622464644908905,
        "intervention_robustness": 0.6563738769292832,
        "causal_effect": -0.005872587561607356,
        "p_value": 0.7495067779949546,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 11,
        "baseline_robustness": 0.6713182127475739,
        "intervention_robustness": 0.6322545039653779,
        "causal_effect": -0.039063708782196005,
        "p_value": 0.04449602802484684,
        "significant": "True"
      },
      {
        "layer": 1,
        "head": 0,
        "baseline_robustness": 0.6612526601552964,
        "intervention_robustness": 0.6699390631914138,
        "causal_effect": 0.008686403036117452,
        "p_value": 0.6530828151403354,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 1,
        "baseline_robustness": 0.6506488960981369,
        "intervention_robustness": 0.6506742125749588,
        "causal_effect": 2.5316476821912737e-05,
        "p_value": 0.9988266033873902,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 2,
        "baseline_robustness": 0.6679166632890702,
        "intervention_robustness": 0.655827888250351,
        "causal_effect": -0.01208877503871919,
        "p_value": 0.49722357774542003,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 3,
        "baseline_robustness": 0.6526875936985016,
        "intervention_robustness": 0.6292889529466629,
        "causal_effect": -0.02339864075183873,
        "p_value": 0.211923347156855,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 4,
        "baseline_robustness": 0.6654781812429428,
        "intervention_robustness": 0.6521117615699769,
        "causal_effect": -0.013366419672965923,
        "p_value": 0.43701474669594276,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 5,
        "baseline_robustness": 0.6685063600540161,
        "intervention_robustness": 0.6664624398946762,
        "causal_effect": -0.0020439201593398515,
        "p_value": 0.9073510631228012,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 6,
        "baseline_robustness": 0.6571429151296616,
        "intervention_robustness": 0.6606684291362762,
        "causal_effect": 0.003525514006614605,
        "p_value": 0.8232474998797672,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 7,
        "baseline_robustness": 0.6589364618062973,
        "intervention_robustness": 0.6509845775365829,
        "causal_effect": -0.007951884269714382,
        "p_value": 0.6399997361800978,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 8,
        "baseline_robustness": 0.6559780848026275,
        "intervention_robustness": 0.6601442003250122,
        "causal_effect": 0.004166115522384661,
        "p_value": 0.8039698336604464,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 9,
        "baseline_robustness": 0.6739721858501434,
        "intervention_robustness": 0.6464954286813736,
        "causal_effect": -0.02747675716876985,
        "p_value": 0.12456232465907394,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 10,
        "baseline_robustness": 0.6503397661447525,
        "intervention_robustness": 0.6560980194807052,
        "causal_effect": 0.005758253335952701,
        "p_value": 0.6658138189687113,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 11,
        "baseline_robustness": 0.6496706581115723,
        "intervention_robustness": 0.6713605153560639,
        "causal_effect": 0.021689857244491573,
        "p_value": 0.19231522361331024,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 0,
        "baseline_robustness": 0.6672078150510788,
        "intervention_robustness": 0.6913354194164276,
        "causal_effect": 0.024127604365348776,
        "p_value": 0.16226091589519365,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 1,
        "baseline_robustness": 0.6295170885324478,
        "intervention_robustness": 0.660081775188446,
        "causal_effect": 0.030564686655998208,
        "p_value": 0.0946755292345637,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 2,
        "baseline_robustness": 0.6607312041521073,
        "intervention_robustness": 0.660395040512085,
        "causal_effect": -0.00033616364002231336,
        "p_value": 0.9830350732149238,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 3,
        "baseline_robustness": 0.6582037651538849,
        "intervention_robustness": 0.6481298297643662,
        "causal_effect": -0.010073935389518729,
        "p_value": 0.5796068820989614,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 4,
        "baseline_robustness": 0.6617253839969635,
        "intervention_robustness": 0.658533770442009,
        "causal_effect": -0.0031916135549544533,
        "p_value": 0.8623724954102745,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 5,
        "baseline_robustness": 0.6514932149648667,
        "intervention_robustness": 0.6495918053388595,
        "causal_effect": -0.0019014096260071556,
        "p_value": 0.9083955623399577,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 6,
        "baseline_robustness": 0.6454483437538147,
        "intervention_robustness": 0.6411128294467926,
        "causal_effect": -0.004335514307022059,
        "p_value": 0.8004709710375786,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 7,
        "baseline_robustness": 0.6642504334449768,
        "intervention_robustness": 0.659567973613739,
        "causal_effect": -0.0046824598312378285,
        "p_value": 0.8034750851517647,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 8,
        "baseline_robustness": 0.6531868684291839,
        "intervention_robustness": 0.677313602566719,
        "causal_effect": 0.024126734137535077,
        "p_value": 0.12122657608054802,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 9,
        "baseline_robustness": 0.6662560480833054,
        "intervention_robustness": 0.6420030605792999,
        "causal_effect": -0.024252987504005508,
        "p_value": 0.1871626114350857,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 10,
        "baseline_robustness": 0.6648065596818924,
        "intervention_robustness": 0.6285584020614624,
        "causal_effect": -0.03624815762042999,
        "p_value": 0.05031837930753082,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 11,
        "baseline_robustness": 0.6472573298215866,
        "intervention_robustness": 0.644656121134758,
        "causal_effect": -0.00260120868682856,
        "p_value": 0.8635736935935833,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 0,
        "baseline_robustness": 0.6431741148233414,
        "intervention_robustness": 0.6908127295970917,
        "causal_effect": 0.04763861477375031,
        "p_value": 0.007885394439307043,
        "significant": "True"
      },
      {
        "layer": 3,
        "head": 1,
        "baseline_robustness": 0.6629557090997696,
        "intervention_robustness": 0.654194386601448,
        "causal_effect": -0.008761322498321622,
        "p_value": 0.48430109585226133,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 2,
        "baseline_robustness": 0.6891096073389054,
        "intervention_robustness": 0.6741283357143402,
        "causal_effect": -0.014981271624565196,
        "p_value": 0.39048190194577814,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 3,
        "baseline_robustness": 0.6527283483743668,
        "intervention_robustness": 0.645628035068512,
        "causal_effect": -0.007100313305854811,
        "p_value": 0.6948287415652219,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 4,
        "baseline_robustness": 0.6678435558080673,
        "intervention_robustness": 0.6502738875150681,
        "causal_effect": -0.01756966829299922,
        "p_value": 0.29012629842040477,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 5,
        "baseline_robustness": 0.6507784360647202,
        "intervention_robustness": 0.6678386920690537,
        "causal_effect": 0.017060256004333474,
        "p_value": 0.2515985141781909,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 6,
        "baseline_robustness": 0.6755974173545838,
        "intervention_robustness": 0.6463336604833603,
        "causal_effect": -0.029263756871223512,
        "p_value": 0.0418080099823025,
        "significant": "True"
      },
      {
        "layer": 3,
        "head": 7,
        "baseline_robustness": 0.6638313990831375,
        "intervention_robustness": 0.675655054450035,
        "causal_effect": 0.011823655366897556,
        "p_value": 0.46108009393382765,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 8,
        "baseline_robustness": 0.6671712356805801,
        "intervention_robustness": 0.6515110605955123,
        "causal_effect": -0.015660175085067785,
        "p_value": 0.472569892328509,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 9,
        "baseline_robustness": 0.6697039866447448,
        "intervention_robustness": 0.6579047513008117,
        "causal_effect": -0.011799235343933123,
        "p_value": 0.4858647841296082,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 10,
        "baseline_robustness": 0.6586856949329376,
        "intervention_robustness": 0.6468593621253967,
        "causal_effect": -0.011826332807540862,
        "p_value": 0.5129387171040232,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 11,
        "baseline_robustness": 0.6568719983100891,
        "intervention_robustness": 0.6589099359512329,
        "causal_effect": 0.0020379376411437944,
        "p_value": 0.8940605276765414,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 0,
        "baseline_robustness": 0.6355719721317291,
        "intervention_robustness": 0.6636285436153412,
        "causal_effect": 0.02805657148361207,
        "p_value": 0.06328522004504199,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 1,
        "baseline_robustness": 0.6497792565822601,
        "intervention_robustness": 0.6432086300849914,
        "causal_effect": -0.0065706264972686945,
        "p_value": 0.7097575307920247,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 2,
        "baseline_robustness": 0.643877991437912,
        "intervention_robustness": 0.6514095139503479,
        "causal_effect": 0.007531522512435829,
        "p_value": 0.6060330810640897,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 3,
        "baseline_robustness": 0.661213116645813,
        "intervention_robustness": 0.6600071853399276,
        "causal_effect": -0.0012059313058853105,
        "p_value": 0.9481514514599674,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 4,
        "baseline_robustness": 0.6658973854780197,
        "intervention_robustness": 0.6501384735107422,
        "causal_effect": -0.015758911967277522,
        "p_value": 0.34753397975219924,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 5,
        "baseline_robustness": 0.647088183760643,
        "intervention_robustness": 0.6549148499965668,
        "causal_effect": 0.007826666235923763,
        "p_value": 0.6703193663240083,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 6,
        "baseline_robustness": 0.6527818185091019,
        "intervention_robustness": 0.6695284324884415,
        "causal_effect": 0.01674661397933963,
        "p_value": 0.42510026032927173,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 7,
        "baseline_robustness": 0.6698153042793273,
        "intervention_robustness": 0.64395088493824,
        "causal_effect": -0.025864419341087297,
        "p_value": 0.14065713464270102,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 8,
        "baseline_robustness": 0.6469177460670471,
        "intervention_robustness": 0.6859423410892487,
        "causal_effect": 0.03902459502220157,
        "p_value": 0.016892520776320762,
        "significant": "True"
      },
      {
        "layer": 4,
        "head": 9,
        "baseline_robustness": 0.6700995397567749,
        "intervention_robustness": 0.649907518029213,
        "causal_effect": -0.02019202172756196,
        "p_value": 0.21422665947900307,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 10,
        "baseline_robustness": 0.6455156564712524,
        "intervention_robustness": 0.6744827616214752,
        "causal_effect": 0.028967105150222827,
        "p_value": 0.11817535282291779,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 11,
        "baseline_robustness": 0.6613372457027435,
        "intervention_robustness": 0.6491403502225875,
        "causal_effect": -0.012196895480156011,
        "p_value": 0.44172382908069974,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 0,
        "baseline_robustness": 0.6427461916208267,
        "intervention_robustness": 0.6484438270330429,
        "causal_effect": 0.00569763541221624,
        "p_value": 0.7756483793733261,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 1,
        "baseline_robustness": 0.6569260782003403,
        "intervention_robustness": 0.6599225902557373,
        "causal_effect": 0.0029965120553969715,
        "p_value": 0.8534942739559144,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 2,
        "baseline_robustness": 0.6771428120136261,
        "intervention_robustness": 0.6683900326490402,
        "causal_effect": -0.00875277936458596,
        "p_value": 0.617911580044805,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 3,
        "baseline_robustness": 0.6533959931135178,
        "intervention_robustness": 0.6636480683088303,
        "causal_effect": 0.01025207519531246,
        "p_value": 0.539899860254743,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 4,
        "baseline_robustness": 0.6598560029268264,
        "intervention_robustness": 0.6503653585910797,
        "causal_effect": -0.009490644335746756,
        "p_value": 0.5609032215228473,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 5,
        "baseline_robustness": 0.6659540128707886,
        "intervention_robustness": 0.6669792568683625,
        "causal_effect": 0.0010252439975738659,
        "p_value": 0.9564800144297647,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 6,
        "baseline_robustness": 0.6857139599323273,
        "intervention_robustness": 0.6566329842805863,
        "causal_effect": -0.02908097565174106,
        "p_value": 0.08836675084700524,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 7,
        "baseline_robustness": 0.6547734373807907,
        "intervention_robustness": 0.6555627626180649,
        "causal_effect": 0.0007893252372741522,
        "p_value": 0.9657394905613649,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 8,
        "baseline_robustness": 0.6563591659069061,
        "intervention_robustness": 0.6954499691724777,
        "causal_effect": 0.03909080326557157,
        "p_value": 0.03326216542951001,
        "significant": "True"
      },
      {
        "layer": 5,
        "head": 9,
        "baseline_robustness": 0.6618971079587936,
        "intervention_robustness": 0.666703667640686,
        "causal_effect": 0.004806559681892364,
        "p_value": 0.7772375668454778,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 10,
        "baseline_robustness": 0.6793391817808151,
        "intervention_robustness": 0.6649580973386765,
        "causal_effect": -0.014381084442138592,
        "p_value": 0.38322352164536144,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 11,
        "baseline_robustness": 0.6581213676929474,
        "intervention_robustness": 0.6560061967372894,
        "causal_effect": -0.0021151709556579412,
        "p_value": 0.9108222123786013,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 0,
        "baseline_robustness": 0.6632439875602723,
        "intervention_robustness": 0.6663423085212707,
        "causal_effect": 0.0030983209609984907,
        "p_value": 0.8635801938191091,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 1,
        "baseline_robustness": 0.6564147311449051,
        "intervention_robustness": 0.6461326271295548,
        "causal_effect": -0.010282104015350346,
        "p_value": 0.5138255843908173,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 2,
        "baseline_robustness": 0.6491207391023636,
        "intervention_robustness": 0.6694692772626877,
        "causal_effect": 0.020348538160324114,
        "p_value": 0.30475568180551416,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 3,
        "baseline_robustness": 0.6422784996032714,
        "intervention_robustness": 0.663777095079422,
        "causal_effect": 0.021498595476150517,
        "p_value": 0.24524639379963764,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 4,
        "baseline_robustness": 0.662440767288208,
        "intervention_robustness": 0.6346229821443558,
        "causal_effect": -0.027817785143852247,
        "p_value": 0.07650729064100337,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 5,
        "baseline_robustness": 0.6612807208299637,
        "intervention_robustness": 0.662964499592781,
        "causal_effect": 0.001683778762817334,
        "p_value": 0.9202361069866987,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 6,
        "baseline_robustness": 0.65722292304039,
        "intervention_robustness": 0.6883492606878281,
        "causal_effect": 0.031126337647438107,
        "p_value": 0.10732295062540424,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 7,
        "baseline_robustness": 0.648131058216095,
        "intervention_robustness": 0.6518535184860229,
        "causal_effect": 0.003722460269927952,
        "p_value": 0.8451921051268525,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 8,
        "baseline_robustness": 0.6671382039785385,
        "intervention_robustness": 0.6555117416381836,
        "causal_effect": -0.011626462340354915,
        "p_value": 0.5099544306955246,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 9,
        "baseline_robustness": 0.6783668696880341,
        "intervention_robustness": 0.6662692403793335,
        "causal_effect": -0.012097629308700508,
        "p_value": 0.49234548833129266,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 10,
        "baseline_robustness": 0.6516395950317383,
        "intervention_robustness": 0.6700102925300598,
        "causal_effect": 0.018370697498321542,
        "p_value": 0.2355164862719906,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 11,
        "baseline_robustness": 0.6338256466388702,
        "intervention_robustness": 0.677479956150055,
        "causal_effect": 0.04365430951118476,
        "p_value": 0.0096135194470409,
        "significant": "True"
      },
      {
        "layer": 7,
        "head": 0,
        "baseline_robustness": 0.6424840831756592,
        "intervention_robustness": 0.6538702356815338,
        "causal_effect": 0.011386152505874603,
        "p_value": 0.5222955428904572,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 1,
        "baseline_robustness": 0.6439689987897873,
        "intervention_robustness": 0.63590387403965,
        "causal_effect": -0.00806512475013732,
        "p_value": 0.608183503199114,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 2,
        "baseline_robustness": 0.6495326381921768,
        "intervention_robustness": 0.6684239608049393,
        "causal_effect": 0.018891322612762496,
        "p_value": 0.28835277935031023,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 3,
        "baseline_robustness": 0.652683197259903,
        "intervention_robustness": 0.6576370888948441,
        "causal_effect": 0.004953891634941088,
        "p_value": 0.790149987075148,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 4,
        "baseline_robustness": 0.6614906448125839,
        "intervention_robustness": 0.677989371418953,
        "causal_effect": 0.016498726606369085,
        "p_value": 0.2504877706685709,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 5,
        "baseline_robustness": 0.6521876662969589,
        "intervention_robustness": 0.6398391604423523,
        "causal_effect": -0.012348505854606562,
        "p_value": 0.5243399337673791,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 6,
        "baseline_robustness": 0.6488717234134674,
        "intervention_robustness": 0.6765507996082306,
        "causal_effect": 0.027679076194763175,
        "p_value": 0.11394316978706805,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 7,
        "baseline_robustness": 0.649189612865448,
        "intervention_robustness": 0.6525398427248001,
        "causal_effect": 0.003350229859352183,
        "p_value": 0.8340957666381053,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 8,
        "baseline_robustness": 0.6601366025209426,
        "intervention_robustness": 0.654530126452446,
        "causal_effect": -0.005606476068496624,
        "p_value": 0.7655761951780019,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 9,
        "baseline_robustness": 0.6637888228893281,
        "intervention_robustness": 0.6621075528860092,
        "causal_effect": -0.0016812700033188577,
        "p_value": 0.9121989954976434,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 10,
        "baseline_robustness": 0.663683626651764,
        "intervention_robustness": 0.6484499943256378,
        "causal_effect": -0.015233632326126179,
        "p_value": 0.3610430635439671,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 11,
        "baseline_robustness": 0.6518063104152679,
        "intervention_robustness": 0.68340651512146,
        "causal_effect": 0.03160020470619207,
        "p_value": 0.10789531292002533,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 0,
        "baseline_robustness": 0.6317605608701706,
        "intervention_robustness": 0.672416033744812,
        "causal_effect": 0.04065547287464133,
        "p_value": 0.013025406554848589,
        "significant": "True"
      },
      {
        "layer": 8,
        "head": 1,
        "baseline_robustness": 0.6775336581468582,
        "intervention_robustness": 0.6636064314842224,
        "causal_effect": -0.013927226662635794,
        "p_value": 0.48328475802655946,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 2,
        "baseline_robustness": 0.6597188413143158,
        "intervention_robustness": 0.67268470287323,
        "causal_effect": 0.012965861558914216,
        "p_value": 0.45992614309555013,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 3,
        "baseline_robustness": 0.6718837851285935,
        "intervention_robustness": 0.656401954293251,
        "causal_effect": -0.015481830835342492,
        "p_value": 0.3383913919571543,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 4,
        "baseline_robustness": 0.6665949100255966,
        "intervention_robustness": 0.6687212097644806,
        "causal_effect": 0.0021262997388840077,
        "p_value": 0.899056070299445,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 5,
        "baseline_robustness": 0.6802006554603577,
        "intervention_robustness": 0.6758709990978241,
        "causal_effect": -0.004329656362533574,
        "p_value": 0.792629272958851,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 6,
        "baseline_robustness": 0.6611634838581085,
        "intervention_robustness": 0.6536241614818573,
        "causal_effect": -0.007539322376251212,
        "p_value": 0.6875986177083142,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 7,
        "baseline_robustness": 0.6725930720567703,
        "intervention_robustness": 0.6625382715463638,
        "causal_effect": -0.010054800510406525,
        "p_value": 0.5745539022076964,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 8,
        "baseline_robustness": 0.6462263917922973,
        "intervention_robustness": 0.6467967998981475,
        "causal_effect": 0.0005704081058501842,
        "p_value": 0.9706540395478743,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 9,
        "baseline_robustness": 0.6535814285278321,
        "intervention_robustness": 0.6624345278739929,
        "causal_effect": 0.008853099346160853,
        "p_value": 0.6092676047812311,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 10,
        "baseline_robustness": 0.6715358400344849,
        "intervention_robustness": 0.6472464597225189,
        "causal_effect": -0.024289380311965925,
        "p_value": 0.1809319253309701,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 11,
        "baseline_robustness": 0.6512456828355789,
        "intervention_robustness": 0.6520578420162201,
        "causal_effect": 0.0008121591806411477,
        "p_value": 0.9610188234539148,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 0,
        "baseline_robustness": 0.6522749775648117,
        "intervention_robustness": 0.694429030418396,
        "causal_effect": 0.04215405285358431,
        "p_value": 0.013511877204650942,
        "significant": "True"
      },
      {
        "layer": 9,
        "head": 1,
        "baseline_robustness": 0.6614585590362548,
        "intervention_robustness": 0.6433417475223542,
        "causal_effect": -0.018116811513900655,
        "p_value": 0.374339200115942,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 2,
        "baseline_robustness": 0.6514213985204697,
        "intervention_robustness": 0.6523566442728043,
        "causal_effect": 0.0009352457523346125,
        "p_value": 0.9553693776521568,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 3,
        "baseline_robustness": 0.6460849988460541,
        "intervention_robustness": 0.6640857368707657,
        "causal_effect": 0.018000738024711582,
        "p_value": 0.3465644230869339,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 4,
        "baseline_robustness": 0.6573742419481278,
        "intervention_robustness": 0.6725953233242035,
        "causal_effect": 0.015221081376075696,
        "p_value": 0.28060430849474377,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 5,
        "baseline_robustness": 0.66542440533638,
        "intervention_robustness": 0.6654220700263977,
        "causal_effect": -2.335309982282041e-06,
        "p_value": 0.9999046879774778,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 6,
        "baseline_robustness": 0.6445566320419311,
        "intervention_robustness": 0.6425690937042237,
        "causal_effect": -0.001987538337707484,
        "p_value": 0.9066216151439797,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 7,
        "baseline_robustness": 0.6581244820356369,
        "intervention_robustness": 0.6857342672348022,
        "causal_effect": 0.02760978519916535,
        "p_value": 0.10042156933523383,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 8,
        "baseline_robustness": 0.6545194911956788,
        "intervention_robustness": 0.6446106106042861,
        "causal_effect": -0.00990888059139261,
        "p_value": 0.5541548470374591,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 9,
        "baseline_robustness": 0.6563904005289077,
        "intervention_robustness": 0.6415083628892898,
        "causal_effect": -0.014882037639617929,
        "p_value": 0.42518561331696714,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 10,
        "baseline_robustness": 0.6616090601682663,
        "intervention_robustness": 0.6554013442993164,
        "causal_effect": -0.006207715868949859,
        "p_value": 0.7044246189634553,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 11,
        "baseline_robustness": 0.6832260304689407,
        "intervention_robustness": 0.6337112230062485,
        "causal_effect": -0.04951480746269221,
        "p_value": 0.00737924513846962,
        "significant": "True"
      },
      {
        "layer": 10,
        "head": 0,
        "baseline_robustness": 0.6847087168693542,
        "intervention_robustness": 0.6952015161514282,
        "causal_effect": 0.010492799282074028,
        "p_value": 0.5055171610271446,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 1,
        "baseline_robustness": 0.645798859000206,
        "intervention_robustness": 0.6485897874832154,
        "causal_effect": 0.002790928483009436,
        "p_value": 0.8679250171325936,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 2,
        "baseline_robustness": 0.6560174769163132,
        "intervention_robustness": 0.6563666838407517,
        "causal_effect": 0.00034920692443851653,
        "p_value": 0.9851002586992994,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 3,
        "baseline_robustness": 0.6674723356962204,
        "intervention_robustness": 0.6449071294069291,
        "causal_effect": -0.022565206289291373,
        "p_value": 0.228773789680209,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 4,
        "baseline_robustness": 0.6537136280536652,
        "intervention_robustness": 0.6624088358879089,
        "causal_effect": 0.008695207834243712,
        "p_value": 0.6361947490451689,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 5,
        "baseline_robustness": 0.6578290683031082,
        "intervention_robustness": 0.6649003666639328,
        "causal_effect": 0.007071298360824652,
        "p_value": 0.702264845538989,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 6,
        "baseline_robustness": 0.6509571623802185,
        "intervention_robustness": 0.6674654620885849,
        "causal_effect": 0.016508299708366425,
        "p_value": 0.35453709347477524,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 7,
        "baseline_robustness": 0.6726099866628646,
        "intervention_robustness": 0.6554271632432938,
        "causal_effect": -0.017182823419570892,
        "p_value": 0.2989703753133175,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 8,
        "baseline_robustness": 0.6659724897146225,
        "intervention_robustness": 0.648802649974823,
        "causal_effect": -0.017169839739799575,
        "p_value": 0.22437894998328062,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 9,
        "baseline_robustness": 0.6616037529706955,
        "intervention_robustness": 0.6701390463113784,
        "causal_effect": 0.008535293340682926,
        "p_value": 0.6063163654283938,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 10,
        "baseline_robustness": 0.6684972018003463,
        "intervention_robustness": 0.6264214485883712,
        "causal_effect": -0.0420757532119751,
        "p_value": 0.011616222893934307,
        "significant": "True"
      },
      {
        "layer": 10,
        "head": 11,
        "baseline_robustness": 0.6598271906375885,
        "intervention_robustness": 0.6843588614463806,
        "causal_effect": 0.02453167080879204,
        "p_value": 0.11195658653814716,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 0,
        "baseline_robustness": 0.630605331659317,
        "intervention_robustness": 0.7130343055725098,
        "causal_effect": 0.08242897391319282,
        "p_value": 7.675298655547077e-05,
        "significant": "True"
      },
      {
        "layer": 11,
        "head": 1,
        "baseline_robustness": 0.6380516165494918,
        "intervention_robustness": 0.6747293943166732,
        "causal_effect": 0.036677777767181396,
        "p_value": 0.019047064048109276,
        "significant": "True"
      },
      {
        "layer": 11,
        "head": 2,
        "baseline_robustness": 0.635691990852356,
        "intervention_robustness": 0.6394883275032044,
        "causal_effect": 0.003796336650848442,
        "p_value": 0.8087841066939365,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 3,
        "baseline_robustness": 0.6636300969123841,
        "intervention_robustness": 0.6684467947483063,
        "causal_effect": 0.004816697835922201,
        "p_value": 0.7899200982409609,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 4,
        "baseline_robustness": 0.66512710750103,
        "intervention_robustness": 0.6654177272319793,
        "causal_effect": 0.00029061973094934856,
        "p_value": 0.9831318017160441,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 5,
        "baseline_robustness": 0.657318086028099,
        "intervention_robustness": 0.6685109561681748,
        "causal_effect": 0.01119287014007575,
        "p_value": 0.5790897005650653,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 6,
        "baseline_robustness": 0.6558934617042541,
        "intervention_robustness": 0.6357579576969147,
        "causal_effect": -0.020135504007339433,
        "p_value": 0.21874196286180472,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 7,
        "baseline_robustness": 0.6678207767009735,
        "intervention_robustness": 0.6780989772081375,
        "causal_effect": 0.010278200507163993,
        "p_value": 0.5536264289103622,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 8,
        "baseline_robustness": 0.6514249122142792,
        "intervention_robustness": 0.652271534204483,
        "causal_effect": 0.0008466219902037997,
        "p_value": 0.9530943251073123,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 9,
        "baseline_robustness": 0.6719088250398636,
        "intervention_robustness": 0.6666294538974762,
        "causal_effect": -0.005279371142387412,
        "p_value": 0.7770411479891176,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 10,
        "baseline_robustness": 0.664481696486473,
        "intervention_robustness": 0.6640444856882095,
        "causal_effect": -0.00043721079826353204,
        "p_value": 0.9803334537851011,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 11,
        "baseline_robustness": 0.6671715939044952,
        "intervention_robustness": 0.6416438716650009,
        "causal_effect": -0.025527722239494288,
        "p_value": 0.16086833302412634,
        "significant": "False"
      }
    ],
    "noise_validation_summary": {
      "total": 1000,
      "valid": 779,
      "invalid": 221
    }
  },
  "roberta-base": {
    "robustness": [
      {
        "noise_type": "baseline",
        "noise_level": 0.0,
        "mean_robustness": 0.9999999886751175,
        "std_robustness": 1.3599736713572208e-07,
        "ci_95_lower": 0.9999999616902894,
        "ci_95_upper": 1.0000000156599456,
        "p_value_uncorrected": 1.0,
        "p_value_bonferroni": 1.0,
        "p_value_fdr": 1.0,
        "effect_size": 0.0,
        "significant_uncorrected": false,
        "significant_bonferroni": false,
        "significant_fdr": false,
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.05,
        "mean_robustness": 0.9865679520368577,
        "std_robustness": 0.005768721340397087,
        "ci_95_lower": 0.9854233125696431,
        "ci_95_upper": 0.9877125915040722,
        "p_value_uncorrected": 1.3387353454311563e-58,
        "p_value_bonferroni": 4.016206036293469e-57,
        "p_value_fdr": 1.8255481983152133e-58,
        "effect_size": -98766.88733874548,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.1,
        "mean_robustness": 0.971577622294426,
        "std_robustness": 0.010006285636714593,
        "ci_95_lower": 0.9695921581362253,
        "ci_95_upper": 0.9735630864526267,
        "p_value_uncorrected": 9.192979419022136e-72,
        "p_value_bonferroni": 2.757893825706641e-70,
        "p_value_fdr": 9.509978709333245e-72,
        "effect_size": -208992.0340320021,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.2,
        "mean_robustness": 0.9546733194589615,
        "std_robustness": 0.011735946102334454,
        "ci_95_lower": 0.9523446531391371,
        "ci_95_upper": 0.9570019857787859,
        "p_value_uncorrected": 3.957499170573376e-94,
        "p_value_bonferroni": 1.1872497511720127e-92,
        "p_value_fdr": 1.484062188965016e-93,
        "effect_size": -333290.7847467452,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.05,
        "mean_robustness": 0.9949321079254151,
        "std_robustness": 0.0030548063769511176,
        "ci_95_lower": 0.9943259680657428,
        "ci_95_upper": 0.9955382477850874,
        "p_value_uncorrected": 2.555627452462654e-39,
        "p_value_bonferroni": 7.666882357387961e-38,
        "p_value_fdr": 1.5333764714775922e-38,
        "effect_size": -37264.55045739829,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.1,
        "mean_robustness": 0.9950202679634095,
        "std_robustness": 0.0030098833155805995,
        "ci_95_lower": 0.9944230418137256,
        "ci_95_upper": 0.9956174941130933,
        "p_value_uncorrected": 3.503987912600942e-39,
        "p_value_bonferroni": 1.0511963737802828e-37,
        "p_value_fdr": 5.255981868901414e-38,
        "effect_size": -36616.30233428274,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.2,
        "mean_robustness": 0.9950790017843246,
        "std_robustness": 0.0030175034866017384,
        "ci_95_lower": 0.9944802636273895,
        "ci_95_upper": 0.9956777399412597,
        "p_value_uncorrected": 1.8238752335205258e-38,
        "p_value_bonferroni": 5.471625700561577e-37,
        "p_value_fdr": 4.974205182328707e-38,
        "effect_size": -36184.42764323426,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.05,
        "mean_robustness": 0.999255895614624,
        "std_robustness": 0.0039187446512095195,
        "ci_95_lower": 0.9984783316580677,
        "ci_95_upper": 1.0000334595711804,
        "p_value_uncorrected": 0.05904430377327076,
        "p_value_bonferroni": 1.0,
        "p_value_fdr": 0.1476107594331769,
        "effect_size": -5471.378425664031,
        "significant_uncorrected": "False",
        "significant_bonferroni": false,
        "significant_fdr": "False",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.1,
        "mean_robustness": 0.9969529587030411,
        "std_robustness": 0.01020569859333832,
        "ci_95_lower": 0.9949279266879522,
        "ci_95_upper": 0.9989779907181301,
        "p_value_uncorrected": 0.003187044011592039,
        "p_value_bonferroni": 0.09561132034776117,
        "p_value_fdr": 0.007354716949827782,
        "effect_size": -22405.065893926727,
        "significant_uncorrected": "True",
        "significant_bonferroni": "False",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.2,
        "mean_robustness": 0.9922325152158737,
        "std_robustness": 0.015597931147669173,
        "ci_95_lower": 0.9891375472767067,
        "ci_95_upper": 0.9953274831550408,
        "p_value_uncorrected": 1.381820171609256e-06,
        "p_value_bonferroni": 4.145460514827768e-05,
        "p_value_fdr": 1.5353557462325067e-06,
        "effect_size": -57114.881139515215,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.9999999886751175
      }
    ],
    "causal_circuits": [
      {
        "layer": 0,
        "head": 0,
        "baseline_robustness": 0.9713827979564666,
        "intervention_robustness": 0.9683169269561768,
        "causal_effect": -0.003065871000289877,
        "p_value": 0.08000158155487885,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 1,
        "baseline_robustness": 0.9763878166675568,
        "intervention_robustness": 0.970928465127945,
        "causal_effect": -0.005459351539611856,
        "p_value": 0.012785811543525772,
        "significant": "True"
      },
      {
        "layer": 0,
        "head": 2,
        "baseline_robustness": 0.9727947556972504,
        "intervention_robustness": 0.9714749312400818,
        "causal_effect": -0.0013198244571686057,
        "p_value": 0.4327576247141902,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 3,
        "baseline_robustness": 0.9744447910785675,
        "intervention_robustness": 0.9728151524066925,
        "causal_effect": -0.0016296386718750444,
        "p_value": 0.3360674364544748,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 4,
        "baseline_robustness": 0.9724184799194336,
        "intervention_robustness": 0.9725724184513092,
        "causal_effect": 0.0001539385318755082,
        "p_value": 0.9235242316334786,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 5,
        "baseline_robustness": 0.972213534116745,
        "intervention_robustness": 0.972051031589508,
        "causal_effect": -0.00016250252723704062,
        "p_value": 0.9310685399675739,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 6,
        "baseline_robustness": 0.972708340883255,
        "intervention_robustness": 0.9733817768096924,
        "causal_effect": 0.0006734359264374268,
        "p_value": 0.7724455464822562,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 7,
        "baseline_robustness": 0.9733006918430328,
        "intervention_robustness": 0.970681118965149,
        "causal_effect": -0.0026195728778838356,
        "p_value": 0.26442511110347056,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 8,
        "baseline_robustness": 0.9712250328063965,
        "intervention_robustness": 0.9717614102363586,
        "causal_effect": 0.0005363774299621538,
        "p_value": 0.7912652178557873,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 9,
        "baseline_robustness": 0.9714599907398224,
        "intervention_robustness": 0.9739381539821624,
        "causal_effect": 0.00247816324234007,
        "p_value": 0.2282921576885084,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 10,
        "baseline_robustness": 0.9698416256904602,
        "intervention_robustness": 0.9715410768985748,
        "causal_effect": 0.0016994512081146551,
        "p_value": 0.39100638813778366,
        "significant": "False"
      },
      {
        "layer": 0,
        "head": 11,
        "baseline_robustness": 0.9704317510128021,
        "intervention_robustness": 0.973518226146698,
        "causal_effect": 0.0030864751338959273,
        "p_value": 0.07404855130019741,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 0,
        "baseline_robustness": 0.972779186964035,
        "intervention_robustness": 0.9733043384552001,
        "causal_effect": 0.0005251514911651656,
        "p_value": 0.7864436058190363,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 1,
        "baseline_robustness": 0.9739988160133362,
        "intervention_robustness": 0.9698089456558228,
        "causal_effect": -0.004189870357513392,
        "p_value": 0.027139191361654925,
        "significant": "True"
      },
      {
        "layer": 1,
        "head": 2,
        "baseline_robustness": 0.9740366291999817,
        "intervention_robustness": 0.9741955041885376,
        "causal_effect": 0.00015887498855593485,
        "p_value": 0.9404490613502401,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 3,
        "baseline_robustness": 0.9732755219936371,
        "intervention_robustness": 0.9727485966682434,
        "causal_effect": -0.0005269253253936679,
        "p_value": 0.7826250913680971,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 4,
        "baseline_robustness": 0.9730057072639465,
        "intervention_robustness": 0.9727269172668457,
        "causal_effect": -0.00027878999710084784,
        "p_value": 0.8898616424101417,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 5,
        "baseline_robustness": 0.9711613643169403,
        "intervention_robustness": 0.9731850934028625,
        "causal_effect": 0.002023729085922188,
        "p_value": 0.31796638899344054,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 6,
        "baseline_robustness": 0.970160037279129,
        "intervention_robustness": 0.9725546288490295,
        "causal_effect": 0.002394591569900517,
        "p_value": 0.2580738936053789,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 7,
        "baseline_robustness": 0.971050351858139,
        "intervention_robustness": 0.9725585150718689,
        "causal_effect": 0.0015081632137298184,
        "p_value": 0.41885780871410705,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 8,
        "baseline_robustness": 0.9718717455863952,
        "intervention_robustness": 0.9701810991764068,
        "causal_effect": -0.0016906464099883944,
        "p_value": 0.2956113161909766,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 9,
        "baseline_robustness": 0.9728315305709839,
        "intervention_robustness": 0.9694583821296692,
        "causal_effect": -0.003373148441314755,
        "p_value": 0.012309782428063056,
        "significant": "True"
      },
      {
        "layer": 1,
        "head": 10,
        "baseline_robustness": 0.9710975074768067,
        "intervention_robustness": 0.9717535603046418,
        "causal_effect": 0.0006560528278350963,
        "p_value": 0.7490840374999607,
        "significant": "False"
      },
      {
        "layer": 1,
        "head": 11,
        "baseline_robustness": 0.9714640140533447,
        "intervention_robustness": 0.9698241996765137,
        "causal_effect": -0.001639814376831028,
        "p_value": 0.40535544867558615,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 0,
        "baseline_robustness": 0.9712117660045624,
        "intervention_robustness": 0.9731693243980408,
        "causal_effect": 0.0019575583934784424,
        "p_value": 0.29160124170775076,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 1,
        "baseline_robustness": 0.9734715294837951,
        "intervention_robustness": 0.9712145841121673,
        "causal_effect": -0.0022569453716277943,
        "p_value": 0.2527608128146851,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 2,
        "baseline_robustness": 0.9726533937454224,
        "intervention_robustness": 0.9709031009674072,
        "causal_effect": -0.0017502927780151945,
        "p_value": 0.41673857921939494,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 3,
        "baseline_robustness": 0.9708991849422455,
        "intervention_robustness": 0.9724395191669464,
        "causal_effect": 0.001540334224700901,
        "p_value": 0.3928504908890239,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 4,
        "baseline_robustness": 0.9723612236976623,
        "intervention_robustness": 0.9728323996067048,
        "causal_effect": 0.0004711759090424428,
        "p_value": 0.8082981593315288,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 5,
        "baseline_robustness": 0.974201067686081,
        "intervention_robustness": 0.9722438383102417,
        "causal_effect": -0.001957229375839198,
        "p_value": 0.27342190273897393,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 6,
        "baseline_robustness": 0.9719385015964508,
        "intervention_robustness": 0.9717600011825561,
        "causal_effect": -0.00017850041389466664,
        "p_value": 0.9294595821249727,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 7,
        "baseline_robustness": 0.9719269406795502,
        "intervention_robustness": 0.9739105582237244,
        "causal_effect": 0.0019836175441741988,
        "p_value": 0.4073240114311003,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 8,
        "baseline_robustness": 0.975051805973053,
        "intervention_robustness": 0.9715865683555603,
        "causal_effect": -0.0034652376174926225,
        "p_value": 0.08024416286987582,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 9,
        "baseline_robustness": 0.9727847158908844,
        "intervention_robustness": 0.9717728805541992,
        "causal_effect": -0.0010118353366852517,
        "p_value": 0.5338469756411279,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 10,
        "baseline_robustness": 0.9710427486896515,
        "intervention_robustness": 0.9734300112724305,
        "causal_effect": 0.002387262582778993,
        "p_value": 0.2656597280533669,
        "significant": "False"
      },
      {
        "layer": 2,
        "head": 11,
        "baseline_robustness": 0.9717245852947235,
        "intervention_robustness": 0.9736091732978821,
        "causal_effect": 0.0018845880031586049,
        "p_value": 0.2104782468634945,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 0,
        "baseline_robustness": 0.971600329875946,
        "intervention_robustness": 0.9703825688362122,
        "causal_effect": -0.0012177610397338112,
        "p_value": 0.4735691038067805,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 1,
        "baseline_robustness": 0.9723702991008758,
        "intervention_robustness": 0.972211127281189,
        "causal_effect": -0.00015917181968683192,
        "p_value": 0.9373999550352622,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 2,
        "baseline_robustness": 0.9720251619815826,
        "intervention_robustness": 0.9713415443897248,
        "causal_effect": -0.0006836175918578657,
        "p_value": 0.7295290849562264,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 3,
        "baseline_robustness": 0.9715507113933564,
        "intervention_robustness": 0.9729104948043823,
        "causal_effect": 0.0013597834110259388,
        "p_value": 0.5321560118652532,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 4,
        "baseline_robustness": 0.9745524990558624,
        "intervention_robustness": 0.9724746727943421,
        "causal_effect": -0.0020778262615203102,
        "p_value": 0.25713030820946375,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 5,
        "baseline_robustness": 0.9696747136116027,
        "intervention_robustness": 0.9715275037288665,
        "causal_effect": 0.0018527901172638028,
        "p_value": 0.30535523471379067,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 6,
        "baseline_robustness": 0.9725499892234802,
        "intervention_robustness": 0.9737459063529968,
        "causal_effect": 0.0011959171295166104,
        "p_value": 0.53908661163182,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 7,
        "baseline_robustness": 0.973579363822937,
        "intervention_robustness": 0.9736819005012513,
        "causal_effect": 0.00010253667831428004,
        "p_value": 0.9474178415746766,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 8,
        "baseline_robustness": 0.9731453573703766,
        "intervention_robustness": 0.9710836899280548,
        "causal_effect": -0.002061667442321724,
        "p_value": 0.237948655513017,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 9,
        "baseline_robustness": 0.9747461080551147,
        "intervention_robustness": 0.9716538274288178,
        "causal_effect": -0.0030922806262969615,
        "p_value": 0.13129880645141398,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 10,
        "baseline_robustness": 0.9736562943458558,
        "intervention_robustness": 0.9726021456718444,
        "causal_effect": -0.0010541486740113237,
        "p_value": 0.5302966263923736,
        "significant": "False"
      },
      {
        "layer": 3,
        "head": 11,
        "baseline_robustness": 0.9719939136505127,
        "intervention_robustness": 0.9729030275344849,
        "causal_effect": 0.0009091138839721902,
        "p_value": 0.6369317610095374,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 0,
        "baseline_robustness": 0.9715374410152435,
        "intervention_robustness": 0.9690033733844757,
        "causal_effect": -0.0025340676307678,
        "p_value": 0.23972324312463728,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 1,
        "baseline_robustness": 0.9733973014354705,
        "intervention_robustness": 0.9711508321762085,
        "causal_effect": -0.002246469259262085,
        "p_value": 0.23297225376651978,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 2,
        "baseline_robustness": 0.9718903756141662,
        "intervention_robustness": 0.9715415835380554,
        "causal_effect": -0.0003487920761108265,
        "p_value": 0.8380402190864304,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 3,
        "baseline_robustness": 0.9726193070411682,
        "intervention_robustness": 0.9748583602905273,
        "causal_effect": 0.0022390532493591353,
        "p_value": 0.24048157460344502,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 4,
        "baseline_robustness": 0.9718577885627746,
        "intervention_robustness": 0.9725613868236542,
        "causal_effect": 0.0007035982608796232,
        "p_value": 0.670041334475579,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 5,
        "baseline_robustness": 0.970047093629837,
        "intervention_robustness": 0.9743267381191254,
        "causal_effect": 0.0042796444892884145,
        "p_value": 0.057078272208769404,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 6,
        "baseline_robustness": 0.9715792882442474,
        "intervention_robustness": 0.9722456109523773,
        "causal_effect": 0.0006663227081298295,
        "p_value": 0.6981289697644782,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 7,
        "baseline_robustness": 0.9746072936058044,
        "intervention_robustness": 0.9696605277061462,
        "causal_effect": -0.004946765899658256,
        "p_value": 0.011395892403461604,
        "significant": "True"
      },
      {
        "layer": 4,
        "head": 8,
        "baseline_robustness": 0.9724334597587585,
        "intervention_robustness": 0.973757416009903,
        "causal_effect": 0.0013239562511444536,
        "p_value": 0.4508981584978464,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 9,
        "baseline_robustness": 0.9724090731143952,
        "intervention_robustness": 0.9739317023754119,
        "causal_effect": 0.0015226292610167613,
        "p_value": 0.4477712175777352,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 10,
        "baseline_robustness": 0.9725689971446991,
        "intervention_robustness": 0.9722975981235504,
        "causal_effect": -0.0002713990211486994,
        "p_value": 0.8960543537882841,
        "significant": "False"
      },
      {
        "layer": 4,
        "head": 11,
        "baseline_robustness": 0.9712025749683381,
        "intervention_robustness": 0.9704731786251068,
        "causal_effect": -0.0007293963432312145,
        "p_value": 0.7131838719209294,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 0,
        "baseline_robustness": 0.971741931438446,
        "intervention_robustness": 0.9712178075313568,
        "causal_effect": -0.0005241239070892245,
        "p_value": 0.7740706913719473,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 1,
        "baseline_robustness": 0.9725571811199188,
        "intervention_robustness": 0.9709967696666717,
        "causal_effect": -0.0015604114532471458,
        "p_value": 0.450993443300589,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 2,
        "baseline_robustness": 0.9712597596645355,
        "intervention_robustness": 0.9714588916301727,
        "causal_effect": 0.00019913196563725588,
        "p_value": 0.9083295027771074,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 3,
        "baseline_robustness": 0.9706541669368743,
        "intervention_robustness": 0.9719788265228272,
        "causal_effect": 0.0013246595859528476,
        "p_value": 0.45871853850580524,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 4,
        "baseline_robustness": 0.9709790635108948,
        "intervention_robustness": 0.9746481561660767,
        "causal_effect": 0.0036690926551818936,
        "p_value": 0.09200031984541046,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 5,
        "baseline_robustness": 0.9717465949058532,
        "intervention_robustness": 0.9732609355449676,
        "causal_effect": 0.0015143406391143754,
        "p_value": 0.32567822082687825,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 6,
        "baseline_robustness": 0.9711394441127777,
        "intervention_robustness": 0.9719513881206513,
        "causal_effect": 0.000811944007873544,
        "p_value": 0.6693669190050355,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 7,
        "baseline_robustness": 0.9715654754638672,
        "intervention_robustness": 0.9733945393562317,
        "causal_effect": 0.0018290638923644797,
        "p_value": 0.3051854780168966,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 8,
        "baseline_robustness": 0.9717983222007751,
        "intervention_robustness": 0.9711916828155518,
        "causal_effect": -0.0006066393852233176,
        "p_value": 0.7715001242132113,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 9,
        "baseline_robustness": 0.9727548837661744,
        "intervention_robustness": 0.9705929219722748,
        "causal_effect": -0.0021619617938996027,
        "p_value": 0.22190095239015556,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 10,
        "baseline_robustness": 0.9722382652759552,
        "intervention_robustness": 0.97010378241539,
        "causal_effect": -0.002134482860565279,
        "p_value": 0.327067518122923,
        "significant": "False"
      },
      {
        "layer": 5,
        "head": 11,
        "baseline_robustness": 0.9733694183826447,
        "intervention_robustness": 0.9741451120376587,
        "causal_effect": 0.0007756936550140647,
        "p_value": 0.6793850662059666,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 0,
        "baseline_robustness": 0.9705057525634766,
        "intervention_robustness": 0.9709118235111237,
        "causal_effect": 0.00040607094764710805,
        "p_value": 0.8250305371371105,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 1,
        "baseline_robustness": 0.9731006586551666,
        "intervention_robustness": 0.9719018042087555,
        "causal_effect": -0.0011988544464111017,
        "p_value": 0.5482943350534273,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 2,
        "baseline_robustness": 0.9721339321136475,
        "intervention_robustness": 0.9710036301612854,
        "causal_effect": -0.0011303019523620783,
        "p_value": 0.5078163438061156,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 3,
        "baseline_robustness": 0.9726489174365998,
        "intervention_robustness": 0.9745718491077423,
        "causal_effect": 0.001922931671142547,
        "p_value": 0.23518924009202696,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 4,
        "baseline_robustness": 0.9721092796325683,
        "intervention_robustness": 0.9705016350746155,
        "causal_effect": -0.0016076445579528142,
        "p_value": 0.40893673352118276,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 5,
        "baseline_robustness": 0.9718184280395508,
        "intervention_robustness": 0.9723365330696105,
        "causal_effect": 0.0005181050300597789,
        "p_value": 0.7955521104820897,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 6,
        "baseline_robustness": 0.9721925318241119,
        "intervention_robustness": 0.9729939675331116,
        "causal_effect": 0.0008014357089997093,
        "p_value": 0.5959890911419499,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 7,
        "baseline_robustness": 0.9734303534030915,
        "intervention_robustness": 0.9732343792915344,
        "causal_effect": -0.00019597411155702904,
        "p_value": 0.9126694428827586,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 8,
        "baseline_robustness": 0.9696479892730713,
        "intervention_robustness": 0.970510116815567,
        "causal_effect": 0.0008621275424957053,
        "p_value": 0.6827310093302419,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 9,
        "baseline_robustness": 0.9693702232837677,
        "intervention_robustness": 0.9712760269641876,
        "causal_effect": 0.0019058036804199263,
        "p_value": 0.3466331244586188,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 10,
        "baseline_robustness": 0.9733528196811676,
        "intervention_robustness": 0.9705304539203644,
        "causal_effect": -0.0028223657608031916,
        "p_value": 0.12985567815940074,
        "significant": "False"
      },
      {
        "layer": 6,
        "head": 11,
        "baseline_robustness": 0.9713087952136994,
        "intervention_robustness": 0.9730315589904786,
        "causal_effect": 0.001722763776779157,
        "p_value": 0.3587998384299259,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 0,
        "baseline_robustness": 0.9730540180206299,
        "intervention_robustness": 0.9746546137332917,
        "causal_effect": 0.001600595712661801,
        "p_value": 0.2966429456861679,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 1,
        "baseline_robustness": 0.9733521187305451,
        "intervention_robustness": 0.974489860534668,
        "causal_effect": 0.0011377418041229603,
        "p_value": 0.509371250156414,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 2,
        "baseline_robustness": 0.9710315418243408,
        "intervention_robustness": 0.9718550336360932,
        "causal_effect": 0.0008234918117523415,
        "p_value": 0.6664500867019636,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 3,
        "baseline_robustness": 0.9704476499557495,
        "intervention_robustness": 0.9703639507293701,
        "causal_effect": -8.369922637940785e-05,
        "p_value": 0.9623692032390105,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 4,
        "baseline_robustness": 0.9717488372325898,
        "intervention_robustness": 0.968613098859787,
        "causal_effect": -0.0031357383728027877,
        "p_value": 0.1758883910993173,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 5,
        "baseline_robustness": 0.973163937330246,
        "intervention_robustness": 0.9736127984523774,
        "causal_effect": 0.0004488611221313832,
        "p_value": 0.8275319800825588,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 6,
        "baseline_robustness": 0.9712604558467866,
        "intervention_robustness": 0.9742214345932007,
        "causal_effect": 0.0029609787464141,
        "p_value": 0.18446039688598884,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 7,
        "baseline_robustness": 0.9727919363975525,
        "intervention_robustness": 0.9730232322216034,
        "causal_effect": 0.00023129582405090332,
        "p_value": 0.9039415779309239,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 8,
        "baseline_robustness": 0.9738076484203338,
        "intervention_robustness": 0.9717133331298828,
        "causal_effect": -0.00209431529045101,
        "p_value": 0.2058390763514954,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 9,
        "baseline_robustness": 0.9727798330783844,
        "intervention_robustness": 0.9708637583255768,
        "causal_effect": -0.0019160747528076394,
        "p_value": 0.24681569868107117,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 10,
        "baseline_robustness": 0.9713419878482819,
        "intervention_robustness": 0.9733771657943726,
        "causal_effect": 0.0020351779460907604,
        "p_value": 0.3215914439816799,
        "significant": "False"
      },
      {
        "layer": 7,
        "head": 11,
        "baseline_robustness": 0.9715322780609131,
        "intervention_robustness": 0.9745054638385773,
        "causal_effect": 0.0029731857776641446,
        "p_value": 0.12732672745260884,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 0,
        "baseline_robustness": 0.9734934842586518,
        "intervention_robustness": 0.9684637653827667,
        "causal_effect": -0.005029718875885103,
        "p_value": 0.020475747144539858,
        "significant": "True"
      },
      {
        "layer": 8,
        "head": 1,
        "baseline_robustness": 0.9736544549465179,
        "intervention_robustness": 0.9746213161945343,
        "causal_effect": 0.0009668612480163707,
        "p_value": 0.5213261682373922,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 2,
        "baseline_robustness": 0.9734615278244019,
        "intervention_robustness": 0.9734720432758331,
        "causal_effect": 1.0515451431269973e-05,
        "p_value": 0.9957152127957156,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 3,
        "baseline_robustness": 0.9707346785068512,
        "intervention_robustness": 0.9744316256046295,
        "causal_effect": 0.003696947097778236,
        "p_value": 0.03979253876329644,
        "significant": "True"
      },
      {
        "layer": 8,
        "head": 4,
        "baseline_robustness": 0.9731038558483124,
        "intervention_robustness": 0.9731187808513642,
        "causal_effect": 1.4925003051780017e-05,
        "p_value": 0.9934837091550512,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 5,
        "baseline_robustness": 0.9713586258888245,
        "intervention_robustness": 0.9725768911838532,
        "causal_effect": 0.0012182652950286643,
        "p_value": 0.5695049720923325,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 6,
        "baseline_robustness": 0.9726810443401337,
        "intervention_robustness": 0.9715283405780792,
        "causal_effect": -0.0011527037620544522,
        "p_value": 0.5918437833146248,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 7,
        "baseline_robustness": 0.9728698062896729,
        "intervention_robustness": 0.9736568295955658,
        "causal_effect": 0.0007870233058928644,
        "p_value": 0.6478426626852735,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 8,
        "baseline_robustness": 0.9721347153186798,
        "intervention_robustness": 0.971013560295105,
        "causal_effect": -0.0011211550235747936,
        "p_value": 0.5839925827324055,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 9,
        "baseline_robustness": 0.9727506387233734,
        "intervention_robustness": 0.9725578010082245,
        "causal_effect": -0.0001928377151488858,
        "p_value": 0.9176258733779852,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 10,
        "baseline_robustness": 0.9712063658237458,
        "intervention_robustness": 0.972340795993805,
        "causal_effect": 0.0011344301700592085,
        "p_value": 0.5368027936948444,
        "significant": "False"
      },
      {
        "layer": 8,
        "head": 11,
        "baseline_robustness": 0.9741036033630371,
        "intervention_robustness": 0.9709045326709748,
        "causal_effect": -0.003199070692062378,
        "p_value": 0.10045579654979821,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 0,
        "baseline_robustness": 0.9707502436637878,
        "intervention_robustness": 0.9714244472980499,
        "causal_effect": 0.0006742036342620716,
        "p_value": 0.7209513277295586,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 1,
        "baseline_robustness": 0.9718517935276032,
        "intervention_robustness": 0.9708381974697113,
        "causal_effect": -0.0010135960578918635,
        "p_value": 0.5004558420388798,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 2,
        "baseline_robustness": 0.9700582098960876,
        "intervention_robustness": 0.9714330458641052,
        "causal_effect": 0.0013748359680175737,
        "p_value": 0.4728780592841264,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 3,
        "baseline_robustness": 0.9730745720863342,
        "intervention_robustness": 0.9732899868488312,
        "causal_effect": 0.00021541476249697933,
        "p_value": 0.9108047889088995,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 4,
        "baseline_robustness": 0.9717638182640076,
        "intervention_robustness": 0.9720593750476837,
        "causal_effect": 0.0002955567836760986,
        "p_value": 0.8854807356913674,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 5,
        "baseline_robustness": 0.9712672567367554,
        "intervention_robustness": 0.9714473533630371,
        "causal_effect": 0.00018009662628171164,
        "p_value": 0.9051092176164126,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 6,
        "baseline_robustness": 0.9724794733524322,
        "intervention_robustness": 0.9755042314529419,
        "causal_effect": 0.003024758100509617,
        "p_value": 0.0678248247184725,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 7,
        "baseline_robustness": 0.9713573241233826,
        "intervention_robustness": 0.9724653124809265,
        "causal_effect": 0.0011079883575438876,
        "p_value": 0.4974854360229539,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 8,
        "baseline_robustness": 0.9732624280452729,
        "intervention_robustness": 0.9751278376579284,
        "causal_effect": 0.0018654096126555775,
        "p_value": 0.2603316102836015,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 9,
        "baseline_robustness": 0.9731183326244355,
        "intervention_robustness": 0.9701331663131714,
        "causal_effect": -0.002985166311264109,
        "p_value": 0.13102297029620782,
        "significant": "False"
      },
      {
        "layer": 9,
        "head": 10,
        "baseline_robustness": 0.9741460978984833,
        "intervention_robustness": 0.9694563519954681,
        "causal_effect": -0.004689745903015186,
        "p_value": 0.014685669440166712,
        "significant": "True"
      },
      {
        "layer": 9,
        "head": 11,
        "baseline_robustness": 0.9733226203918457,
        "intervention_robustness": 0.971653380393982,
        "causal_effect": -0.0016692399978637251,
        "p_value": 0.28909223276433227,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 0,
        "baseline_robustness": 0.9728059256076813,
        "intervention_robustness": 0.9727400612831115,
        "causal_effect": -6.586432456978653e-05,
        "p_value": 0.9748089631601863,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 1,
        "baseline_robustness": 0.9712792825698853,
        "intervention_robustness": 0.9715534174442291,
        "causal_effect": 0.000274134874343801,
        "p_value": 0.8901676803746041,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 2,
        "baseline_robustness": 0.9728027546405792,
        "intervention_robustness": 0.9727404236793518,
        "causal_effect": -6.233096122743031e-05,
        "p_value": 0.9741514633382424,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 3,
        "baseline_robustness": 0.9725365602970123,
        "intervention_robustness": 0.9744164085388184,
        "causal_effect": 0.0018798482418060924,
        "p_value": 0.2430733998896337,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 4,
        "baseline_robustness": 0.9697229921817779,
        "intervention_robustness": 0.9725337660312653,
        "causal_effect": 0.002810773849487358,
        "p_value": 0.17505751068758113,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 5,
        "baseline_robustness": 0.9727158915996551,
        "intervention_robustness": 0.9716839516162872,
        "causal_effect": -0.0010319399833679244,
        "p_value": 0.5638585151408144,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 6,
        "baseline_robustness": 0.9732641839981079,
        "intervention_robustness": 0.9708747601509095,
        "causal_effect": -0.002389423847198424,
        "p_value": 0.24370953356382613,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 7,
        "baseline_robustness": 0.9726561868190765,
        "intervention_robustness": 0.9729047894477845,
        "causal_effect": 0.00024860262870796124,
        "p_value": 0.8876954447234235,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 8,
        "baseline_robustness": 0.9709027171134949,
        "intervention_robustness": 0.9711637759208679,
        "causal_effect": 0.000261058807373038,
        "p_value": 0.8764164806745496,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 9,
        "baseline_robustness": 0.9698873281478881,
        "intervention_robustness": 0.9722174561023712,
        "causal_effect": 0.0023301279544830633,
        "p_value": 0.21377439403659407,
        "significant": "False"
      },
      {
        "layer": 10,
        "head": 10,
        "baseline_robustness": 0.9746762263774872,
        "intervention_robustness": 0.970224769115448,
        "causal_effect": -0.004451457262039216,
        "p_value": 0.032740279583174804,
        "significant": "True"
      },
      {
        "layer": 10,
        "head": 11,
        "baseline_robustness": 0.9736588811874389,
        "intervention_robustness": 0.9722977006435394,
        "causal_effect": -0.001361180543899554,
        "p_value": 0.461254623982125,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 0,
        "baseline_robustness": 0.9731345117092133,
        "intervention_robustness": 0.9798419892787933,
        "causal_effect": 0.0067074775695800115,
        "p_value": 0.0001644859225200127,
        "significant": "True"
      },
      {
        "layer": 11,
        "head": 1,
        "baseline_robustness": 0.9725042712688446,
        "intervention_robustness": 0.9727593243122101,
        "causal_effect": 0.00025505304336548296,
        "p_value": 0.9037609060758132,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 2,
        "baseline_robustness": 0.9712782025337219,
        "intervention_robustness": 0.9734718310832977,
        "causal_effect": 0.0021936285495758323,
        "p_value": 0.1991099417345597,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 3,
        "baseline_robustness": 0.9734067976474762,
        "intervention_robustness": 0.9741102004051209,
        "causal_effect": 0.0007034027576446888,
        "p_value": 0.7140786501242438,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 4,
        "baseline_robustness": 0.9710494768619538,
        "intervention_robustness": 0.9710073339939117,
        "causal_effect": -4.214286804204992e-05,
        "p_value": 0.9793755376346258,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 5,
        "baseline_robustness": 0.972448605298996,
        "intervention_robustness": 0.9726602137088776,
        "causal_effect": 0.0002116084098815474,
        "p_value": 0.921820338749322,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 6,
        "baseline_robustness": 0.9745763206481933,
        "intervention_robustness": 0.9731255674362183,
        "causal_effect": -0.0014507532119750755,
        "p_value": 0.4721104488357287,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 7,
        "baseline_robustness": 0.9742423009872436,
        "intervention_robustness": 0.9741407525539398,
        "causal_effect": -0.0001015484333037886,
        "p_value": 0.948387877851393,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 8,
        "baseline_robustness": 0.9721759748458862,
        "intervention_robustness": 0.9702631187438965,
        "causal_effect": -0.0019128561019897683,
        "p_value": 0.3270734374351769,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 9,
        "baseline_robustness": 0.9729522562026978,
        "intervention_robustness": 0.9730942344665527,
        "causal_effect": 0.00014197826385498047,
        "p_value": 0.9390772329315364,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 10,
        "baseline_robustness": 0.9718314576148986,
        "intervention_robustness": 0.9720937013626099,
        "causal_effect": 0.0002622437477112216,
        "p_value": 0.8530862145610577,
        "significant": "False"
      },
      {
        "layer": 11,
        "head": 11,
        "baseline_robustness": 0.9731673073768615,
        "intervention_robustness": 0.9724464452266693,
        "causal_effect": -0.0007208621501921808,
        "p_value": 0.684810186128936,
        "significant": "False"
      }
    ],
    "noise_validation_summary": {
      "total": 1000,
      "valid": 796,
      "invalid": 204
    }
  },
  "distilbert-base-uncased": {
    "robustness": [
      {
        "noise_type": "baseline",
        "noise_level": 0.0,
        "mean_robustness": 0.999999989271164,
        "std_robustness": 1.2461951675374294e-07,
        "ci_95_lower": 0.9999999645439482,
        "ci_95_upper": 1.0000000139983798,
        "p_value_uncorrected": 1.0,
        "p_value_bonferroni": 1.0,
        "p_value_fdr": 1.0,
        "effect_size": 0.0,
        "significant_uncorrected": false,
        "significant_bonferroni": false,
        "significant_fdr": false,
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.05,
        "mean_robustness": 0.8677205938100815,
        "std_robustness": 0.06128545254601489,
        "ci_95_lower": 0.8555602304278132,
        "ci_95_upper": 0.8798809571923498,
        "p_value_uncorrected": 6.491485550134507e-54,
        "p_value_bonferroni": 1.947445665040352e-52,
        "p_value_fdr": 8.114356937668133e-54,
        "effect_size": -1061466.1243027928,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.1,
        "mean_robustness": 0.7494529336690903,
        "std_robustness": 0.09129605386231474,
        "ci_95_lower": 0.7313378159011448,
        "ci_95_upper": 0.7675680514370358,
        "p_value_uncorrected": 2.1228338283727826e-69,
        "p_value_bonferroni": 6.368501485118348e-68,
        "p_value_fdr": 3.184250742559174e-69,
        "effect_size": -2010496.1255561002,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "char_swap",
        "noise_level": 0.2,
        "mean_robustness": 0.654919117987156,
        "std_robustness": 0.09391088851385768,
        "ci_95_lower": 0.6362851602951121,
        "ci_95_upper": 0.6735530756791999,
        "p_value_uncorrected": 2.2825139883154806e-90,
        "p_value_bonferroni": 6.847541964946442e-89,
        "p_value_fdr": 2.2825139883154806e-90,
        "effect_size": -2769075.665458665,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.05,
        "mean_robustness": 0.973356522321701,
        "std_robustness": 0.017232644648746555,
        "ci_95_lower": 0.9699371917585642,
        "ci_95_upper": 0.9767758528848378,
        "p_value_uncorrected": 6.921145959375896e-36,
        "p_value_bonferroni": 2.076343787812769e-34,
        "p_value_fdr": 2.3070486531252988e-35,
        "effect_size": -213798.50960353465,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.1,
        "mean_robustness": 0.9736933600902558,
        "std_robustness": 0.01621267851332425,
        "ci_95_lower": 0.970476412936706,
        "ci_95_upper": 0.9769103072438056,
        "p_value_uncorrected": 3.2389216872969e-38,
        "p_value_bonferroni": 9.716765061890699e-37,
        "p_value_fdr": 1.6194608436484498e-37,
        "effect_size": -211095.58009996123,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "word_substitution",
        "noise_level": 0.2,
        "mean_robustness": 0.9746724450588227,
        "std_robustness": 0.017378900187144705,
        "ci_95_lower": 0.9712240942238317,
        "ci_95_upper": 0.9781207958938136,
        "p_value_uncorrected": 3.6064125190045576e-33,
        "p_value_bonferroni": 1.0819237557013673e-31,
        "p_value_fdr": 3.606412519004558e-32,
        "effect_size": -203238.98593179692,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.05,
        "mean_robustness": 0.9968032377958298,
        "std_robustness": 0.010963430155859873,
        "ci_95_lower": 0.9946278553996359,
        "ci_95_upper": 0.9989786201920237,
        "p_value_uncorrected": 0.003956491521468694,
        "p_value_bonferroni": 0.11869474564406082,
        "p_value_fdr": 0.007418421602753801,
        "effect_size": -25652.093336641385,
        "significant_uncorrected": "True",
        "significant_bonferroni": "False",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.1,
        "mean_robustness": 0.990471476316452,
        "std_robustness": 0.02221009113332824,
        "ci_95_lower": 0.9860645123843218,
        "ci_95_upper": 0.9948784402485823,
        "p_value_uncorrected": 2.7902339229007455e-05,
        "p_value_bonferroni": 0.0008370701768702236,
        "p_value_fdr": 5.979072691930169e-05,
        "effect_size": -76460.84018718297,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      },
      {
        "noise_type": "grammar",
        "noise_level": 0.2,
        "mean_robustness": 0.9900561076402664,
        "std_robustness": 0.019065823659256544,
        "ci_95_lower": 0.9862730345903492,
        "ci_95_upper": 0.9938391806901836,
        "p_value_uncorrected": 4.5989785038811634e-07,
        "p_value_bonferroni": 1.379693551164349e-05,
        "p_value_fdr": 7.664964173135272e-07,
        "effect_size": -79793.9350908202,
        "significant_uncorrected": "True",
        "significant_bonferroni": "True",
        "significant_fdr": "True",
        "n_samples": 100,
        "baseline_mean": 0.999999989271164
      }
    ],
    "causal_circuits": [],
    "noise_validation_summary": {
      "total": 1000,
      "valid": 790,
      "invalid": 210
    }
  }
}