{"cpp-python": {"top_5_attn": {"attn-17": {"cpp_top": {"KL": {"mean": 0.02064746856689453, "std": 0.010504398860191565}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8763720703125, "std": 0.057849394876486354}, "a_after": {"mean": 0.9141748046875, "std": 0.03120988949655762}, "delta_bias": {"mean": 0.0020751953125, "std": 0.012548467325586902}}, "python_top": {"KL": {"mean": 0.005699214935302735, "std": 0.0032232336927797236}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9137451171875, "std": 0.031073050004502528}, "a_after": {"mean": 0.903564453125, "std": 0.03543426158214077}, "delta_bias": {"mean": -5.859375e-05, "std": 0.001340864241179307}}}, "attn-16": {"cpp_top": {"KL": {"mean": 0.09955108642578125, "std": 0.08646240481221666}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8866943359375, "std": 0.04542966183211651}, "a_after": {"mean": 0.9149365234375, "std": 0.025905395467689826}, "delta_bias": {"mean": 0.0116357421875, "std": 0.0170803368838181}}, "python_top": {"KL": {"mean": 0.012163848876953124, "std": 0.007586685685070466}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.92369140625, "std": 0.02356764293478286}, "a_after": {"mean": 0.9149462890625, "std": 0.027689516653677627}, "delta_bias": {"mean": -0.001494140625, "std": 0.002931183198796702}}}, "attn-2": {"cpp_top": {"KL": {"mean": 0.0013023632764816283, "std": 0.0006752464895275104}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8796484375, "std": 0.04925072997856398}, "a_after": {"mean": 0.91783203125, "std": 0.03310601433553068}, "delta_bias": {"mean": 0.0016943359375, "std": 0.00341582259799292}}, "python_top": {"KL": {"mean": 0.0004968959093093872, "std": 0.0003894849550209527}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.914677734375, "std": 0.032078904375786685}, "a_after": {"mean": 0.90400390625, "std": 0.03724068787349947}, "delta_bias": {"mean": 0.0004345703125, "std": 0.00049511377293403}}}, "attn-19": {"cpp_top": {"KL": {"mean": 0.003169703483581543, "std": 0.0015638444837363862}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8858154296875, "std": 0.049835691059319676}, "a_after": {"mean": 0.9235595703125, "std": 0.02978555041382571}, "delta_bias": {"mean": 0.0021337890625, "std": 0.0038338504236436113}}, "python_top": {"KL": {"mean": 0.0022272825241088866, "std": 0.000988458850776896}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.91330078125, "std": 0.030929098413978482}, "a_after": {"mean": 0.9026025390625, "std": 0.0357150622623345}, "delta_bias": {"mean": 0.000458984375, "std": 0.0011650735786571146}}}, "attn-21": {"cpp_top": {"KL": {"mean": 0.005195112228393555, "std": 0.003235936689521311}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.873291015625, "std": 0.052566621133289776}, "a_after": {"mean": 0.911708984375, "std": 0.03553341070560709}, "delta_bias": {"mean": 0.0014599609375, "std": 0.0047368569015852294}}, "python_top": {"KL": {"mean": 0.002684895992279053, "std": 0.0018210877862053006}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.91953125, "std": 0.03030094948762566}, "a_after": {"mean": 0.9098779296875, "std": 0.03475433017199582}, "delta_bias": {"mean": -0.0005859375, "std": 0.0011382655401821083}}}}, "bottom_5_attn": {"attn-5": {"cpp_top": {"KL": {"mean": 0.0006829828023910522, "std": 0.0005329628039616875}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8801904296875, "std": 0.05225416684498507}, "a_after": {"mean": 0.919736328125, "std": 0.033684920942022335}, "delta_bias": {"mean": 0.00033203125, "std": 0.001851700927543937}}, "python_top": {"KL": {"mean": 0.0006591886281967163, "std": 0.0004639998535459046}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9129736328125, "std": 0.03284556790031699}, "a_after": {"mean": 0.90228515625, "std": 0.038208996511329855}, "delta_bias": {"mean": 0.00044921875, "std": 0.0006788565794952495}}}, "attn-6": {"cpp_top": {"KL": {"mean": 0.0006429922580718995, "std": 0.0005066083895835466}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.878310546875, "std": 0.051272447232492024}, "a_after": {"mean": 0.9175537109375, "std": 0.034870903464807014}, "delta_bias": {"mean": 0.000634765625, "std": 0.0019401349508819635}}, "python_top": {"KL": {"mean": 0.0009192991256713868, "std": 0.0006309584677336719}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.917802734375, "std": 0.030344172349751317}, "a_after": {"mean": 0.9083642578125, "std": 0.03469876735992966}, "delta_bias": {"mean": -0.00080078125, "std": 0.0008138612667873466}}}, "attn-8": {"cpp_top": {"KL": {"mean": 0.0033244991302490232, "std": 0.0016188582995464473}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8823876953125, "std": 0.04779005524550395}, "a_after": {"mean": 0.9184375, "std": 0.03302603301890272}, "delta_bias": {"mean": 0.003828125, "std": 0.0052063539385023075}}, "python_top": {"KL": {"mean": 0.002709224224090576, "std": 0.0016522563143010623}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.92095703125, "std": 0.028805900177428238}, "a_after": {"mean": 0.9119482421875, "std": 0.032810956837017013}, "delta_bias": {"mean": -0.00123046875, "std": 0.0012711616012548886}}}, "attn-14": {"cpp_top": {"KL": {"mean": 0.0025878238677978518, "std": 0.0026875396959948374}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.87265625, "std": 0.05596144589513361}, "a_after": {"mean": 0.917861328125, "std": 0.03486891475613157}, "delta_bias": {"mean": -0.0053271484375, "std": 0.0070311729349864455}}, "python_top": {"KL": {"mean": 0.0016994142532348632, "std": 0.0010541610227588103}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9143701171875, "std": 0.029865188221726048}, "a_after": {"mean": 0.904443359375, "std": 0.03461610782114481}, "delta_bias": {"mean": -0.0003125, "std": 0.0009427340018793213}}}, "attn-9": {"cpp_top": {"KL": {"mean": 0.0012804675102233887, "std": 0.0008461024273452506}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8768359375, "std": 0.052451990441210815}, "a_after": {"mean": 0.917060546875, "std": 0.035979258150207735}, "delta_bias": {"mean": -0.0003466796875, "std": 0.003245795309540233}}, "python_top": {"KL": {"mean": 0.0013355451822280884, "std": 0.0011710786573528825}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9186572265625, "std": 0.029592389061084162}, "a_after": {"mean": 0.9088623046875, "std": 0.03390111091518717}, "delta_bias": {"mean": -0.0004443359375, "std": 0.0012628267109830865}}}}, "top_5_mlp": {"mlp-19": {"cpp_top": {"KL": {"mean": 0.235762939453125, "std": 0.10069688658923436}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8857568359375, "std": 0.0471858984481291}, "a_after": {"mean": 0.9105224609375, "std": 0.018973777366956036}, "delta_bias": {"mean": 0.0151123046875, "std": 0.022083936588167874}}, "python_top": {"KL": {"mean": 0.08142410278320313, "std": 0.02134842131606206}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.896689453125, "std": 0.028761464188337037}, "a_after": {"mean": 0.8857177734375, "std": 0.03302548283060148}, "delta_bias": {"mean": 0.000732421875, "std": 0.006116571815252325}}}, "mlp-17": {"cpp_top": {"KL": {"mean": 0.04555496215820312, "std": 0.013908499543279511}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.88560546875, "std": 0.05661306977870904}, "a_after": {"mean": 0.9204248046875, "std": 0.028231945137844536}, "delta_bias": {"mean": 0.00505859375, "std": 0.015326611227735511}}, "python_top": {"KL": {"mean": 0.03859291076660156, "std": 0.01651210541824719}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.901875, "std": 0.034701395281316716}, "a_after": {"mean": 0.889365234375, "std": 0.04089844743963814}, "delta_bias": {"mean": 0.0022705078125, "std": 0.0029732392280886273}}}, "mlp-20": {"cpp_top": {"KL": {"mean": 0.16183807373046874, "std": 0.056812477575729534}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8929052734375, "std": 0.04568243987125618}, "a_after": {"mean": 0.9247216796875, "std": 0.021988384675613246}, "delta_bias": {"mean": 0.0080615234375, "std": 0.015209886717808907}}, "python_top": {"KL": {"mean": 0.08968902587890625, "std": 0.026955410184123795}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.8602880859375, "std": 0.046830160613359026}, "a_after": {"mean": 0.8433935546875, "std": 0.05362432463364516}, "delta_bias": {"mean": 0.0066552734375, "std": 0.006485457230805643}}}, "mlp-18": {"cpp_top": {"KL": {"mean": 0.10423309326171876, "std": 0.03743568929577691}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.850185546875, "std": 0.061934149518576426}, "a_after": {"mean": 0.88630859375, "std": 0.026911924488966416}, "delta_bias": {"mean": 0.0037548828125, "std": 0.017252701180535915}}, "python_top": {"KL": {"mean": 0.036783905029296876, "std": 0.01679209413421613}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.91505859375, "std": 0.028015939092366583}, "a_after": {"mean": 0.9014501953125, "std": 0.03327919978716858}, "delta_bias": {"mean": 0.003369140625, "std": 0.005380501402961582}}}, "mlp-21": {"cpp_top": {"KL": {"mean": 0.05429931640625, "std": 0.03268152361477039}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.855263671875, "std": 0.054357423987784245}, "a_after": {"mean": 0.893037109375, "std": 0.035310848951423515}, "delta_bias": {"mean": 0.0021044921875, "std": 0.011036045525911517}}, "python_top": {"KL": {"mean": 0.01942108154296875, "std": 0.0064105914383282845}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.921455078125, "std": 0.02857335866461219}, "a_after": {"mean": 0.9119775390625, "std": 0.03283808831106457}, "delta_bias": {"mean": -0.00076171875, "std": 0.001979238700468434}}}}, "bottom_5_mlp": {"mlp-11": {"cpp_top": {"KL": {"mean": 0.0041295957565307614, "std": 0.0034354454787068264}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8787451171875, "std": 0.049776733556192644}, "a_after": {"mean": 0.9170849609375, "std": 0.03354643235185098}, "delta_bias": {"mean": 0.0015380859375, "std": 0.0032929972486002005}}, "python_top": {"KL": {"mean": 0.002427253723144531, "std": 0.0019979486020123534}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9185546875, "std": 0.030328833547000028}, "a_after": {"mean": 0.908955078125, "std": 0.034734379191093234}, "delta_bias": {"mean": -0.0006396484375, "std": 0.001314407745653931}}}, "mlp-6": {"cpp_top": {"KL": {"mean": 0.005461063385009766, "std": 0.002014192248999655}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8658984375, "std": 0.051749693384771374}, "a_after": {"mean": 0.9075927734375, "std": 0.03384515780289633}, "delta_bias": {"mean": -0.00181640625, "std": 0.004706041035325546}}, "python_top": {"KL": {"mean": 0.0034368419647216797, "std": 0.0016422777277822498}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.923212890625, "std": 0.028675127000122584}, "a_after": {"mean": 0.9135205078125, "std": 0.033026031195894985}, "delta_bias": {"mean": -0.000546875, "std": 0.001157733087925028}}}, "mlp-8": {"cpp_top": {"KL": {"mean": 0.0034978890419006346, "std": 0.0028561247966170144}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8789990234375, "std": 0.05074783779142557}, "a_after": {"mean": 0.9152783203125, "std": 0.03574681040272106}, "delta_bias": {"mean": 0.0035986328125, "std": 0.0047037760416666664}}, "python_top": {"KL": {"mean": 0.0023284292221069335, "std": 0.001291846883346398}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9096337890625, "std": 0.032504793345676915}, "a_after": {"mean": 0.89908203125, "std": 0.037639292432187284}, "delta_bias": {"mean": 0.0003125, "std": 0.0007713226589793992}}}, "mlp-10": {"cpp_top": {"KL": {"mean": 0.002276632785797119, "std": 0.001404317921670122}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8795458984375, "std": 0.049402092096657815}, "a_after": {"mean": 0.9164794921875, "std": 0.03213873865581297}, "delta_bias": {"mean": 0.0029443359375, "std": 0.003949447831459636}}, "python_top": {"KL": {"mean": 0.001364203691482544, "std": 0.001136315496070551}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9144482421875, "std": 0.03288172748940823}, "a_after": {"mean": 0.9031396484375, "std": 0.038476961512565834}, "delta_bias": {"mean": 0.0010693359375, "std": 0.0011481559480112356}}}, "mlp-9": {"cpp_top": {"KL": {"mean": 0.003332710266113281, "std": 0.0016574035189308985}, "b_before": {"mean": 0.87740234375, "std": 0.05230138643910213}, "a_before": {"mean": 0.9172802734375, "std": 0.034346837118422485}, "b_after": {"mean": 0.8684912109375, "std": 0.055500197983094524}, "a_after": {"mean": 0.9129736328125, "std": 0.03608651776104096}, "delta_bias": {"mean": -0.0046044921875, "std": 0.004975280053516709}}, "python_top": {"KL": {"mean": 0.002308807373046875, "std": 0.001099433000015662}, "b_before": {"mean": 0.9154931640625, "std": 0.03178542437401789}, "a_before": {"mean": 0.90525390625, "std": 0.03672225479592982}, "b_after": {"mean": 0.9182666015625, "std": 0.03082580066680368}, "a_after": {"mean": 0.9085302734375, "std": 0.035412748055410004}, "delta_bias": {"mean": -0.0005029296875, "std": 0.0011048870408572422}}}}}}