{"cpp-python": {"top_5_attn": {"attn-18": {"cpp_top": {"KL": {"mean": 0.0007500755786895752, "std": 0.0025375014770702806}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.999921875, "std": 0.00025706591951330486}, "a_after": {"mean": 0.999931640625, "std": 0.00024061332913534017}, "delta_bias": {"mean": 4.8828125e-06, "std": 0.00012974512185687665}}, "python_top": {"KL": {"mean": 0.004347193241119385, "std": 0.014283988086600925}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9999658203125, "std": 0.00014315853005962055}, "a_after": {"mean": 0.9999658203125, "std": 0.00014315853005962055}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-1": {"cpp_top": {"KL": {"mean": 0.000131414532661438, "std": 0.0004951953713185628}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999560546875, "std": 0.00015665329798038254}, "a_after": {"mean": 0.9999609375, "std": 0.00015013791304410303}, "delta_bias": {"mean": 9.765625e-06, "std": 9.765624999999999e-05}}, "python_top": {"KL": {"mean": 0.00032912254333496095, "std": 0.0013260368099331991}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.999951171875, "std": 0.00020233774756121776}, "a_after": {"mean": 0.999951171875, "std": 0.00020233774756121776}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-25": {"cpp_top": {"KL": {"mean": 0.0003616964817047119, "std": 0.0006639145543051759}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999658203125, "std": 0.00014315853005962055}, "a_after": {"mean": 0.999970703125, "std": 0.00013564354446455776}, "delta_bias": {"mean": 9.765625e-06, "std": 6.87037569024852e-05}}, "python_top": {"KL": {"mean": 0.0021624863147735596, "std": 0.006092493312003395}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9999853515625, "std": 0.00010874096843438168}, "a_after": {"mean": 0.9999853515625, "std": 0.00010874096843438168}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-24": {"cpp_top": {"KL": {"mean": 0.0014330554008483887, "std": 0.005054152756454634}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999755859375, "std": 0.00010695454763554216}, "a_after": {"mean": 0.99998046875, "std": 9.61652273276037e-05}, "delta_bias": {"mean": 9.765625e-06, "std": 9.765624999999999e-05}}, "python_top": {"KL": {"mean": 0.0036676543951034545, "std": 0.016220816631627284}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.999931640625, "std": 0.0001965416656879774}, "a_after": {"mean": 0.999931640625, "std": 0.0001965416656879774}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-15": {"cpp_top": {"KL": {"mean": 0.0070511788129806515, "std": 0.03233606118640656}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999658203125, "std": 0.00012521113280089983}, "a_after": {"mean": 0.999970703125, "std": 0.00011654455887497169}, "delta_bias": {"mean": 9.765625e-06, "std": 9.765624999999999e-05}}, "python_top": {"KL": {"mean": 0.01883454918861389, "std": 0.05668917114709268}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.999775390625, "std": 0.0009849603407145588}, "a_after": {"mean": 0.999775390625, "std": 0.0009849603407145588}, "delta_bias": {"mean": 0.0, "std": 0.0}}}}, "bottom_5_attn": {"attn-6": {"cpp_top": {"KL": {"mean": 0.000766180157661438, "std": 0.0035653753597120054}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999560546875, "std": 0.00014044115784407287}, "a_after": {"mean": 0.999970703125, "std": 0.00011654455887497169}, "delta_bias": {"mean": 0.0, "std": 0.00012020653415950463}}, "python_top": {"KL": {"mean": 0.004958021640777588, "std": 0.022442129184941213}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.99998046875, "std": 9.61652273276037e-05}, "a_after": {"mean": 0.9999755859375, "std": 0.00010695454763554213}, "delta_bias": {"mean": 4.8828125e-06, "std": 4.8828124999999996e-05}}}, "attn-8": {"cpp_top": {"KL": {"mean": 0.002008764743804932, "std": 0.0099928025956776}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999755859375, "std": 0.00010695454763554215}, "a_after": {"mean": 0.9999755859375, "std": 0.00010695454763554215}, "delta_bias": {"mean": 1.46484375e-05, "std": 8.371416406140883e-05}}, "python_top": {"KL": {"mean": 0.0021437829732894897, "std": 0.006483476810280068}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9999560546875, "std": 0.00017133824068104946}, "a_after": {"mean": 0.9999560546875, "std": 0.00017133824068104946}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-23": {"cpp_top": {"KL": {"mean": 0.00013516247272491456, "std": 0.0006478880050037029}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.999970703125, "std": 0.00011654455887497169}, "a_after": {"mean": 0.9999755859375, "std": 0.00010695454763554215}, "delta_bias": {"mean": 9.765625e-06, "std": 9.765624999999999e-05}}, "python_top": {"KL": {"mean": 0.0014751946926116943, "std": 0.005733618396022131}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.99994140625, "std": 0.00021141774502239286}, "a_after": {"mean": 0.9999365234375, "std": 0.0002156470826771917}, "delta_bias": {"mean": 4.8828125e-06, "std": 4.8828124999999996e-05}}}, "attn-9": {"cpp_top": {"KL": {"mean": 0.0005987000465393067, "std": 0.0027682491177466718}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999755859375, "std": 0.00010695454763554216}, "a_after": {"mean": 0.99998046875, "std": 9.616522732760368e-05}, "delta_bias": {"mean": 9.765625e-06, "std": 9.765624999999999e-05}}, "python_top": {"KL": {"mean": 0.0017534738779067993, "std": 0.005843440568038087}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_after": {"mean": 0.9999560546875, "std": 0.00017133824068104946}, "delta_bias": {"mean": 4.8828125e-06, "std": 4.8828124999999996e-05}}}, "attn-13": {"cpp_top": {"KL": {"mean": 0.0072298991680145265, "std": 0.02908589982010125}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999853515625, "std": 8.371416406140883e-05}, "a_after": {"mean": 0.9999853515625, "std": 8.371416406140883e-05}, "delta_bias": {"mean": 1.46484375e-05, "std": 8.371416406140883e-05}}, "python_top": {"KL": {"mean": 0.013931733965873718, "std": 0.04206501254781966}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9999560546875, "std": 0.00017133824068104946}, "a_after": {"mean": 0.9999560546875, "std": 0.00017133824068104946}, "delta_bias": {"mean": 0.0, "std": 0.0}}}}, "top_5_mlp": {"mlp-24": {"cpp_top": {"KL": {"mean": 0.1803986769914627, "std": 0.35629207635823124}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.998515625, "std": 0.0029205652908697514}, "a_after": {"mean": 0.998671875, "std": 0.002829393881457081}, "delta_bias": {"mean": -0.0001416015625, "std": 0.0006371891260298081}}, "python_top": {"KL": {"mean": 0.04710500776767731, "std": 0.16699687594134133}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9998046875, "std": 0.0005099931326827085}, "a_after": {"mean": 0.9998046875, "std": 0.0005099931326827085}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-22": {"cpp_top": {"KL": {"mean": 0.05602943778038025, "std": 0.11203397467422176}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9996728515625, "std": 0.0006976640607330675}, "a_after": {"mean": 0.99970703125, "std": 0.0006940127485534194}, "delta_bias": {"mean": -1.953125e-05, "std": 0.0002072764087101972}}, "python_top": {"KL": {"mean": 0.14265705168247222, "std": 0.4410057708749539}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9999365234375, "std": 0.0002156470826771917}, "a_after": {"mean": 0.9999365234375, "std": 0.0002156470826771917}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-21": {"cpp_top": {"KL": {"mean": 0.08458100199699402, "std": 0.3341496805345351}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9989404296875, "std": 0.0020471082182869516}, "a_after": {"mean": 0.9990234375, "std": 0.0020458667138633685}, "delta_bias": {"mean": -6.8359375e-05, "std": 0.0002504222656017996}}, "python_top": {"KL": {"mean": 0.218005251288414, "std": 0.7047014379040036}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9998583984375, "std": 0.0005160720983891454}, "a_after": {"mean": 0.9998583984375, "std": 0.0005160720983891454}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-23": {"cpp_top": {"KL": {"mean": 0.033159432411193845, "std": 0.10691899768357131}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9991650390625, "std": 0.002220076165322487}, "a_after": {"mean": 0.999580078125, "std": 0.0007697599468530932}, "delta_bias": {"mean": -0.000400390625, "std": 0.0019375016307855019}}, "python_top": {"KL": {"mean": 0.06909270286560058, "std": 0.22839094369500829}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9998388671875, "std": 0.0009156103642387758}, "a_after": {"mean": 0.9998388671875, "std": 0.0009156103642387758}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-20": {"cpp_top": {"KL": {"mean": 0.05518685162067413, "std": 0.2714827588695148}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9997998046875, "std": 0.00076055374905131}, "a_after": {"mean": 0.9998193359375, "std": 0.0007591592250228684}, "delta_bias": {"mean": -4.8828125e-06, "std": 0.00012974512185687668}}, "python_top": {"KL": {"mean": 0.40677814662456513, "std": 1.1479390834628376}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9956884765625, "std": 0.019542474161032504}, "a_after": {"mean": 0.9956787109375, "std": 0.01954263682729122}, "delta_bias": {"mean": 9.765625e-06, "std": 6.87037569024852e-05}}}}, "bottom_5_mlp": {"mlp-12": {"cpp_top": {"KL": {"mean": 0.012221401929855347, "std": 0.048761260607760655}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999951171875, "std": 4.882812499999999e-05}, "a_after": {"mean": 0.9999951171875, "std": 4.882812499999999e-05}, "delta_bias": {"mean": 1.46484375e-05, "std": 8.371416406140883e-05}}, "python_top": {"KL": {"mean": 0.030880528092384337, "std": 0.08950622916190153}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9998095703125, "std": 0.0005590788544298775}, "a_after": {"mean": 0.9998095703125, "std": 0.0005590788544298775}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-10": {"cpp_top": {"KL": {"mean": 0.003950023055076599, "std": 0.01801312935544099}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999853515625, "std": 8.371416406140883e-05}, "a_after": {"mean": 0.9999951171875, "std": 4.882812499999999e-05}, "delta_bias": {"mean": 4.8828125e-06, "std": 0.00010962326255913504}}, "python_top": {"KL": {"mean": 0.0177927428483963, "std": 0.06364944374364975}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.999970703125, "std": 0.00013564354446455776}, "a_after": {"mean": 0.999970703125, "std": 0.00013564354446455776}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-11": {"cpp_top": {"KL": {"mean": 0.27338778853416446, "std": 0.5365025773726165}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9998095703125, "std": 0.0003980452022335268}, "a_after": {"mean": 0.9998095703125, "std": 0.0003980452022335268}, "delta_bias": {"mean": 1.46484375e-05, "std": 8.371416406140883e-05}}, "python_top": {"KL": {"mean": 0.1843091493844986, "std": 0.5305184534532794}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.99978515625, "std": 0.0006811232187886271}, "a_after": {"mean": 0.99978515625, "std": 0.0006811232187886271}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-9": {"cpp_top": {"KL": {"mean": 0.004531899094581604, "std": 0.014649786072842166}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999560546875, "std": 0.0001404411578440729}, "a_after": {"mean": 0.99998046875, "std": 9.61652273276037e-05}, "delta_bias": {"mean": -9.765625e-06, "std": 0.00013845510849064374}}, "python_top": {"KL": {"mean": 0.013533403277397155, "std": 0.054255408598862756}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9998876953125, "std": 0.0003917024184141843}, "a_after": {"mean": 0.9998876953125, "std": 0.0003917024184141843}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-13": {"cpp_top": {"KL": {"mean": 0.004440155029296875, "std": 0.01966155173678829}, "b_before": {"mean": 0.9999609375, "std": 0.00015013791304410297}, "a_before": {"mean": 0.9999755859375, "std": 0.00012749828317067712}, "b_after": {"mean": 0.9999755859375, "std": 0.00010695454763554215}, "a_after": {"mean": 0.9999755859375, "std": 0.00010695454763554215}, "delta_bias": {"mean": 1.46484375e-05, "std": 8.371416406140883e-05}}, "python_top": {"KL": {"mean": 0.05218028962612152, "std": 0.17627636490682325}, "b_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "a_before": {"mean": 0.9999609375, "std": 0.00016540232732578268}, "b_after": {"mean": 0.9997802734375, "std": 0.0008924879821947399}, "a_after": {"mean": 0.99978515625, "std": 0.0008923395591531867}, "delta_bias": {"mean": -4.8828125e-06, "std": 4.8828124999999996e-05}}}}}}