{"cpp-python": {"top_5_attn": {"attn-1": {"cpp_top": {"KL": {"mean": 0.00012088639894500375, "std": 0.0007101763366447383}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.9998046875, "std": 0.001953125}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": -7.8125e-05, "std": 0.0007812499999999999}}, "python_top": {"KL": {"mean": 8.437143166247552e-05, "std": 0.0004949341032398501}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-24": {"cpp_top": {"KL": {"mean": 0.010242885823827238, "std": 0.05159210704448807}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 0.00266480026220961, "std": 0.017941497419160225}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-25": {"cpp_top": {"KL": {"mean": 0.0012016210504225454, "std": 0.009473707988789824}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.9999609375, "std": 0.00039062499999999986}, "a_after": {"mean": 0.9999609375, "std": 0.00039062499999999986}, "delta_bias": {"mean": 0.0001171875, "std": 0.0011718749999999997}}, "python_top": {"KL": {"mean": 0.0003618762397672981, "std": 0.0019203787898426534}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-31": {"cpp_top": {"KL": {"mean": 0.0002266751705610659, "std": 0.0012003071936928455}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 0.0010366836651519407, "std": 0.007039838999078779}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-7": {"cpp_top": {"KL": {"mean": 6.223134980245959e-05, "std": 0.00037487988923856554}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 0.00015556650015810191, "std": 0.0011191876813501241}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}}, "bottom_5_attn": {"attn-13": {"cpp_top": {"KL": {"mean": 3.097540757153183e-05, "std": 0.00017181341285901246}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}, "python_top": {"KL": {"mean": 5.920487158618925e-05, "std": 0.0003561842999666949}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-12": {"cpp_top": {"KL": {"mean": 6.230265058547957e-05, "std": 0.0003565746924914399}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.99984375, "std": 0.0015625000000000003}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": -3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 2.213442608308469e-05, "std": 0.00012417181177881237}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-16": {"cpp_top": {"KL": {"mean": 0.00021898833329032642, "std": 0.0011027234918774198}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999765625, "std": 0.001987931648222606}, "a_after": {"mean": 0.9999609375, "std": 0.00039062499999999986}, "delta_bias": {"mean": -7.8125e-05, "std": 0.0007812499999999999}}, "python_top": {"KL": {"mean": 0.00011294419164187275, "std": 0.0006323991461495959}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-15": {"cpp_top": {"KL": {"mean": 0.0008030621614307166, "std": 0.004767636878305668}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}, "python_top": {"KL": {"mean": 0.00016288908089336473, "std": 0.0011140869201136355}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "attn-11": {"cpp_top": {"KL": {"mean": 9.291151858633385e-05, "std": 0.000678977348830166}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}, "python_top": {"KL": {"mean": 0.00028157306251387127, "std": 0.0017710192621670756}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}}, "top_5_mlp": {"mlp-32": {"cpp_top": {"KL": {"mean": 0.00854393576271832, "std": 0.04519850107019277}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 0.9999609375, "std": 0.00039062499999999986}, "delta_bias": {"mean": 7.8125e-05, "std": 0.0007812499999999999}}, "python_top": {"KL": {"mean": 0.00847351081429224, "std": 0.06950949046715423}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-31": {"cpp_top": {"KL": {"mean": 0.24297992425155826, "std": 0.8592923464679407}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.9826104736328125, "std": 0.11450005199476491}, "a_after": {"mean": 0.9999609375, "std": 0.00039062499999999986}, "delta_bias": {"mean": -0.0172332763671875, "std": 0.11394644261287558}}, "python_top": {"KL": {"mean": 0.006287749037728645, "std": 0.049799210220348736}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-24": {"cpp_top": {"KL": {"mean": 0.028874538555683102, "std": 0.11627726320030506}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.993125, "std": 0.06875}, "a_after": {"mean": 0.9997265625, "std": 0.0027343750000000003}, "delta_bias": {"mean": -0.006484375, "std": 0.06484375000000002}}, "python_top": {"KL": {"mean": 0.11971517531201244, "std": 0.4188329299530755}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-25": {"cpp_top": {"KL": {"mean": 0.055416407734155654, "std": 0.188861637797313}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0001171875, "std": 0.0011718749999999997}}, "python_top": {"KL": {"mean": 0.005849193841277156, "std": 0.026926268105034776}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-29": {"cpp_top": {"KL": {"mean": 0.001482484706557443, "std": 0.009905789239165566}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 0.0018381448451873438, "std": 0.0122600042699259}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}}, "bottom_5_mlp": {"mlp-16": {"cpp_top": {"KL": {"mean": 0.0018382657402253243, "std": 0.009100452152004185}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.99953125, "std": 0.004687499999999998}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": -0.0003515625, "std": 0.003515625}}, "python_top": {"KL": {"mean": 0.00010673164524632739, "std": 0.0005597048620073809}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-14": {"cpp_top": {"KL": {"mean": 0.0033928895244025627, "std": 0.02233874786592484}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.99953125, "std": 0.004687499999999998}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": -0.0003515625, "std": 0.003515625}}, "python_top": {"KL": {"mean": 0.0016458836314996006, "std": 0.01019269886329106}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-15": {"cpp_top": {"KL": {"mean": 0.0014085328431974631, "std": 0.0067000171921488775}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999375, "std": 0.0062499999999999995}, "a_after": {"mean": 0.9999609375, "std": 0.00039062499999999986}, "delta_bias": {"mean": -0.00046875, "std": 0.004687499999999999}}, "python_top": {"KL": {"mean": 0.0005826187639104319, "std": 0.0040714416428356305}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-3": {"cpp_top": {"KL": {"mean": 0.0001077313968926319, "std": 0.0006308854666886128}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 7.345061630701367e-05, "std": 0.00040165999357834233}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}, "mlp-2": {"cpp_top": {"KL": {"mean": 0.00010536870542637188, "std": 0.0005368504715327344}, "b_before": {"mean": 0.9998828125, "std": 0.0011718749999999995}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 0.999921875, "std": 0.0007812499999999999}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 3.90625e-05, "std": 0.00039062499999999997}}, "python_top": {"KL": {"mean": 3.330810288161956e-05, "std": 0.0003172956297143371}, "b_before": {"mean": 1.0, "std": 0.0}, "a_before": {"mean": 1.0, "std": 0.0}, "b_after": {"mean": 1.0, "std": 0.0}, "a_after": {"mean": 1.0, "std": 0.0}, "delta_bias": {"mean": 0.0, "std": 0.0}}}}}}