{"cpp-python": {"top_5_attn": {"attn-13": {"cpp_top": {"KL": {"mean": 0.03919082641601562, "std": 0.016065284653695996}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.89435546875, "std": 0.0357537505401534}, "a_after": {"mean": 0.8980517578125, "std": 0.03655921190783193}, "delta_bias": {"mean": 0.00662109375, "std": 0.007173997394695732}}, "python_top": {"KL": {"mean": 0.011238174438476562, "std": 0.004617933300280371}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.930537109375, "std": 0.024511093874082603}, "a_after": {"mean": 0.910673828125, "std": 0.03422734237619395}, "delta_bias": {"mean": -0.0047900390625, "std": 0.0031220508878236327}}}, "attn-11": {"cpp_top": {"KL": {"mean": 0.09029983520507813, "std": 0.03682472998506627}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.881572265625, "std": 0.03735751559702917}, "a_after": {"mean": 0.892890625, "std": 0.038147824702447784}, "delta_bias": {"mean": -0.0010009765625, "std": 0.005517019412525258}}, "python_top": {"KL": {"mean": 0.05800872802734375, "std": 0.028297067423026245}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.914921875, "std": 0.02961825905597068}, "a_after": {"mean": 0.8948779296875, "std": 0.039668174182191866}, "delta_bias": {"mean": -0.004609375, "std": 0.004983050198126427}}}, "attn-2": {"cpp_top": {"KL": {"mean": 0.007548942565917969, "std": 0.0025133977216148583}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.90841796875, "std": 0.03295985716846146}, "a_after": {"mean": 0.9187646484375, "std": 0.03299909511559733}, "delta_bias": {"mean": -2.9296875e-05, "std": 0.002676964700411688}}, "python_top": {"KL": {"mean": 0.01486083984375, "std": 0.007057098377247171}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.921640625, "std": 0.025371503884558153}, "a_after": {"mean": 0.8966845703125, "std": 0.033260961558920235}, "delta_bias": {"mean": 0.000302734375, "std": 0.003318788991493433}}}, "attn-12": {"cpp_top": {"KL": {"mean": 0.00487813949584961, "std": 0.0029515671422643956}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.8975634765625, "std": 0.04012019780162973}, "a_after": {"mean": 0.9121435546875, "std": 0.03531797090302304}, "delta_bias": {"mean": -0.0042626953125, "std": 0.008333603536675723}}, "python_top": {"KL": {"mean": 0.0034163284301757814, "std": 0.0014156971981980503}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.924658203125, "std": 0.025136917567476}, "a_after": {"mean": 0.89896484375, "std": 0.037021119842849164}, "delta_bias": {"mean": 0.0010400390625, "std": 0.002431675486419001}}}, "attn-1": {"cpp_top": {"KL": {"mean": 0.0038873815536499025, "std": 0.002150547616533394}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.9131396484375, "std": 0.034989869095104126}, "a_after": {"mean": 0.9233447265625, "std": 0.03433964244642358}, "delta_bias": {"mean": 0.0001123046875, "std": 0.0011484915005631632}}, "python_top": {"KL": {"mean": 0.004070701599121093, "std": 0.0011504262770699305}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9194580078125, "std": 0.025708781696211356}, "a_after": {"mean": 0.8938720703125, "std": 0.03686405848327818}, "delta_bias": {"mean": 0.0009326171875, "std": 0.001553558040027891}}}}, "bottom_5_attn": {"attn-15": {"cpp_top": {"KL": {"mean": 0.028169517517089845, "std": 0.010120314373488487}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.884423828125, "std": 0.03484369089175397}, "a_after": {"mean": 0.895517578125, "std": 0.03529626163503054}, "delta_bias": {"mean": -0.0007763671875, "std": 0.006627765428039563}}, "python_top": {"KL": {"mean": 0.012774295806884765, "std": 0.0051217505541695995}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9275244140625, "std": 0.023768923965664632}, "a_after": {"mean": 0.9080810546875, "std": 0.03326988648161211}, "delta_bias": {"mean": -0.0052099609375, "std": 0.004115241041685236}}}, "attn-4": {"cpp_top": {"KL": {"mean": 0.0010507732629776, "std": 0.0008143425487630201}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.9068798828125, "std": 0.035922202773552646}, "a_after": {"mean": 0.9169189453125, "std": 0.034856880977085794}, "delta_bias": {"mean": 0.0002783203125, "std": 0.001449564875332254}}, "python_top": {"KL": {"mean": 0.002499804496765137, "std": 0.002333583159297916}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9255908203125, "std": 0.02498319571639514}, "a_after": {"mean": 0.8994384765625, "std": 0.03735409587153306}, "delta_bias": {"mean": 0.0014990234375, "std": 0.0016717714104565074}}}, "attn-9": {"cpp_top": {"KL": {"mean": 0.02832763671875, "std": 0.010893453173426514}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.89501953125, "std": 0.03877680950187507}, "a_after": {"mean": 0.9076708984375, "std": 0.035342090375948584}, "delta_bias": {"mean": -0.002333984375, "std": 0.0083317899719762}}, "python_top": {"KL": {"mean": 0.019297142028808594, "std": 0.008616941335634332}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9303466796875, "std": 0.023015954753309014}, "a_after": {"mean": 0.91587890625, "std": 0.026741661187294423}, "delta_bias": {"mean": -0.010185546875, "std": 0.009758475721124132}}}, "attn-5": {"cpp_top": {"KL": {"mean": 0.0011783093214035034, "std": 0.0007149050419776516}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.9068408203125, "std": 0.0347245237184963}, "a_after": {"mean": 0.91693359375, "std": 0.034209498408461166}, "delta_bias": {"mean": 0.000224609375, "std": 0.0012128753916793753}}, "python_top": {"KL": {"mean": 0.002115063667297363, "std": 0.0009780631185652617}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9246142578125, "std": 0.025099371403307553}, "a_after": {"mean": 0.8970068359375, "std": 0.036193164407871}, "delta_bias": {"mean": 0.0029541015625, "std": 0.00178885067492618}}}, "attn-6": {"cpp_top": {"KL": {"mean": 0.002166128158569336, "std": 0.002012850333862295}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.9051708984375, "std": 0.03609648940548047}, "a_after": {"mean": 0.917509765625, "std": 0.034484879627123315}, "delta_bias": {"mean": -0.002021484375, "std": 0.0017789213326915904}}, "python_top": {"KL": {"mean": 0.005530681610107422, "std": 0.003524842851946846}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.921015625, "std": 0.026119427728712095}, "a_after": {"mean": 0.8964111328125, "std": 0.03679952290298581}, "delta_bias": {"mean": -4.8828125e-05, "std": 0.002041742572305256}}}}, "top_5_mlp": {"mlp-13": {"cpp_top": {"KL": {"mean": 0.214420166015625, "std": 0.05422862432801505}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.856640625, "std": 0.034299539596955114}, "a_after": {"mean": 0.86314453125, "std": 0.03128135572289404}, "delta_bias": {"mean": 0.0038134765625, "std": 0.011022070698607073}}, "python_top": {"KL": {"mean": 0.370491943359375, "std": 0.10030498588161264}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.853935546875, "std": 0.039206439299054006}, "a_after": {"mean": 0.838134765625, "std": 0.041860850436839486}, "delta_bias": {"mean": -0.0088525390625, "std": 0.011397383027606236}}}, "mlp-11": {"cpp_top": {"KL": {"mean": 0.3145306396484375, "std": 0.10387612552254404}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.84560791015625, "std": 0.05139651612273181}, "a_after": {"mean": 0.862626953125, "std": 0.02978925691572545}, "delta_bias": {"mean": -0.00670166015625, "std": 0.031162010450087103}}, "python_top": {"KL": {"mean": 0.240853271484375, "std": 0.06905407449943422}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.881123046875, "std": 0.029822804256698913}, "a_after": {"mean": 0.8511181640625, "std": 0.04511788007516646}, "delta_bias": {"mean": 0.0053515625, "std": 0.012297138236508711}}}, "mlp-12": {"cpp_top": {"KL": {"mean": 0.1748974609375, "std": 0.05086199505403611}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.871845703125, "std": 0.03727736354261735}, "a_after": {"mean": 0.8853125, "std": 0.024726855153792568}, "delta_bias": {"mean": -0.0031494140625, "std": 0.01914285125084512}}, "python_top": {"KL": {"mean": 0.1459942626953125, "std": 0.03308960912174892}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.8834375, "std": 0.03451882112928024}, "a_after": {"mean": 0.858310546875, "std": 0.0407785130935495}, "delta_bias": {"mean": 0.0004736328125, "std": 0.00874551209352107}}}, "mlp-14": {"cpp_top": {"KL": {"mean": 0.0710491943359375, "std": 0.030190815618473584}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.9271044921875, "std": 0.03608247600484395}, "a_after": {"mean": 0.9344482421875, "std": 0.03222935538658771}, "delta_bias": {"mean": 0.0029736328125, "std": 0.011047256258877184}}, "python_top": {"KL": {"mean": 0.11917236328125, "std": 0.06841555689751797}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9494677734375, "std": 0.02874222566133802}, "a_after": {"mean": 0.9396728515625, "std": 0.03488222786050144}, "delta_bias": {"mean": -0.0148583984375, "std": 0.010579143671538928}}}, "mlp-15": {"cpp_top": {"KL": {"mean": 0.50936279296875, "std": 0.17483635559528335}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.806212158203125, "std": 0.07509537938272305}, "a_after": {"mean": 0.8197216796875, "std": 0.04386288472687224}, "delta_bias": {"mean": -0.003192138671875, "std": 0.058735852727626464}}, "python_top": {"KL": {"mean": 0.14303131103515626, "std": 0.06081150228213798}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9219921875, "std": 0.03506386482938241}, "a_after": {"mean": 0.910810546875, "std": 0.03687214818636579}, "delta_bias": {"mean": -0.0134716796875, "std": 0.011017787362299306}}}}, "bottom_5_mlp": {"mlp-8": {"cpp_top": {"KL": {"mean": 0.01959796905517578, "std": 0.00826214688982523}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.906357421875, "std": 0.03308494820514142}, "a_after": {"mean": 0.9165478515625, "std": 0.0317024913390487}, "delta_bias": {"mean": 0.000126953125, "std": 0.002843437225894982}}, "python_top": {"KL": {"mean": 0.0343670654296875, "std": 0.01220217908505337}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9192822265625, "std": 0.027251795601670323}, "a_after": {"mean": 0.900029296875, "std": 0.03255079750064597}, "delta_bias": {"mean": -0.005400390625, "std": 0.006031054640892381}}}, "mlp-4": {"cpp_top": {"KL": {"mean": 0.01935894012451172, "std": 0.007819260209042196}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.91515625, "std": 0.03343277173141831}, "a_after": {"mean": 0.9272607421875, "std": 0.03280247089609249}, "delta_bias": {"mean": -0.001787109375, "std": 0.0030820886597532305}}, "python_top": {"KL": {"mean": 0.013326988220214844, "std": 0.006310314772105467}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9287158203125, "std": 0.02709953117168396}, "a_after": {"mean": 0.9057421875, "std": 0.036430442016642285}, "delta_bias": {"mean": -0.0016796875, "std": 0.0042506906964723075}}}, "mlp-7": {"cpp_top": {"KL": {"mean": 0.007321758270263672, "std": 0.004145549619537406}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.90390625, "std": 0.034947177169536076}, "a_after": {"mean": 0.9163134765625, "std": 0.03292495481259296}, "delta_bias": {"mean": -0.00208984375, "std": 0.002158234592230835}}, "python_top": {"KL": {"mean": 0.010766029357910156, "std": 0.006960263157457336}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9259130859375, "std": 0.02895762309402432}, "a_after": {"mean": 0.897744140625, "std": 0.04544194836853408}, "delta_bias": {"mean": 0.003515625, "std": 0.005334877397531227}}}, "mlp-5": {"cpp_top": {"KL": {"mean": 0.006991424560546875, "std": 0.003326820353468014}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.9046826171875, "std": 0.035635259997364825}, "a_after": {"mean": 0.9160009765625, "std": 0.0342604407006885}, "delta_bias": {"mean": -0.0010009765625, "std": 0.002157447773771351}}, "python_top": {"KL": {"mean": 0.007850418090820313, "std": 0.003932792341425185}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.9355908203125, "std": 0.02444019819103512}, "a_after": {"mean": 0.913701171875, "std": 0.03495576388743129}, "delta_bias": {"mean": -0.002763671875, "std": 0.0037555132581656705}}}, "mlp-6": {"cpp_top": {"KL": {"mean": 0.020012321472167968, "std": 0.009573631550770144}, "b_before": {"mean": 0.90630859375, "std": 0.03530317267047992}, "a_before": {"mean": 0.9166259765625, "std": 0.034811044044266326}, "b_after": {"mean": 0.894443359375, "std": 0.04571428733212604}, "a_after": {"mean": 0.9115185546875, "std": 0.04309457590588377}, "delta_bias": {"mean": -0.0067578125, "std": 0.00816511823876196}}, "python_top": {"KL": {"mean": 0.028642196655273438, "std": 0.011185563396672094}, "b_before": {"mean": 0.9280517578125, "std": 0.024242847296245693}, "a_before": {"mean": 0.9033984375, "std": 0.03557245056613344}, "b_after": {"mean": 0.915927734375, "std": 0.02522728035736038}, "a_after": {"mean": 0.8860546875, "std": 0.041019178331079875}, "delta_bias": {"mean": 0.0052197265625, "std": 0.005697119752357599}}}}}}