{"meta-llama/Llama-2-7b-chat-hf": {"original": 6.9423699378967285, "qbit-8-sparsity-2-q": 6.93937873840332, "qbit-8-sparsity-4-q": 6.9066267013549805, "qbit-8-sparsity-8-q": 7.004047870635986, "qbit-8-sparsity-16-q": 11.520689964294434, "qbit-8-sparsity-32-q": 707.76611328125, "qbit-8-sparsity-64-q": 2640.755859375, "qbit-4-sparsity-4-q": 6.9644389152526855, "qbit-4-sparsity-8-q": 6.978972911834717, "qbit-4-sparsity-16-q": 7.799103260040283, "qbit-4-sparsity-32-q": 32.37739562988281, "qbit-4-sparsity-64-q": 2358.94287109375, "qbit-8-sparsity-2-k": 6.9394121170043945, "qbit-8-sparsity-4-k": 6.970418930053711, "qbit-8-sparsity-8-k": 7.489911079406738, "qbit-8-sparsity-16-k": 22.658857345581055, "qbit-8-sparsity-32-k": 2781.169677734375, "qbit-8-sparsity-64-k": 3986.422119140625, "qbit-4-sparsity-4-k": 6.9644622802734375, "qbit-4-sparsity-8-k": 7.198485374450684, "qbit-4-sparsity-16-k": 9.442633628845215, "qbit-4-sparsity-32-k": 2006.654541015625, "qbit-4-sparsity-64-k": 3950.90966796875, "qbit-8-sparsity-2-qk": 6.939413070678711, "qbit-8-sparsity-4-qk": 6.914844989776611, "qbit-8-sparsity-8-qk": 6.983773708343506, "qbit-8-sparsity-16-qk": 8.863497734069824, "qbit-8-sparsity-32-qk": NaN, "qbit-8-sparsity-64-qk": NaN, "qbit-4-sparsity-4-qk": 6.964434623718262, "qbit-4-sparsity-8-qk": 6.919350624084473, "qbit-4-sparsity-16-qk": 7.144011497497559, "qbit-4-sparsity-32-qk": 14.934636116027832, "qbit-4-sparsity-64-qk": NaN}, "meta-llama/Llama-2-7b-hf": {"original": 5.472101211547852, "qbit-8-sparsity-2-q": 5.476826190948486, "qbit-8-sparsity-4-q": 5.501928329467773, "qbit-8-sparsity-8-q": 5.649573802947998, "qbit-8-sparsity-16-q": 12.617136001586914, "qbit-8-sparsity-32-q": 447.3738098144531, "qbit-8-sparsity-64-q": 2318.0458984375, "qbit-4-sparsity-4-q": 5.531469345092773, "qbit-4-sparsity-8-q": 5.61853551864624, "qbit-4-sparsity-16-q": 6.450514793395996, "qbit-4-sparsity-32-q": 37.65435791015625, "qbit-4-sparsity-64-q": 1275.9969482421875, "qbit-8-sparsity-2-k": 5.476849555969238, "qbit-8-sparsity-4-k": 5.538211822509766, "qbit-8-sparsity-8-k": 5.793616771697998, "qbit-8-sparsity-16-k": 14.047019958496094, "qbit-8-sparsity-32-k": 3749.696533203125, "qbit-8-sparsity-64-k": 5401.18505859375, "qbit-4-sparsity-4-k": 5.531519412994385, "qbit-4-sparsity-8-k": 5.663354873657227, "qbit-4-sparsity-16-k": 6.612690448760986, "qbit-4-sparsity-32-k": 527.239501953125, "qbit-4-sparsity-64-k": 5843.1826171875, "qbit-8-sparsity-2-qk": 5.47688102722168, "qbit-8-sparsity-4-qk": 5.500290393829346, "qbit-8-sparsity-8-qk": 5.601007461547852, "qbit-8-sparsity-16-qk": 7.248810768127441, "qbit-8-sparsity-32-qk": NaN, "qbit-8-sparsity-64-qk": NaN, "qbit-4-sparsity-4-qk": 5.531477928161621, "qbit-4-sparsity-8-qk": 5.563470840454102, "qbit-4-sparsity-16-qk": 5.760659694671631, "qbit-4-sparsity-32-qk": 12.007704734802246, "qbit-4-sparsity-64-qk": NaN}, "huggyllama/llama-7b": {"original": 5.677066802978516, "qbit-8-sparsity-2-q": 5.678789138793945, "qbit-8-sparsity-4-q": 5.6957292556762695, "qbit-8-sparsity-8-q": 5.941944599151611, "qbit-8-sparsity-16-q": 8.517464637756348, "qbit-8-sparsity-32-q": 63.14952850341797, "qbit-8-sparsity-64-q": 432.0509033203125, "qbit-4-sparsity-4-q": 5.687862873077393, "qbit-4-sparsity-8-q": 5.7546916007995605, "qbit-4-sparsity-16-q": 6.435661315917969, "qbit-4-sparsity-32-q": 12.61979866027832, "qbit-4-sparsity-64-q": 180.32801818847656, "qbit-8-sparsity-2-k": 5.678793907165527, "qbit-8-sparsity-4-k": 5.690573692321777, "qbit-8-sparsity-8-k": 5.7247538566589355, "qbit-8-sparsity-16-k": 5.947046279907227, "qbit-8-sparsity-32-k": 38.069732666015625, "qbit-8-sparsity-64-k": 1161.0252685546875, "qbit-4-sparsity-4-k": 5.687858581542969, "qbit-4-sparsity-8-k": 5.722049236297607, "qbit-4-sparsity-16-k": 5.8346405029296875, "qbit-4-sparsity-32-k": 6.509349346160889, "qbit-4-sparsity-64-k": 704.0198364257812, "qbit-8-sparsity-2-qk": 5.6787800788879395, "qbit-8-sparsity-4-qk": 5.688771724700928, "qbit-8-sparsity-8-qk": 5.728391647338867, "qbit-8-sparsity-16-qk": 6.327374458312988, "qbit-8-sparsity-32-qk": 88.26678466796875, "qbit-8-sparsity-64-qk": 2519.234375, "qbit-4-sparsity-4-qk": 5.6878981590271, "qbit-4-sparsity-8-qk": 5.717547416687012, "qbit-4-sparsity-16-qk": 5.80483341217041, "qbit-4-sparsity-32-qk": 7.6578593254089355, "qbit-4-sparsity-64-qk": 459.91162109375}, "lmsys/vicuna-7b-v1.5-16k": {"original": 7.153844356536865, "qbit-8-sparsity-2-q": 7.157037734985352, "qbit-8-sparsity-4-q": 7.201183795928955, "qbit-8-sparsity-8-q": 7.361227989196777, "qbit-8-sparsity-16-q": 10.949952125549316, "qbit-8-sparsity-32-q": 241.02024841308594, "qbit-8-sparsity-64-q": 4128.9130859375, "qbit-4-sparsity-4-q": 7.261904239654541, "qbit-4-sparsity-8-q": 7.401001453399658, "qbit-4-sparsity-16-q": 7.7916388511657715, "qbit-4-sparsity-32-q": 27.859703063964844, "qbit-4-sparsity-64-q": 1679.6610107421875, "qbit-8-sparsity-2-k": 7.15700626373291, "qbit-8-sparsity-4-k": 7.27395486831665, "qbit-8-sparsity-8-k": 8.617789268493652, "qbit-8-sparsity-16-k": 628.7724609375, "qbit-8-sparsity-32-k": 3119.384765625, "qbit-8-sparsity-64-k": 2724.096435546875, "qbit-4-sparsity-4-k": 7.262019634246826, "qbit-4-sparsity-8-k": 7.707176685333252, "qbit-4-sparsity-16-k": 27.585514068603516, "qbit-4-sparsity-32-k": 831.9708251953125, "qbit-4-sparsity-64-k": 4231.10205078125, "qbit-8-sparsity-2-qk": 7.156983375549316, "qbit-8-sparsity-4-qk": 7.143520832061768, "qbit-8-sparsity-8-qk": 7.73736047744751, "qbit-8-sparsity-16-qk": 30.14275550842285, "qbit-8-sparsity-32-qk": NaN, "qbit-8-sparsity-64-qk": NaN, "qbit-4-sparsity-4-qk": 7.261929512023926, "qbit-4-sparsity-8-qk": 7.369661331176758, "qbit-4-sparsity-16-qk": 8.706672668457031, "qbit-4-sparsity-32-qk": 62.50939178466797, "qbit-4-sparsity-64-qk": NaN}}