from matplotlib import pyplot as plt, font_manager
import numpy as np

# Global export settings, applied one key at a time through matplotlib's rcParams.
# NOTE(review): per the matplotlib rcParams reference, fonttype 42 selects
# TrueType output for PDF/PS and 'path' makes SVG text render as paths -- confirm
# against the matplotlib version in use.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams['svg.fonttype'] = 'path'
# Any constant string works as the salt; keeping it fixed is for reproducibility.
plt.rcParams['svg.hashsalt'] = 'fixed-salt'

# Shared font settings: size 12, and size 10 for legends (going by the name).
font = font_manager.FontProperties(size=12)
font_legend = font_manager.FontProperties(size=10)

# Metadata keys mapped to None, one dict per output format.
# NOTE(review): presumably passed as `metadata=` when saving figures so these
# fields are omitted from the output; the call sites are not visible here.
strip_svg_meta: dict[str, None] = dict.fromkeys(('Creator', 'Date', 'Format', 'Type'))
strip_pdf_meta: dict[str, None] = dict.fromkeys((
    'Title',
    'Author',
    'Subject',
    'Keywords',
    'Creator',
    'Producer',
    'CreationDate',
    'ModDate',
    'Trapped',
))


def plot_layer() -> None:
    fig, axs = plt.subplots(4, 3, figsize=(10., 10))

    records: list[dict[str, float|int|bool]] = [{'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 8.026964031159877e-05, 't_m8a': 1.7677015624940395e-05, 'speedup': 4.540904529062407}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.97654502093792e-05, 't_m8a': 1.9423870369791986e-05, 'speedup': 4.106568294104273}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.958215288817883e-05, 't_m8a': 2.053301129490137e-05, 'speedup': 3.8758149861798485}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.976941578090191e-05, 't_m8a': 2.2025834769010545e-05, 'speedup': 3.6216296280009437}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.982969563454389e-05, 't_m8a': 2.2905603982508184e-05, 'speedup': 3.485160037495875}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.956661190837621e-05, 't_m8a': 2.5016529485583307e-05, 'speedup': 3.180561554480584}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.638295087963343e-05, 't_m8a': 2.013898082077503e-05, 'speedup': 3.7927912817136242}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.6074143871665e-05, 't_m8a': 2.3450353182852268e-05, 'speedup': 3.2440510928975312}, {'n_out_features': 
6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.635147776454687e-05, 't_m8a': 2.4446260184049608e-05, 'speedup': 3.1232375500267207}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.625544629991054e-05, 't_m8a': 2.600185200572014e-05, 'speedup': 2.932692882150669}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.62335378676653e-05, 't_m8a': 2.7483359910547733e-05, 'speedup': 2.773807064194066}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.654010411351919e-05, 't_m8a': 2.897873241454363e-05, 'speedup': 2.6412509359831704}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.644001673907041e-05, 't_m8a': 2.8935997746884824e-05, 'speedup': 2.6416927941356283}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.62817719951272e-05, 't_m8a': 3.471460472792387e-05, 'speedup': 2.197397106865727}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.665364164859057e-05, 't_m8a': 3.6986516788601876e-05, 'speedup': 2.0724752775912356}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.632372714579106e-05, 't_m8a': 3.913834225386381e-05, 'speedup': 1.950101173185388}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 
'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.632127217948437e-05, 't_m8a': 4.0495955385267735e-05, 'speedup': 1.8846640720877952}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.640687096863985e-05, 't_m8a': 4.3450451456010344e-05, 'speedup': 1.7584827869048703}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.677655667066574e-05, 't_m8a': 2.3737444542348384e-05, 'speedup': 3.2344069949776535}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.654600311070681e-05, 't_m8a': 4.088678117841482e-05, 'speedup': 1.8721454931041968}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.680537085980177e-05, 't_m8a': 4.628872778266668e-05, 'speedup': 1.6592672674093751}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.645968440920114e-05, 't_m8a': 5.149252992123365e-05, 'speedup': 1.4848694466199055}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.666349411010742e-05, 't_m8a': 5.592038296163082e-05, 'speedup': 1.3709400767643038}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 7.634019572287797e-05, 't_m8a': 5.9024826623499396e-05, 'speedup': 1.2933573902694846}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 
't_torch': 8.383568190038204e-05, 't_m8a': 4.4807054102420806e-05, 'speedup': 1.8710375761090845}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 8.339056838303805e-05, 't_m8a': 6.889828946441413e-05, 'speedup': 1.2103430873433971}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 8.337985817342997e-05, 't_m8a': 7.799816131591797e-05, 'speedup': 1.0689977400327986}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 8.384888712316752e-05, 't_m8a': 8.719562739133835e-05, 'speedup': 0.9616180264045753}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 8.386153355240822e-05, 't_m8a': 9.572516288608313e-05, 'speedup': 0.8760657179785307}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 8.36626049131155e-05, 't_m8a': 0.00010364818759262562, 'speedup': 0.8071786574979912}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.2575278095901014e-05, 't_m8a': 1.2007237412035466e-05, 'speedup': 4.378632344122898}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.266185104846955e-05, 't_m8a': 1.3509030453860759e-05, 'speedup': 3.898270214752478}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.27628967538476e-05, 't_m8a': 
1.4200357720255852e-05, 'speedup': 3.715603352624342}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.275171622633934e-05, 't_m8a': 1.7393904738128184e-05, 'speedup': 3.0327702158046965}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.263170227408409e-05, 't_m8a': 1.90307991579175e-05, 'speedup': 2.765606522214145}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.2823334932327274e-05, 't_m8a': 2.0045357756316662e-05, 'speedup': 2.6351904303469795}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.342859774827957e-05, 't_m8a': 1.5315767377614975e-05, 'speedup': 3.4884701778879887}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.345774255692959e-05, 't_m8a': 1.8097385764122008e-05, 'speedup': 2.9538930790163813}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.3490458056330683e-05, 't_m8a': 1.8805827014148235e-05, 'speedup': 2.844355529596655}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.333483964204788e-05, 't_m8a': 2.139255590736866e-05, 'speedup': 2.4931494802674194}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.3524397313594816e-05, 't_m8a': 2.308461628854275e-05, 'speedup': 2.318617586906125}, 
{'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.336717329919338e-05, 't_m8a': 2.4304463528096677e-05, 'speedup': 2.1957766415004123}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.339312832802534e-05, 't_m8a': 2.0905308425426482e-05, 'speedup': 2.554046428852727}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.345167685300112e-05, 't_m8a': 2.5688990950584413e-05, 'speedup': 2.0807230986931824}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.333073902875185e-05, 't_m8a': 2.73929163813591e-05, 'speedup': 1.9468806565278116}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.334198847413063e-05, 't_m8a': 2.8773315250873567e-05, 'speedup': 1.8538700879284722}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.3328998386859895e-05, 't_m8a': 2.9640014283359052e-05, 'speedup': 1.7992231001319279}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.3343169391155244e-05, 't_m8a': 3.07898661121726e-05, 'speedup': 1.7324911123944875}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.3401698358356955e-05, 't_m8a': 1.8082676455378533e-05, 'speedup': 2.9531965851478303}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 
4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.383254867047071e-05, 't_m8a': 2.7502174489200116e-05, 'speedup': 1.9573924487901975}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.33433984965086e-05, 't_m8a': 3.162851929664612e-05, 'speedup': 1.686560094584166}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.3403127938508986e-05, 't_m8a': 3.539936989545822e-05, 'speedup': 1.5085897883555455}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.339081399142742e-05, 't_m8a': 3.8746670819818975e-05, 'speedup': 1.377945843133391}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.358201172202826e-05, 't_m8a': 4.215518664568662e-05, 'speedup': 1.271065697618275}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.364831071346998e-05, 't_m8a': 3.061102796345949e-05, 'speedup': 1.7525811540047}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.383360758423805e-05, 't_m8a': 4.8045712523162364e-05, 'speedup': 1.1204664216039133}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.345889739692211e-05, 't_m8a': 5.443594977259636e-05, 'speedup': 0.9820513396063476}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 
'bit_width': 2, 'rep': 0, 't_torch': 5.374079011380672e-05, 't_m8a': 6.10967967659235e-05, 'speedup': 0.8796007803764343}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.363894067704678e-05, 't_m8a': 6.669754162430763e-05, 'speedup': 0.8042116601415831}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 5.372305028140545e-05, 't_m8a': 7.246238179504871e-05, 'speedup': 0.7413922776283389}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00028628144972026346, 't_m8a': 5.6870317086577416e-05, 'speedup': 5.033934473838759}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00028630973771214484, 't_m8a': 6.105359829962253e-05, 'speedup': 4.689481794456576}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002863377286121249, 't_m8a': 6.490679737180471e-05, 'speedup': 4.411521446234675}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002864053370431066, 't_m8a': 6.927272025495768e-05, 'speedup': 4.134460664876362}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002863596472889185, 't_m8a': 7.41870477795601e-05, 'speedup': 3.8599682270658557}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 
0.00028636554442346097, 't_m8a': 7.980057783424854e-05, 'speedup': 3.5885146723907506}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029016115423291923, 't_m8a': 7.070069387555122e-05, 'speedup': 4.104077885623961}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029018633998930456, 't_m8a': 7.876456901431084e-05, 'speedup': 3.684224310763133}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029018588364124295, 't_m8a': 8.245611004531383e-05, 'speedup': 3.519276903576594}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002904608957469463, 't_m8a': 8.602217771112919e-05, 'speedup': 3.3765815220621613}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.000290367248468101, 't_m8a': 8.982883300632239e-05, 'speedup': 3.232450414308112}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002904055221006274, 't_m8a': 9.422280080616474e-05, 'speedup': 3.082115152764881}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029067083355039356, 't_m8a': 0.00010109423566609621, 'speedup': 2.8752463642976562}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002906319247558713, 't_m8a': 0.00011940313316881657, 
'speedup': 2.434039350918582}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002904901662841439, 't_m8a': 0.00012364141922444104, 'speedup': 2.349456744400753}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002903047176077962, 't_m8a': 0.00012860501371324062, 'speedup': 2.2573359251382565}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002904986469075084, 't_m8a': 0.00013292555790394545, 'speedup': 2.1854235670571964}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029029275942593814, 't_m8a': 0.00013875860441476105, 'speedup': 2.0920703307034483}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002910087928175926, 't_m8a': 8.67392448708415e-05, 'speedup': 3.354984162600416}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029114265367388725, 't_m8a': 0.00013101603649556636, 'speedup': 2.222190973421331}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029117869026958944, 't_m8a': 0.00014799858629703521, 'speedup': 1.9674423760047934}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002909371377900243, 't_m8a': 0.0001656268183141947, 'speedup': 1.7565823020165459}, {'n_out_features': 
24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029103265702724456, 't_m8a': 0.0001804355587810278, 'speedup': 1.6129451367201666}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029118797462433576, 't_m8a': 0.0001956597128883004, 'speedup': 1.4882367469820994}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029258078802376985, 't_m8a': 0.00016583321988582612, 'speedup': 1.764307466412385}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029344414174556734, 't_m8a': 0.0002501075863838196, 'speedup': 1.173271654763973}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029268628358840944, 't_m8a': 0.00028715606685727834, 'speedup': 1.0192585752815724}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029344857297837734, 't_m8a': 0.00032241521682590244, 'speedup': 0.9101573302504314}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0002926715780049562, 't_m8a': 0.00035080128721892836, 'speedup': 0.834294481429042}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00029342499002814294, 't_m8a': 0.00038250531163066626, 'speedup': 0.7671135043255657}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 
12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016380284167826176, 't_m8a': 3.625915199518204e-05, 'speedup': 4.517558538049295}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016397638246417045, 't_m8a': 4.030289314687252e-05, 'speedup': 4.068600779269239}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001637709764763713, 't_m8a': 4.322708025574684e-05, 'speedup': 3.788619900012763}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016379993036389352, 't_m8a': 4.641646705567837e-05, 'speedup': 3.528918522976104}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016382158640772105, 't_m8a': 4.90537341684103e-05, 'speedup': 3.339635385255117}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001635839184746146, 't_m8a': 5.2508960478007796e-05, 'speedup': 3.1153524462387345}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016441957838833333, 't_m8a': 4.563691094517708e-05, 'speedup': 3.6027762392999834}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016430155001580715, 't_m8a': 5.067369155585766e-05, 'speedup': 3.242344202113189}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': 
True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016363272350281477, 't_m8a': 5.3468088619410994e-05, 'speedup': 3.060381018434419}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016375452186912298, 't_m8a': 5.565699189901352e-05, 'speedup': 2.9422093484004015}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016356158815324307, 't_m8a': 5.86231080815196e-05, 'speedup': 2.790053163435126}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001635607900097966, 't_m8a': 6.190275400876999e-05, 'speedup': 2.64222153971735}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001635074857622385, 't_m8a': 6.386547535657883e-05, 'speedup': 2.560185841400701}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016388782113790513, 't_m8a': 7.141064666211605e-05, 'speedup': 2.2950054200370236}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001641900623217225, 't_m8a': 7.48936114832759e-05, 'speedup': 2.1923106533377275}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001634097760543227, 't_m8a': 7.80749935656786e-05, 'speedup': 2.092984816154463}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 
0.00016333787236362695, 't_m8a': 8.08796864002943e-05, 'speedup': 2.019516638024855}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016321283392608165, 't_m8a': 8.449948206543922e-05, 'speedup': 1.931524666620846}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016443481389433147, 't_m8a': 5.33489165827632e-05, 'speedup': 3.0822521698118166}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016443685349076988, 't_m8a': 7.16658839955926e-05, 'speedup': 2.294492781263713}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001647700248286128, 't_m8a': 8.38506706058979e-05, 'speedup': 1.9650412291040544}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001640696981921792, 't_m8a': 9.511527139693498e-05, 'speedup': 1.7249564216400504}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001640498787164688, 't_m8a': 0.000104750357568264, 'speedup': 1.5661032814094245}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001643531359732151, 't_m8a': 0.00011514626629650592, 'speedup': 1.427342294799205}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016502743680030108, 't_m8a': 9.309584181755782e-05, 
'speedup': 1.7726617384663577}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016451739799231292, 't_m8a': 0.00012736716121435166, 'speedup': 1.2916782977948258}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001650064829736948, 't_m8a': 0.0001474753897637129, 'speedup': 1.1188747033526778}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016528973914682865, 't_m8a': 0.00016587025672197341, 'speedup': 0.9965001707562446}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.00016570779867470264, 't_m8a': 0.00018359558749943972, 'speedup': 0.9025696147256717}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 2, 'rep': 0, 't_torch': 0.0001658139731734991, 't_m8a': 0.00020040313992649317, 'speedup': 0.8274020718154356}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.955458108335734e-05, 't_m8a': 1.9372565671801565e-05, 'speedup': 4.10655885395479}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.957077212631702e-05, 't_m8a': 2.054094336926937e-05, 'speedup': 3.8737642520041335}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.947610225528479e-05, 't_m8a': 2.2129843942821027e-05, 'speedup': 3.5913539408879123}, 
{'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.953392155468464e-05, 't_m8a': 2.381329797208309e-05, 'speedup': 3.3398952823722357}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.96034000813961e-05, 't_m8a': 2.4889436550438406e-05, 'speedup': 3.198280520335602}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.97886187210679e-05, 't_m8a': 2.65782717615366e-05, 'speedup': 3.0020243391647443}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.555789779871702e-05, 't_m8a': 2.196161076426506e-05, 'speedup': 3.4404533715560346}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.564410753548145e-05, 't_m8a': 2.8562228195369243e-05, 'speedup': 2.6483965822997497}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.571698632091283e-05, 't_m8a': 3.0157554894685745e-05, 'speedup': 2.510713702928728}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.575986068695784e-05, 't_m8a': 3.192028310149908e-05, 'speedup': 2.373408169534684}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.562404219061136e-05, 't_m8a': 3.440338931977749e-05, 'speedup': 2.198156742281707}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 
'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.58744552731514e-05, 't_m8a': 3.625191748142242e-05, 'speedup': 2.0929777110971814}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.571491785347462e-05, 't_m8a': 3.070942778140307e-05, 'speedup': 2.4655268210280967}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.571062166243791e-05, 't_m8a': 3.714080154895782e-05, 'speedup': 2.0384757060947805}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.58013902232051e-05, 't_m8a': 3.9096794091165063e-05, 'speedup': 1.9388134496770515}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.581045012921094e-05, 't_m8a': 4.206192865967751e-05, 'speedup': 1.8023531622287763}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.577476184815169e-05, 't_m8a': 4.384996276348829e-05, 'speedup': 1.7280462074016996}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.555348239839076e-05, 't_m8a': 4.629618767648935e-05, 'speedup': 1.6319590486876996}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.567933667451144e-05, 't_m8a': 2.5976617820560933e-05, 'speedup': 2.913363748786802}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 
3, 'rep': 0, 't_torch': 7.598673272877932e-05, 't_m8a': 4.3573502451181414e-05, 'speedup': 1.7438747967050117}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.591573055833578e-05, 't_m8a': 4.867953062057495e-05, 'speedup': 1.5595000524974074}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.604091241955758e-05, 't_m8a': 5.382231064140797e-05, 'speedup': 1.4128139708861889}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.604097761213779e-05, 't_m8a': 5.837864615023136e-05, 'speedup': 1.3025478086020403}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 7.596562337130309e-05, 't_m8a': 6.11918279901147e-05, 'speedup': 1.241434123909079}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 8.376691211014986e-05, 't_m8a': 4.8828289844095706e-05, 'speedup': 1.7155405683387643}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 8.405802771449089e-05, 't_m8a': 7.301545329391956e-05, 'speedup': 1.1512361277292913}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 8.363788295537234e-05, 't_m8a': 8.268109615892172e-05, 'speedup': 1.0115720139294184}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 8.35697939619422e-05, 't_m8a': 
9.218717832118273e-05, 'speedup': 0.9065229621280161}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 8.354767505079508e-05, 't_m8a': 0.0001001646090298891, 'speedup': 0.8341037404325561}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 8.363372925668954e-05, 't_m8a': 0.00010877414140850306, 'speedup': 0.7688751037123953}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.2791726775467394e-05, 't_m8a': 1.4224332757294178e-05, 'speedup': 3.711367533102459}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.2815851755440235e-05, 't_m8a': 1.7019083723425864e-05, 'speedup': 3.1033310966525196}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.300951842218637e-05, 't_m8a': 1.8084811046719552e-05, 'speedup': 2.931162414981488}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.264204833656549e-05, 't_m8a': 1.9298193044960497e-05, 'speedup': 2.7278226626669775}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.272405780851841e-05, 't_m8a': 2.0290051586925982e-05, 'speedup': 2.598517681566245}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.278355255723e-05, 't_m8a': 2.1271689794957638e-05, 'speedup': 2.4813991303005043}, 
{'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3565564565360546e-05, 't_m8a': 1.794694364070892e-05, 'speedup': 2.9846622153455793}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.348431970924139e-05, 't_m8a': 2.082214877009392e-05, 'speedup': 2.5686263363010324}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3394656628370286e-05, 't_m8a': 2.1936556324362755e-05, 'speedup': 2.434049166097695}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.427141021937132e-05, 't_m8a': 2.3078212514519693e-05, 'speedup': 2.35162971071639}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.353154893964529e-05, 't_m8a': 2.434368245303631e-05, 'speedup': 2.198991423870158}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3376495838165284e-05, 't_m8a': 2.5430385023355485e-05, 'speedup': 2.0989259812285126}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.341537017375231e-05, 't_m8a': 2.327786572277546e-05, 'speedup': 2.294685037275123}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3524834103882315e-05, 't_m8a': 2.8291016817092895e-05, 'speedup': 1.891937446078065}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 
4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.335077084600925e-05, 't_m8a': 2.9233604669570924e-05, 'speedup': 1.8249809234624335}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3412414155900475e-05, 't_m8a': 2.9981952160596848e-05, 'speedup': 1.781485537359259}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.336145590990782e-05, 't_m8a': 3.1242292374372485e-05, 'speedup': 1.707987854107636}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.352821946144104e-05, 't_m8a': 3.2912985421717166e-05, 'speedup': 1.6263556397445855}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.37857860326767e-05, 't_m8a': 1.9578798674046994e-05, 'speedup': 2.7471443436401106}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.362239759415388e-05, 't_m8a': 2.9558315873146057e-05, 'speedup': 1.8141222194215136}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3364096209406855e-05, 't_m8a': 3.3119379542768e-05, 'speedup': 1.611264973744338}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.343854427337646e-05, 't_m8a': 3.696504328399897e-05, 'speedup': 1.4456507966949512}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 
'bit_width': 3, 'rep': 0, 't_torch': 5.3311360068619254e-05, 't_m8a': 4.033799935132265e-05, 'speedup': 1.3216163648649375}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.344986356794834e-05, 't_m8a': 4.3812473304569724e-05, 'speedup': 1.2199690986715743}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.370727647095919e-05, 't_m8a': 3.366469219326973e-05, 'speedup': 1.5953592019384744}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.388737842440605e-05, 't_m8a': 5.178923439234495e-05, 'speedup': 1.0405131308983249}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3578052669763564e-05, 't_m8a': 5.8046484366059305e-05, 'speedup': 0.9230197703599685}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.366326216608286e-05, 't_m8a': 6.43312307074666e-05, 'speedup': 0.834171234965266}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.364078842103481e-05, 't_m8a': 6.963604036718607e-05, 'speedup': 0.7703021041717854}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 5.3728626109659674e-05, 't_m8a': 7.559662871062756e-05, 'speedup': 0.7107278065973645}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 
0.0002863763775676489, 't_m8a': 6.400559004396201e-05, 'speedup': 4.474240099512438}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002863790038973093, 't_m8a': 7.051700726151466e-05, 'speedup': 4.061133831662244}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00028639079350978135, 't_m8a': 7.603016961365938e-05, 'speedup': 3.7668046114463634}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002864005984738469, 't_m8a': 8.184122014790773e-05, 'speedup': 3.4994663808316733}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002863220097497106, 't_m8a': 8.73588053509593e-05, 'speedup': 3.2775403532526264}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002863676389679313, 't_m8a': 9.351001307368278e-05, 'speedup': 3.0624275364209725}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002906633475795388, 't_m8a': 7.784645818173886e-05, 'speedup': 3.7338031089476376}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029097038507461547, 't_m8a': 8.730879239737987e-05, 'speedup': 3.3326584538047905}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029057171940803525, 't_m8a': 9.146873094141483e-05, 
'speedup': 3.1767328180615593}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002907745521515608, 't_m8a': 9.7294713370502e-05, 'speedup': 2.988595598655809}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029065875988453627, 't_m8a': 0.00010247016604989767, 'speedup': 2.8365208244417257}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002905468000099063, 't_m8a': 0.00010872093308717013, 'speedup': 2.672408999442196}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002915173638612032, 't_m8a': 0.0001086650900542736, 'speedup': 2.6827140502584834}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029111237451434136, 't_m8a': 0.00012461395561695098, 'speedup': 2.3361137448295715}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002912313723936677, 't_m8a': 0.0001282130004838109, 'speedup': 2.2714652281337155}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002912014443427324, 't_m8a': 0.00013379723764955998, 'speedup': 2.1764383888510728}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002904502423480153, 't_m8a': 0.00013786416966468095, 'speedup': 2.106785563315404}, {'n_out_features': 
24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.000290604786016047, 't_m8a': 0.00014369564224034549, 'speedup': 2.0223632497496418}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002917567314580083, 't_m8a': 9.13185691460967e-05, 'speedup': 3.1949332341294037}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002918772157281637, 't_m8a': 0.00013647324219346046, 'speedup': 2.1387138682790807}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029191093612462285, 't_m8a': 0.00015480045694857836, 'speedup': 1.8857239951274165}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029169533401727675, 't_m8a': 0.00017236363515257835, 'speedup': 1.692325261990817}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029124527983367444, 't_m8a': 0.0001874024234712124, 'speedup': 1.5541169342370524}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002912749033421278, 't_m8a': 0.00020269001834094524, 'speedup': 1.4370461146842157}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002934809233993292, 't_m8a': 0.00017771579138934612, 'speedup': 1.6514059955221463}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 
'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002939023328945041, 't_m8a': 0.00026632197760045526, 'speedup': 1.1035601926004985}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002931924760341644, 't_m8a': 0.00030139078106731175, 'speedup': 0.972798421358096}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002936034724116325, 't_m8a': 0.0003376874132081866, 'speedup': 0.8694534084710582}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00029299242049455644, 't_m8a': 0.0003660442139953375, 'speedup': 0.800429044613415}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0002937023062258959, 't_m8a': 0.0003958238372579217, 'speedup': 0.7420025743283302}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0001634488459676504, 't_m8a': 4.0786704048514366e-05, 'speedup': 4.007405103713055}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0001632802030071616, 't_m8a': 4.5069019310176374e-05, 'speedup': 3.622892299551184}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016319402772933244, 't_m8a': 4.8287967219948766e-05, 'speedup': 3.3796002839795167}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': 
True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016335339192301035, 't_m8a': 5.18921697512269e-05, 'speedup': 3.147939134288139}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016338471043854952, 't_m8a': 5.505425203591585e-05, 'speedup': 2.967703753961854}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016364470310509206, 't_m8a': 5.78042846173048e-05, 'speedup': 2.8310133788266265}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016304091457277535, 't_m8a': 4.958224575966597e-05, 'speedup': 3.2882922520908773}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016306302603334187, 't_m8a': 5.4458742961287496e-05, 'speedup': 2.994248804994576}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016303981933742763, 't_m8a': 5.749963317066431e-05, 'speedup': 2.8354932083394364}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016341925598680973, 't_m8a': 6.0675380751490596e-05, 'speedup': 2.6933371321744044}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016357535030692816, 't_m8a': 6.34941877797246e-05, 'speedup': 2.57622557318801}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 
0.00016328204236924648, 't_m8a': 6.752093508839607e-05, 'speedup': 2.4182432034669437}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016438954975456, 't_m8a': 6.774884741753339e-05, 'speedup': 2.4264552980722147}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0001635269494727254, 't_m8a': 7.552053593099118e-05, 'speedup': 2.1653308925422925}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016384913679212332, 't_m8a': 7.890627160668374e-05, 'speedup': 2.0765033432177082}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016412161849439145, 't_m8a': 8.155945409089327e-05, 'speedup': 2.0122942254062592}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016450576297938823, 't_m8a': 8.476381842046976e-05, 'speedup': 1.9407545111212388}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0001641318378970027, 't_m8a': 8.807369880378247e-05, 'speedup': 1.8635738038284115}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016380187775939703, 't_m8a': 5.7167841121554375e-05, 'speedup': 2.8652801040905094}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016350557096302508, 't_m8a': 
7.621299847960472e-05, 'speedup': 2.1453764347925586}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016406322922557593, 't_m8a': 8.773075230419636e-05, 'speedup': 1.87007662554523}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016495639830827713, 't_m8a': 9.862310998141766e-05, 'speedup': 1.6725937596102765}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0001649490436539054, 't_m8a': 0.00010842079389840365, 'speedup': 1.521378305055319}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016410225350409747, 't_m8a': 0.00011903037503361703, 'speedup': 1.378658627747338}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016418691352009773, 't_m8a': 9.975484572350979e-05, 'speedup': 1.645904139586102}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016579764802008868, 't_m8a': 0.00013458670023828745, 'speedup': 1.2319021695794743}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016530204005539418, 't_m8a': 0.0001547550894320011, 'speedup': 1.0681525283730806}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016617766954004764, 't_m8a': 0.00017383792996406556, 'speedup': 
0.9559344705404546}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.00016475954186171294, 't_m8a': 0.00019016586244106293, 'speedup': 0.8663991514921663}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 3, 'rep': 0, 't_torch': 0.0001648102356120944, 't_m8a': 0.00020723258797079324, 'speedup': 0.7952911133615833}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.957640383392573e-05, 't_m8a': 2.287775184959173e-05, 'speedup': 3.478331453068271}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.939493004232646e-05, 't_m8a': 2.460417989641428e-05, 'speedup': 3.2268878855782215}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.93826561421156e-05, 't_m8a': 2.598618995398283e-05, 'speedup': 3.054801657445317}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.941297069191933e-05, 't_m8a': 2.7467630803585053e-05, 'speedup': 2.891147447691572}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.938504591584205e-05, 't_m8a': 2.8628308326005936e-05, 'speedup': 2.7729562296116788}, {'n_out_features': 6144, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.934378553181886e-05, 't_m8a': 3.023388423025608e-05, 'speedup': 2.624333179539559}, {'n_out_features': 6144, 'batch_size': 
2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.568734418600798e-05, 't_m8a': 2.670826483517885e-05, 'speedup': 2.8338547881371996}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.559360470622778e-05, 't_m8a': 3.283446375280619e-05, 'speedup': 2.3022640258520193}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.561932504177093e-05, 't_m8a': 3.5767074674367904e-05, 'speedup': 2.114216097632461}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.589531689882278e-05, 't_m8a': 3.936019167304039e-05, 'speedup': 1.9282252873480537}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.582332752645016e-05, 't_m8a': 4.0260099805891515e-05, 'speedup': 1.8833367997601052}, {'n_out_features': 6144, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.56147401407361e-05, 't_m8a': 4.153138771653175e-05, 'speedup': 1.8206649066685852}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.58966077119112e-05, 't_m8a': 3.376239258795977e-05, 'speedup': 2.24796295209769}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.573122344911099e-05, 't_m8a': 3.968316875398159e-05, 'speedup': 1.9083965778693652}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 
'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.5542320497334e-05, 't_m8a': 4.195829760283232e-05, 'speedup': 1.8004143355004625}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.567084394395351e-05, 't_m8a': 4.47157546877861e-05, 'speedup': 1.6922635986421726}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.59188262745738e-05, 't_m8a': 4.6471337787806985e-05, 'speedup': 1.633669911144524}, {'n_out_features': 6144, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.577544450759887e-05, 't_m8a': 4.8286077566444876e-05, 'speedup': 1.5693021327592158}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.604586519300938e-05, 't_m8a': 3.0907722190022466e-05, 'speedup': 2.460416355675614}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.59928897023201e-05, 't_m8a': 4.666939191520214e-05, 'speedup': 1.628323973888464}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.597114611417055e-05, 't_m8a': 5.156954750418663e-05, 'speedup': 1.4731784510617025}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.590733561664819e-05, 't_m8a': 5.744231026619673e-05, 'speedup': 1.3214533897554193}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 
7.613477017730474e-05, 't_m8a': 6.180962454527616e-05, 'speedup': 1.2317623790375116}, {'n_out_features': 6144, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 7.585789728909731e-05, 't_m8a': 6.621785648167133e-05, 'speedup': 1.145580683513884}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 8.375864941626787e-05, 't_m8a': 5.3515310399234295e-05, 'speedup': 1.5651343286886046}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 8.365347050130367e-05, 't_m8a': 7.811418548226356e-05, 'speedup': 1.0709126643879279}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 8.401358127593994e-05, 't_m8a': 8.687405381351709e-05, 'speedup': 0.9670733387932211}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 8.361199218779802e-05, 't_m8a': 9.617772419005632e-05, 'speedup': 0.8693488319870492}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 8.372648991644383e-05, 't_m8a': 0.0001038262089714408, 'speedup': 0.806409968599299}, {'n_out_features': 6144, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 8.362123183906078e-05, 't_m8a': 0.00011181541532278061, 'speedup': 0.7478506572431814}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.2440647967159745e-05, 't_m8a': 1.7103449441492556e-05, 
'speedup': 3.066086063314222}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.24304686114192e-05, 't_m8a': 1.8966437317430974e-05, 'speedup': 2.764381508973925}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.2414193749427794e-05, 't_m8a': 2.0118063315749168e-05, 'speedup': 2.605329992594069}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.2425757981836795e-05, 't_m8a': 2.1074071526527406e-05, 'speedup': 2.4876900467878187}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.242170859128237e-05, 't_m8a': 2.22978675737977e-05, 'speedup': 2.3509740748879184}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.2424159832298755e-05, 't_m8a': 2.341416385024786e-05, 'speedup': 2.238993464280545}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.3344763815402985e-05, 't_m8a': 1.937986724078655e-05, 'speedup': 2.7525866484335078}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.337866209447384e-05, 't_m8a': 2.215405460447073e-05, 'speedup': 2.409430826432193}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.33783221617341e-05, 't_m8a': 2.3109442554414273e-05, 'speedup': 2.3098057011132007}, {'n_out_features': 4096, 
'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.338310543447733e-05, 't_m8a': 2.4456383660435677e-05, 'speedup': 2.1827881904239943}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.347978603094816e-05, 't_m8a': 2.547493577003479e-05, 'speedup': 2.0993099458116955}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.323808360844851e-05, 't_m8a': 2.6590713299810888e-05, 'speedup': 2.002130706618808}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.35016106441617e-05, 't_m8a': 2.5532575324177742e-05, 'speedup': 2.09542554814277}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.347561649978161e-05, 't_m8a': 2.9522476717829704e-05, 'speedup': 1.8113526521128807}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.3436711430549625e-05, 't_m8a': 3.0715159140527247e-05, 'speedup': 1.739750433525911}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.358994472771883e-05, 't_m8a': 3.2266645692288874e-05, 'speedup': 1.660846474058065}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.346375610679388e-05, 't_m8a': 3.2768740318715574e-05, 'speedup': 1.631547492726125}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 
'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.348683055490255e-05, 't_m8a': 3.4651047550141813e-05, 'speedup': 1.5435848072848133}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.346705112606287e-05, 't_m8a': 2.137270290404558e-05, 'speedup': 2.501651352480188}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.352856498211622e-05, 't_m8a': 3.066939953714609e-05, 'speedup': 1.7453411475266616}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.377715732902289e-05, 't_m8a': 3.474433533847332e-05, 'speedup': 1.5477964049429955}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.3455086424946785e-05, 't_m8a': 3.816370107233524e-05, 'speedup': 1.4006787843670703}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.364733841270209e-05, 't_m8a': 4.185213800519705e-05, 'speedup': 1.2818302951701142}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.376299843192101e-05, 't_m8a': 4.482502769678831e-05, 'speedup': 1.1993968814830893}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.400954000651837e-05, 't_m8a': 3.645820077508688e-05, 'speedup': 1.4814099121266815}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 
't_torch': 5.373835936188698e-05, 't_m8a': 5.369474831968546e-05, 'speedup': 1.000812203121651}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.367359798401594e-05, 't_m8a': 6.015350762754679e-05, 'speedup': 0.8922771106939834}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.3948285058140754e-05, 't_m8a': 6.575290579348803e-05, 'speedup': 0.8204699763015436}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.417207255959511e-05, 't_m8a': 7.12314611300826e-05, 'speedup': 0.760507670349009}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 5.3707138635218146e-05, 't_m8a': 7.695918995887042e-05, 'speedup': 0.6978651758668595}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00028634610399603844, 't_m8a': 7.889957167208195e-05, 'speedup': 3.629247889787467}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002863665446639061, 't_m8a': 8.701641205698251e-05, 'speedup': 3.2909486600801188}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00028640469163656234, 't_m8a': 9.310352057218552e-05, 'speedup': 3.0761961510843787}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002864065980538726, 't_m8a': 
9.926564246416092e-05, 'speedup': 2.885254061164994}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002864083331078291, 't_m8a': 0.00010434496402740478, 'speedup': 2.7448218107833924}, {'n_out_features': 24576, 'batch_size': 1, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00028641329891979697, 't_m8a': 0.00011027667950838805, 'speedup': 2.59722454644648}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002905762549489737, 't_m8a': 8.572672307491303e-05, 'speedup': 3.3895644733212436}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029045618511736395, 't_m8a': 9.74907437339425e-05, 'speedup': 2.9793206410450055}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029025238379836083, 't_m8a': 0.00010385650023818016, 'speedup': 2.794744509324964}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002905781799927354, 't_m8a': 0.00011046316660940648, 'speedup': 2.6305436365065353}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029025925416499375, 't_m8a': 0.00011648642644286156, 'speedup': 2.491786064940112}, {'n_out_features': 24576, 'batch_size': 2, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029038190189749005, 't_m8a': 0.0001229885146021843, 'speedup': 
2.3610489388928095}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002909604357555509, 't_m8a': 0.00011505267210304737, 'speedup': 2.5289324483915596}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029121372289955615, 't_m8a': 0.00013195055443793535, 'speedup': 2.206991278968307}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029043404664844276, 't_m8a': 0.00013662643544375897, 'speedup': 2.125752938698289}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002903190441429615, 't_m8a': 0.00014212457370012997, 'speedup': 2.042708284603256}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029040831327438357, 't_m8a': 0.00014625326078385114, 'speedup': 1.98565359649369}, {'n_out_features': 24576, 'batch_size': 4, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002903941189870238, 't_m8a': 0.0001525940727442503, 'speedup': 1.903049795870697}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002918983260169625, 't_m8a': 9.893906768411398e-05, 'speedup': 2.9502837741398165}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002919174060225487, 't_m8a': 0.00014334689639508723, 'speedup': 2.036440365042694}, {'n_out_features': 24576, 
'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002913633966818452, 't_m8a': 0.0001613196600228548, 'speedup': 1.8061245395667618}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029139877669513226, 't_m8a': 0.00017776658665388822, 'speedup': 1.6392213080092832}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002916044602170587, 't_m8a': 0.0001931571215391159, 'speedup': 1.509674910733262}, {'n_out_features': 24576, 'batch_size': 8, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029133071936666966, 't_m8a': 0.00020936736650764941, 'speedup': 1.3914810327235292}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002933470001444221, 't_m8a': 0.00019123796559870242, 'speedup': 1.5339370465798998}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002935130279511213, 't_m8a': 0.0002766397502273321, 'speedup': 1.0609936847829113}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002931325118988752, 't_m8a': 0.00031488268170505764, 'speedup': 0.9309261160746998}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002936344658955932, 't_m8a': 0.0003487855941057205, 'speedup': 0.8418767026444032}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 
'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00029289568588137626, 't_m8a': 0.0003773126341402531, 'speedup': 0.7762678992946266}, {'n_out_features': 24576, 'batch_size': 16, 'n_in_features': 4096, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0002936562318354845, 't_m8a': 0.0004079930502921343, 'speedup': 0.7197579263304082}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016379635687917472, 't_m8a': 4.527123831212521e-05, 'speedup': 3.6181108135339954}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016310085449367762, 't_m8a': 5.0921345129609105e-05, 'speedup': 3.202995798295198}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016323635820299386, 't_m8a': 5.492874514311552e-05, 'speedup': 2.9717838588463192}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0001635658936575055, 't_m8a': 5.9173221699893474e-05, 'speedup': 2.7641877349023902}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016362386848777532, 't_m8a': 6.186302006244659e-05, 'speedup': 2.6449382575019444}, {'n_out_features': 4096, 'batch_size': 1, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016380794532597066, 't_m8a': 6.568933371454477e-05, 'speedup': 2.4936764625715897}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': 
True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016326931864023208, 't_m8a': 5.314751621335745e-05, 'speedup': 3.072002800371656}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016358608473092317, 't_m8a': 5.83562757819891e-05, 'speedup': 2.803230373063181}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016343245096504688, 't_m8a': 6.200874224305153e-05, 'speedup': 2.6356356386725537}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016367532964795827, 't_m8a': 6.557120010256767e-05, 'speedup': 2.496146622174588}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016367919370532035, 't_m8a': 6.909203436225652e-05, 'speedup': 2.369002378004008}, {'n_out_features': 4096, 'batch_size': 2, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016357637383043766, 't_m8a': 7.309404667466878e-05, 'speedup': 2.237889147915053}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016450552362948657, 't_m8a': 7.019080687314271e-05, 'speedup': 2.3436904483346486}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016396877449005842, 't_m8a': 7.934937905520201e-05, 'speedup': 2.0664153449264946}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 
0.00016392929200083017, 't_m8a': 8.301325049251318e-05, 'speedup': 1.9747364550628537}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016441726032644511, 't_m8a': 8.665807824581862e-05, 'speedup': 1.8973102526004664}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0001645606905221939, 't_m8a': 8.947803918272256e-05, 'speedup': 1.8391182017986056}, {'n_out_features': 4096, 'batch_size': 4, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016460902430117131, 't_m8a': 9.33765135705471e-05, 'speedup': 1.762852541895422}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016421447414904832, 't_m8a': 5.989027116447687e-05, 'speedup': 2.7419223682936}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.000163392448797822, 't_m8a': 7.987617235630751e-05, 'speedup': 2.045571839233475}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016454051341861485, 't_m8a': 9.071379061788321e-05, 'speedup': 1.8138423308944773}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016542228776961565, 't_m8a': 0.00010262239072471857, 'speedup': 1.6119512184563687}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016518508549779654, 't_m8a': 0.00011219892837107182, 
'speedup': 1.4722519002274723}, {'n_out_features': 4096, 'batch_size': 8, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016430675704032184, 't_m8a': 0.00012258963100612164, 'speedup': 1.3402989770979652}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.0, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016474301181733608, 't_m8a': 0.00010615761391818523, 'speedup': 1.5518718416589705}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.01, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016625584941357374, 't_m8a': 0.00014020889066159725, 'speedup': 1.1857725186261008}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.02, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016573400981724262, 't_m8a': 0.00016093713231384754, 'speedup': 1.0298059088939187}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.03, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.0001663613384589553, 't_m8a': 0.00017975223064422609, 'speedup': 0.9255036105127693}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.04, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016506039910018443, 't_m8a': 0.00019527739752084016, 'speedup': 0.8452611576952681}, {'n_out_features': 4096, 'batch_size': 16, 'n_in_features': 12288, 'group_size': 128, 'density': 0.05, 'is_bf16': True, 'bit_width': 4, 'rep': 0, 't_torch': 0.00016512101795524358, 't_m8a': 0.0002136525036767125, 'speedup': 0.7728485045281559}]
    layer_names = ['Q·K·V', 'O', 'Gate·Up', 'Down']
    layer_sizes = [(6144, 4096), (4096, 4096), (24576, 4096), (4096, 12288)]
    inlier_bitwidths = np.asarray([2, 3, 4])
    outlier_rates = np.asarray([0., .01, .02, .03, .04, .05])
    batch_sizes = np.asarray([1, 2, 4, 8, 16])
    data = np.zeros([len(layer_sizes), len(inlier_bitwidths), len(outlier_rates), len(batch_sizes)])
    for record in records:
        data[
            layer_sizes.index((record['n_out_features'], record['n_in_features'])),
            inlier_bitwidths.tolist().index(record['bit_width']),
            outlier_rates.tolist().index(record['density']),
            batch_sizes.tolist().index(record['batch_size']),
        ] = record['speedup']

    plot_colors = '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#000000'

    for layer_i, layer_name in enumerate(layer_names):
        for bitwidth_i, bitwidth in enumerate(inlier_bitwidths):
            for outlier_i, outlier in enumerate(outlier_rates):
                axs[layer_i, bitwidth_i].plot(np.log2(batch_sizes), data[layer_i, bitwidth_i, outlier_i, :], label=f'{round(outlier * 100.)}%', color=plot_colors[outlier_i], marker='o', zorder=4.)

    for ax in axs.flatten():
        ax.axhline(y=1., color='black', linestyle='--', linewidth=1, zorder=2.)
        # ax.set_xlim(np.min(np.log2(batch_sizes)), np.max(np.log2(batch_sizes)))
        ax.set_xticks(np.log2(batch_sizes))
        ax.set_xticklabels([f'{round(2 ** tick)}' for tick in ax.get_xticks()], fontproperties=font)
        ax.set_ylim(0., 5.5)
        ax.set_yticks(np.arange(0, 5.1, 1.))
        ax.set_yticklabels([f'{round(tick)}' for tick in ax.get_yticks()], fontproperties=font)
        ax.grid(True)
        ax.tick_params(axis='both', which='both', length=0)
        ax.set_facecolor((1., 1., 1., 1.))

    for layer_name, ax in zip(layer_names, axs[:, 0]):
        ax.set_ylabel(f'Speedup ({layer_name} Layer)', fontproperties=font)

    for bitwidth, ax in zip(inlier_bitwidths, axs[-1, :]):
        ax.set_xlabel(f'Batch Size ({bitwidth}-Bit Inlier)', fontproperties=font)

    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, title='Outlier Rate', ncol=len(labels), framealpha=1., loc='lower center', bbox_to_anchor=(.5, -.05), title_fontproperties=font_legend, prop=font_legend)

    # fig.suptitle('Qwen3-8B Layerwise Inference Speedup (SSQR Kernel vs PyTorch BF16 Kernel) on A6000', fontproperties=font)
    fig.set_facecolor((1., 1., 1., 0.))
    fig.tight_layout()
    fig.savefig(f'5_speedup.pdf', bbox_inches='tight', pad_inches=.01, transparent=False, metadata=strip_pdf_meta)
    fig.savefig(f'5_speedup.svg', bbox_inches='tight', pad_inches=.01, transparent=False, metadata=strip_svg_meta)
    fig.show()


# Script entry point: build the speedup figure only when this file is executed
# directly, so importing the module does not trigger plotting. plot_layer()
# writes '5_speedup.pdf' and '5_speedup.svg' relative to the current working
# directory and then calls fig.show().
if __name__ == '__main__':
    plot_layer()
