{"GEMV": {"(1, 2048, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 2048, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3072, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4736, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4736, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 3072, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 4480, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6016, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6912, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4480, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6016, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6912, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 896, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 896, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1280, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1280, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1664, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1664, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1920, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1920, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 3200, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 4864, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 6400, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 7424, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3200, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4864, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6400, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 7424, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}}, "GEMV_REVSPLITK": {"(1, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 8, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2}, "(1, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 16, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1}, "(1, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 2048, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3072, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4736, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4736, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 3072, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 4480, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6016, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6912, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4480, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6016, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6912, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 896, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 896, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1280, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1280, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1664, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1664, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1920, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1920, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 3200, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 4864, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 6400, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 7424, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3200, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4864, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6400, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 7424, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(2, 4096, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "A_load_order": 1, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 2, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}}, "GEMV_SPLITK": {"(1, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 1024, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 1024, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 1024, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 1024, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 8, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(1, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 512, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1, 2048, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 2048, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3072, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4736, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4736, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 3072, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 4480, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6016, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6912, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4480, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6016, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 11008, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6912, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 896, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 1, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 896, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1280, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 1, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1280, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 1, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1664, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1664, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 1, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1920, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 2, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1920, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 1, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 3200, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 4864, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 6400, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 4096, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 7424, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3200, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4864, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6400, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 14336, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 8, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 7424, 128, 32)": {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 4, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "dot_prod_mode": 0, "num_warps": 4, "num_ctas": 1, "num_stages": 2, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}}, "GEMM_SPLITK": {"(64, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(2, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(64, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(16, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(16, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(64, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(8, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(8, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(8, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(64, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(64, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(16, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(2, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(64, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(4, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(64, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(32, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(2, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(16, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(64, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(8, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(64, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(8, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(4, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(16, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(4, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(32, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(4, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(4, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(32, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(8, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(64, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(8, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(16, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(64, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(16, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(32, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(64, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(4, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(8, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(16, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 2, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(32, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(64, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 1, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1, 2048, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 2048, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3072, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4736, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4736, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 3072, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 4480, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6016, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 11008, 6912, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3072, 11008, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4480, 11008, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4480, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6016, 11008, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6016, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6912, 11008, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6912, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 896, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 896, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1280, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1280, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1664, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1664, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1920, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 1024, 1920, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 3200, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 4864, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 6400, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 14336, 7424, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 3200, 14336, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 3200, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 4, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4864, 14336, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 4864, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 6400, 14336, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 6400, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 7424, 14336, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "SPLIT_K": 16, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 8, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(1, 4096, 7424, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 2, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}, "(2, 4096, 4096, 128, 32)": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "SPLIT_K": 8, "A_load_order": 0, "meta_evict_policy": "", "atomic_mode": "relaxed", "num_warps": 4, "num_ctas": 1, "num_stages": 1, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, "reg_inc_consumer": 0}}, "GEMM": {"(2048, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(8192, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4096, 6144, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 4096, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 28672, 4096, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4096, 4096, 14336, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2048, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4096, 4608, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 3584, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 37888, 3584, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 3584, 18944, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(8192, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2048, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2048, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(2048, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(2048, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(4096, 5120, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 3072, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 16384, 3072, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 3072, 8192, 64, 8)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(4096, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4096, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4096, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4096, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(256, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(128, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2048, 6144, 4096, 4096, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2048, 4096, 4096, 4096, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2048, 28672, 4096, 4096, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2048, 4096, 14336, 14336, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4096, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(4096, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4096, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(4096, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(256, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(128, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(2048, 4608, 3584, 3584, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2048, 3584, 3584, 3584, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(2048, 37888, 3584, 3584, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(2048, 3584, 18944, 18944, 1)": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 1024, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 14336, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 1024, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 3072, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 512, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 18944, 3584, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 2048, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 256, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 11008, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 2048, 11008, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 2560, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 22016, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(256, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(256, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 5120, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 3072, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 16384, 3072, 3072, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 3072, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(256, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(128, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 2560, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 2048, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 22016, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 2048, 11008, 11008, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(1024, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 6144, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 4096, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 28672, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 4096, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 3072, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 16384, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3072, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 4608, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3584, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 37888, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 3584, 18944, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 2560, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 2048, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 22016, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 2048, 11008, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 1024, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 14336, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 1024, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 3072, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 512, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 18944, 3584, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 256, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 11008, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 13824, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 13824, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 27648, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 27648, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 7168, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 55296, 5120, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 55296, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 13824, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 13824, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 27648, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 27648, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 7168, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 55296, 5120, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 5120, 55296, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(1024, 5120, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(512, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 5120, 13824, 13824, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 13824, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(512, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 5120, 27648, 27648, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(256, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 27648, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 7168, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 5120, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 55296, 5120, 5120, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 5120, 55296, 55296, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 4096, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 4096, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(1024, 1024, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(128, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 1024, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 1024, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(256, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(1024, 1024, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(128, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 2048, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 2048, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(512, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 8192, 2048, 2048, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 4096, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 4096, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 8192, 4096, 4096, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 8192, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 8192, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(256, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 8192, 8192, 8192, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 14336, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 14336, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(256, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(1024, 8192, 14336, 14336, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 7168, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 8192, 7168, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 2}, "(256, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(1024, 8192, 7168, 7168, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 6400, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(256, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(1024, 6400, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 4}, "(256, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 4}, "(512, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 6400, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(128, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(512, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 12800, 1024, 64, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(128, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(256, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(512, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 1}, "(1024, 12800, 1024, 128, 8)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 8, "A_load_order": 2, "meta_evict_policy": "", "num_warps": 8, "num_ctas": 1, "num_stages": 1}, "(128, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(256, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(512, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}, "(1024, 12800, 1024, 1024, 1)": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "A_load_order": 0, "meta_evict_policy": "", "num_warps": 4, "num_ctas": 1, "num_stages": 2}}}