# Layer under test: names the network and the layer this configuration
# applies to.
network_name: "llama1b"
layer_name: "qo_proj"
# NOTE(review): in_f / out_f are presumably the layer's input and output
# feature dimensions — confirm against the code that reads this file.
in_f: 2048
out_f: 2048

# Per-method settings, one mapping per method name.
# NOTE(review): `b` looks like a block count and `rank` a factorization
# rank — confirm against the consumer.
blast:
  b: 16
  rank: 256
  # Boolean written in canonical lowercase form.
  compile: true

monarch:
  b: 16
  rank: 256

low_rank:
  rank: 256

# Functions listed for the `triton_blast_verify` step, given as dotted
# paths; all three entries here are the `_fp32` variants.
# NOTE(review): presumably these are imported by path and checked against a
# reference result — confirm in the consumer.
triton_blast_verify:
  funcs:
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_fp32
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_persistent_fp32
    - triton_funcs.blast_kernels.triton_blast_bmm_fp32
# Functions listed for the `triton_blast_benchmark` step, given as dotted
# paths; all three entries here are the `_fp16` variants.
triton_blast_benchmark:
  funcs:
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_fp16
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_persistent_fp16
    - triton_funcs.blast_kernels.triton_blast_bmm_fp16
  # Three boolean flags, the same count as the entries in `funcs`.
  # NOTE(review): presumably matched to `funcs` by position — confirm in the
  # consumer, and keep both lists the same length when editing.
  profile:
    - true
    - true
    - true