# Network and layer names this configuration is keyed on.
network_name: 'llama7b'
layer_name: 'qkvo_proj'
# NOTE(review): presumably the layer's input / output feature sizes —
# confirm against the code that consumes this file.
in_f: 4096
out_f: 4096
# Settings for the "blast" method.
# NOTE(review): `b` and `rank` are presumably the block count and the rank of
# the structured factorization — confirm against the consuming code.
blast:
  b: 16
  rank: 1024
  # Boolean flag, written as a canonical lowercase YAML boolean.
  # NOTE(review): presumably toggles compilation of the layer — confirm.
  compile: true
# Settings for the "monarch" method.
# NOTE(review): `b` and `rank` are presumably the block count and rank used by
# this method — confirm against the consuming code.
monarch:
  b: 16
  rank: 1024
# Settings for the "low_rank" method.
# NOTE(review): `rank` is presumably the inner dimension of the low-rank
# factorization — confirm against the consuming code.
low_rank:
  rank: 1024

# Dotted paths of the kernel functions listed for the verify step; every entry
# here is an `_fp32` variant.
# NOTE(review): presumably resolved to callables by import at runtime —
# confirm against the consuming code.
triton_blast_verify:
  funcs:
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_fp32
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_persistent_fp32
    - triton_funcs.blast_kernels.triton_blast_bmm_fp32
# Dotted paths of the kernel functions listed for the benchmark step; every
# entry here is an `_fp16` variant.
triton_blast_benchmark:
  funcs:
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_fp16
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_persistent_fp16
    - triton_funcs.blast_kernels.triton_blast_bmm_fp16
  # One boolean per entry in `funcs` (three and three).
  # NOTE(review): presumably paired with `funcs` by position to enable
  # profiling per kernel — keep both lists the same length and confirm.
  profile:
    - true
    - true
    - true