# Identifies the network and the layer within it that this config targets.
network_name: 'llama1b'
layer_name: 'gate_proj'
# Layer dimensions — presumably input/output feature counts; TODO confirm
# against the code that reads this file.
in_f: 2048
out_f: 8192
# Settings for the `blast` variant.
# NOTE(review): `b` is presumably a block count and `rank` the factor rank;
# confirm against the consumer.
blast:
  b: 16
  rank: 512
  # Canonical lowercase boolean (parses the same as `True`).
  # Presumably toggles compilation of the module — TODO confirm.
  compile: true
# Settings for the `monarch` variant.
# NOTE(review): `b` is presumably a block count and `rank` the factor rank;
# confirm against the consumer.
monarch:
  b: 16
  rank: 512
# Settings for the `low_rank` variant; only a rank is configured here.
low_rank:
  rank: 512

# Dotted paths of the Monarch Triton functions listed for verification.
# Every entry here is an `_fp32` variant.
# NOTE(review): presumably resolved by import path at runtime — confirm.
triton_monarch_verify:
  funcs:
    - triton_funcs.monarch_kernels.triton_monarch_right_fp32
    - triton_funcs.monarch_kernels.triton_monarch_right_ideal_fp32
    - triton_funcs.monarch_kernels.triton_monarch_right_left_fp32
# Dotted paths of the Monarch Triton functions listed for benchmarking.
# Every entry here is an `_fp16` variant.
triton_monarch_benchmark:
  funcs:
    - triton_funcs.monarch_kernels.triton_monarch_right_fp16
    - triton_funcs.monarch_kernels.triton_monarch_right_ideal_fp16
    - triton_funcs.monarch_kernels.triton_monarch_right_left_fp16
  # Same length as `funcs`; presumably one flag per function, matched by
  # position — TODO confirm. Keep both lists in sync when editing.
  profile:
    - true
    - true
    - true

# Dotted paths of the BLAST Triton functions listed for verification.
# Every entry here is an `_fp32` variant.
# NOTE(review): presumably resolved by import path at runtime — confirm.
triton_blast_verify:
  funcs:
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_fp32
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_persistent_fp32
    - triton_funcs.blast_kernels.triton_blast_bmm_fp32
# Dotted paths of the BLAST Triton functions listed for benchmarking.
# Every entry here is an `_fp16` variant.
triton_blast_benchmark:
  funcs:
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_fp16
    - triton_funcs.blast_kernels.triton_blast_partial_grouped_persistent_fp16
    - triton_funcs.blast_kernels.triton_blast_bmm_fp16
  # Same length as `funcs`; presumably one flag per function, matched by
  # position — TODO confirm. Keep both lists in sync when editing.
  profile:
    - true
    - true
    - true

