
# Llama and Mistral models are used for benchmarking.
llama_model:
  # Location of the model weights; adjust to your local checkout.
  base_model: models/llama_finetuned_model  # Please modify
  device: auto
  instruct_prompt: true
  # NOTE(review): presumably the number of parallel workers/devices used to
  # load the model - confirm against the loader that reads this file.
  world_size: 4  # Please modify
  replace_with_kernel_inject: false
  # LoRA is switched off here, so no adapter path is supplied.
  enable_lora: false
  lora_path: null
  # NOTE(review): these look like prompt/sampling options handed to the
  # generation call - confirm against the consumer of this mapping.
  params:
    system_prompt: false
    # NOTE(review): both max_new_tokens and max_length are set (to the same
    # value here); confirm which one the consumer actually honours.
    max_new_tokens: 4096
    do_sample: true
    temperature: 0.6
    top_p: 0.9
    max_length: 4096
    repetition_penalty: 1.0

# Qwen3-32B is used for dataset generation.
qwen_32b_model:
  # Location of the model weights; adjust to your local checkout.
  base_model: models/Qwen3-32B  # Please modify
  device: auto
  instruct_prompt: true
  # NOTE(review): presumably the number of parallel workers/devices used to
  # load the model - confirm against the loader that reads this file.
  world_size: 4  # Please modify
  replace_with_kernel_inject: false
  # LoRA is switched off here, so no adapter path is supplied.
  enable_lora: false
  lora_path: null
  # NOTE(review): these look like prompt/sampling options handed to the
  # generation call - confirm against the consumer of this mapping.
  params:
    system_prompt: false
    # NOTE(review): both max_new_tokens and max_length are set; confirm which
    # one the consumer actually honours.
    max_new_tokens: 2048  # Please modify
    do_sample: true
    temperature: 0.6
    top_p: 0.9
    max_length: 4096
    repetition_penalty: 1.0