# Generation backend identifier.
generation_engine: vllm
# Model to load. Left null here, so it presumably has to be supplied at run
# time (e.g. via an override) — confirm against the config loader.
model_name_or_path: null
# NOTE(review): presumably the directory model weights are downloaded to —
# confirm.
download_dir: null

# Output location. Left null here; presumably supplied at run time — confirm.
save_dir: null
# NOTE(review): presumably controls whether existing outputs are overwritten —
# confirm against the consumer.
override: false


# Task identifiers, one per entry.
# NOTE(review): presumably each names a dataset/benchmark to generate for —
# confirm against the task registry.
# Items use the same two-space indent as the nested mappings in this file.
tasks:
  - "entity_bios"
  - "pop_bio_param"
  - "pop_bio_cf"
  - "askhistorians"
  - "eli5"
  - "expertqa"
  - "facts"
  - "xsum"

# vLLM serving
# NOTE(review): these key names match vLLM engine arguments; presumably they
# are forwarded to the engine unchanged — confirm against the loader.
max_model_len: 4096
tensor_parallel_size: 1
# NOTE(review): presumably a fraction of GPU memory (0-1) — confirm.
gpu_memory_utilization: 0.8
max_num_batched_tokens: 8192
disable_custom_all_reduce: true
# NOTE(review): presumably GiB of CPU swap space per GPU — confirm against the
# installed vLLM version.
swap_space: 4

# Default sampling profile.
# NOTE(review): presumably used when is_thinking is false — confirm against
# the consumer.
sampling:
  # Token-selection controls. top_p and min_p are written as integers here
  # and are kept that way on purpose.
  temperature: 0.0
  top_p: 1
  top_k: -1
  min_p: 0

  # Penalties.
  presence_penalty: 0.0
  repetition_penalty: 1.2

  # Length and termination.
  max_tokens: 2048
  stop_tokens:
    # Quoted because a leading '[' would otherwise start a flow sequence.
    - '[/INST]'
    - '<|endoftext|>'

  seed: 42


# Alternative sampling profile.
# NOTE(review): presumably used instead of `sampling` when is_thinking is
# true — confirm against the consumer.
thinking_sampling:
  # Token-selection controls.
  temperature: 0.6
  top_p: 0.85
  top_k: 20
  min_p: 0

  # Penalties.
  presence_penalty: 1.5
  repetition_penalty: 1.2

  # Length and termination.
  max_tokens: 2048
  stop_tokens:
    - '<|im_end|>'
    # Three consecutive newline characters. Double quotes are required so
    # that \n is processed as an escape sequence.
    - "\n\n\n"

  seed: 42

# NOTE(review): presumably caps the number of instances processed, with null
# meaning no cap — confirm against the consumer.
max_instances: null

# NOTE(review): presumably toggles few-shot examples in the prompt — confirm.
add_fewshot: false
# NOTE(review): presumably switches between the `sampling` and
# `thinking_sampling` profiles — confirm.
is_thinking: false

