# Hydra defaults list. Each `override` entry replaces the selected option of a
# config group (model_cfg, data_cfg, trainer_cfg) and places that group's
# content in the `_global_` package, i.e. at the top level of the composed
# config. `_self_` is listed last, so the keys defined in this file are merged
# after the selected group configs and take precedence over them.
defaults:
  - override /model_cfg@_global_: qwen7bi
  - override /data_cfg@_global_: conductor_countdown
  - override /trainer_cfg@_global_: conductor_grpo
  - _self_

### IMPORTANT ###
### If user_content_format is "no_overall_task", then router_question_format_style in conductor_countdown.yaml is v0_2.
### If user_content_format is "include_overall_task", then router_question_format_style in conductor_countdown.yaml is v0.
### If router_question_format_style is v0_random_names(2), then model_id_format is random_names.
# Task name followed by the three format selectors. The selectors are coupled:
# keep them consistent with the rules in the IMPORTANT note above.
overall_task: countdown
user_content_format: include_overall_task
router_question_format_style: v0
model_id_format: numeric

# vLLM options.
# NOTE(review): these key names match the vLLM-related fields of TRL's
# GRPOConfig; presumably they are forwarded to the trainer -- confirm.
use_vllm: true
vllm_mode: colocate
vllm_tensor_parallel_size: 1
vllm_gpu_memory_utilization: 0.9

# Log everything for debugging.
logging_prob: 1.0

# saving:
save_strategy: steps
save_steps: 20
push_to_hub: false
# No tags are set. Written as an explicit null (same parsed value as the
# former bare `tags:`) so the empty value is visibly intentional.
tags: null

### b200 service info ###
# Model id -> port number for the b200 service.
b200_ports:
  meta-llama/Llama-3.1-8B-Instruct: 8321
  google/gemma-3-27b-it: 8322
  Qwen/Qwen2.5-32B-Instruct: 8323
  deepseek-ai/DeepSeek-R1-Distill-Qwen-32B: 8324

# Model ids, listed in the same order as the keys of b200_ports.
b200_models:
  - 'meta-llama/Llama-3.1-8B-Instruct'
  - 'google/gemma-3-27b-it'
  - 'Qwen/Qwen2.5-32B-Instruct'
  - 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B'

# Active b200 host; the commented-out line is an alternative host.
b200_server: 'poc-b200-grp01-0zmx'
# b200_server: 'poc-b200-grp01-x2sr'
### b200 service info ###

### local vllm service info ###
# Model id -> port number for the local vLLM service.
# NOTE(review): these ports differ from b200_ports; 8324 is Qwen here but
# DeepSeek in b200_ports -- confirm this is intended.
open_ports:
  google/gemma-3-27b-it: 8326
  meta-llama/Llama-3.1-8B-Instruct: 8327
  Qwen/Qwen2.5-32B-Instruct: 8324
  deepseek-ai/DeepSeek-R1-Distill-Qwen-32B: 8325

# Model id -> host name; every model currently points at localhost.
local_servers:
  google/gemma-3-27b-it: 'localhost'
  meta-llama/Llama-3.1-8B-Instruct: 'localhost'
  Qwen/Qwen2.5-32B-Instruct: 'localhost'
  deepseek-ai/DeepSeek-R1-Distill-Qwen-32B: 'localhost'
### local vllm service info ###

# Pool of names; presumably used in place of model ids when model_id_format
# is random_names -- confirm against the consumer.
random_names: [Kangaroo, Deer, Giraffe, Leopard]

# logging:
# Reporting backend and its identifiers. wandb_group_name and wandb_run_name
# are also used as path components of output_dir.
report_to: wandb
wandb_project: conductor_experiments
wandb_group_name: conductor_countdown/qwen7bi
wandb_run_name: conductor_grpo_qwen7bi_binaccess_hidenames_1repeat_16chunk_noalldefault_temp0.2

# Logging cadence: strategy `steps` with logging_steps 1.
logging_strategy: steps
logging_steps: 1

# dirs:
results_dir: results
# Timestamp id in the form YYYY.MM.DDHHMMSS. It is produced by a single `now`
# call so the date part and the time part come from the same clock reading
# (two separate calls could disagree when a run starts right at midnight).
exp_name: ${now:%Y.%m.%d%H%M%S}
# <results_dir>/<wandb_group_name>/<wandb_run_name>/<exp_name>
output_dir: ${results_dir}/${wandb_group_name}/${wandb_run_name}/${exp_name}

# in case output dir exists, resume_from will be ignored
# Unset by default; written as an explicit null (same parsed value as the
# former bare `resume_from:`) to match use_custom_checkpoint below.
resume_from: null

evaluate_only: false
use_custom_checkpoint: null

# NOTE(review): presumably the random seed for the run -- confirm where it is consumed.
seed: 42

# Point Hydra's run directory (hydra.run.dir) at output_dir, so Hydra's
# per-run output location is the same path as output_dir.
hydra:
  run:
    dir: ${output_dir}

