# Hydra defaults list: composes this file with one option from each config group.
# `_self_` is listed first, so (per Hydra's defaults-list ordering) the group
# configs below are merged after this file and can override values set here.
# `<group>@_global_: <option>` places the selected option's keys at the config
# root instead of nesting them under the group name.
defaults:
  - _self_
  - model_cfg@_global_: qwen1bi
  - data_cfg@_global_: r1_countdown
  - trainer_cfg@_global_: grpo
  - run_cfg@_global_: default

# The default settings below are intended for testing only.

# Saving: checkpoint cadence and Hub publishing.
# NOTE(review): these keys look like trainer arguments consumed by the
# training script - confirm against the code that reads this config.
save_strategy: steps
save_steps: 25
push_to_hub: false
# No tags by default; spelled as an explicit null rather than a bare empty value.
tags: null

# Per-device batch size and vLLM generation settings.
# NOTE(review): presumably vllm_gpu_memory_utilization is a fraction of GPU
# memory and vllm_mode selects how vLLM is run alongside training - confirm
# against the trainer that consumes these keys.
per_device_train_batch_size: 1
vllm_gpu_memory_utilization: 0.3
vllm_mode: colocate

# Logging: metric cadence and Weights & Biases naming.
logging_strategy: steps
logging_steps: 5
report_to: wandb
wandb_project: rl4lm
# data_log_name, model_log_name and trainer_log_name are not defined in this
# file; presumably the config groups selected in `defaults` provide them.
wandb_group_name: ${data_log_name}/${model_log_name}
wandb_run_name: ${trainer_log_name}

# Directories: where a run's outputs are written.
results_dir: results
# Timestamp from Hydra's `now` resolver: date as YYYY.MM.DD immediately
# followed by time as HHMMSS (no separator between the two parts).
exp_name: ${now:%Y.%m.%d}${now:%H%M%S}
# Resolves to <results_dir>/<wandb_group_name>/<wandb_run_name>/<exp_name>.
output_dir: ${results_dir}/${wandb_group_name}/${wandb_run_name}/${exp_name}

# Run control.
# If the output directory already exists, resume_from is ignored.
# Unset by default; spelled as an explicit null rather than a bare empty value.
resume_from: null
evaluate_only: false
save_final_model: true
call_post_training: null

# Random seed for the run.
seed: 42

# Point Hydra's run directory at output_dir so Hydra's own outputs for a run
# live in the same directory as the experiment outputs.
hydra:
  run:
    dir: ${output_dir}

