# Candidate models, one sequence entry per model name.
# NOTE: every entry below is commented out, so this key currently loads as
# null rather than as a list. Uncomment at least one entry (keep the "- "
# flush with the key, as written) to give it a list value.
# NOTE(review): entries look like Hugging Face Hub "org/name" repo ids, and
# the consumer may not accept null here; confirm against the script that
# reads this file.
model_names: 
# - Qwen/Qwen2.5-1.5B-Instruct
# - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
# - microsoft/Phi-3-mini-4k-instruct
# - microsoft/Phi-3.5-MoE-instruct
# - microsoft/Phi-3.5-mini-instruct
# - meta-llama/Llama-3.2-1B-Instruct
# - meta-llama/Llama-3.2-3B-Instruct
# - meta-llama/Meta-Llama-3.1-8B-Instruct
# - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
# - allenai/OLMo-7B-Instruct-hf
# - microsoft/Phi-3-medium-128k-instruct

# Dataset paths for the two stages named by the keys.
# NOTE(review): both keys point at the same file; confirm the SFT stage is
# really meant to read the DPO data rather than a separate SFT set.
sft_dataset: 'data/final_dpo_data.json'
dpo_dataset: 'data/final_dpo_data.json'

# NOTE(review): the "3ep" suffix appears to mirror `epochs: 3` below; if so,
# change the two together. The path is relative, and the directory it is
# resolved against is decided by the consumer, not by this file.
output_dir: '../exp_outputs_3ep'

# Presumably enables bfloat16 training; TODO confirm with the consumer.
use_bf16: true

# NOTE(review): this file does not say whether the size is per device or
# global.
batch_size: 4

# Keep this in plain decimal form: exponent spellings that lack a decimal
# point or an explicit exponent sign (e.g. 5e-5) are loaded as strings, not
# floats, by YAML 1.1 parsers such as PyYAML.
lr: 0.00005
epochs: 3

# Presumably the DPO beta coefficient, given dpo_dataset above; TODO confirm.
beta: 0.1
