# Model identifiers to process, one list entry per model.
# Every candidate below is currently commented out, so this key loads as
# null rather than as a list; uncomment at least one entry to give it a value.
# NOTE(review): confirm how the consuming script handles a null value here.
model_names:
# - Qwen/Qwen2.5-1.5B-Instruct
# - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
# - microsoft/Phi-3-mini-4k-instruct
# - microsoft/Phi-3.5-mini-instruct
# - meta-llama/Llama-3.2-1B-Instruct
# - meta-llama/Llama-3.2-3B-Instruct
# - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
# - allenai/OLMo-7B-Instruct-hf
# - meta-llama/Meta-Llama-3.1-8B-Instruct

# Explicit null (identical to leaving the value empty): no adapter path is set.
adapters_path: null

# NOTE(review): sft_dataset and dpo_dataset point at the same file - confirm
# this is intentional.
sft_dataset: data/final_dpo_data.json
dpo_dataset: data/final_dpo_data.json

# NOTE(review): both directory names carry a `3ep` suffix, which presumably
# tracks `epochs: 3` below - keep them in sync if the epoch count changes.
model_dir: ../exp_outputs_3ep
output_dir: ./dpo_outputs_3ep

use_bf16: true
batch_size: 2
# 5e-5, kept in plain decimal notation: some YAML 1.1 loaders read exponent
# forms such as `5e-5` as a string instead of a float.
lr: 0.00005
epochs: 3
# NOTE(review): presumably the DPO beta coefficient - confirm with the
# training script.
beta: 0.1
