---
# DPO training configuration for the claim-rewriter task.
# NOTE(review): key semantics below are inferred from the key names and
# values only; confirm them against the code that consumes this file.

# Task identifier; presumably selects which task implementation runs.
type: 'dpo-train-claim-rewriter'

# Directory this task reads from. Presumably the output of the `dataset`
# dependency declared below - TODO confirm.
input_dir: 'data/task_outputs/dpo_claim_rewriter_dataset/'

# 0.000001 == 1e-6. Kept in plain decimal form: exponent notation is typed
# inconsistently (float vs. string) across YAML parsers.
learning_rate: 0.000001

# Directory this task writes to.
output_dir: 'data/task_outputs/dpo_claim_rewriter_model_from_sft/'

# Model to start from. The active value is a local directory path
# (presumably produced by the `sft_model` dependency - TODO confirm);
# the commented-out line is the alternative hub-style model identifier.
# model_name: "meta-llama/Meta-Llama-3-8B-Instruct"
model_name: 'data/task_outputs/merged_sft_model/'

# Named references to other config files. Presumably these must be run
# before this task - verify against the task runner.
dependencies:
  dataset: 'configs/prepare_dpo_claim_rewriter_dataset.yaml'
  sft_model: 'configs/merge_sft_claim_rewriter.yaml'