# Unlearning-method selectors. In this file they are only referenced by the
# commented-out model_path template; the active model_path is hard-coded.

# choices: grad_ascent, grad_diff, KL, dpo
forget_loss: dpo

# choices: g_prod, none
# Quoted to make explicit that this is the literal string "none", not YAML null.
attribution: 'none'

# choices: exp, none
unification: 'none'

# choices: a float value, or the literal string "none"
tau: 'none'



# Aggregated evaluation log stored under the fine-tuned run's checkpoint-1000.
# NOTE(review): model_path below lives under checkpoint-5000 of the same run
# directory, while this path uses checkpoint-1000 — confirm this is intended.
ckpt_result: 'checkpoints/ft_epoch5_lr1e-05_phi_full_wd0.01/checkpoint-1000/eval_results/ds_size${ds_size}/eval_log_aggregated.json'

# Disabled templated variant that builds the directory name from forget_loss,
# attribution, unification and tau. Its naming scheme (and forget01 / ep1 /
# checkpoint-40) differs from the hard-coded path that is active below.
# model_path: checkpoints/ft_epoch5_lr1e-05_phi_full_wd0.01/checkpoint-5000/${forget_loss}_forget01_lr1e-05_wd_0.01_ep1_bs1_DA_${attribution}_${unification}_tau${tau}/checkpoint-40

# Checkpoint directory of the model being evaluated; save_dir and
# retain_result are derived from it via interpolation.
model_path: 'checkpoints/ft_epoch5_lr1e-05_phi_full_wd0.01/checkpoint-5000/forget05_dpo_taunone_lr1e-05_wd_0.01_ep5_bs1_DA_none_none/checkpoint-200'

# Model family identifier.
model_family: phi

# Forget split under evaluation; interpolated as the last entry of split_list.
split: forget05_perturbed

# Output directory: nested under the evaluated checkpoint and keyed by ds_size.
save_dir: '${model_path}/eval_results/ds_size${ds_size}'

# NOTE(review): this resolves to eval_log_aggregated.json inside save_dir,
# i.e. under the model being evaluated rather than a separate model's
# results — confirm that this is the intended reference file.
retain_result: '${model_path}/eval_results/ds_size${ds_size}/eval_log_aggregated.json'



# Per-dataset evaluation settings.
# Every list below has four entries and is meant to be read position by
# position against split_list (eval_task already follows this order):
#   0: world_facts_perturbed
#   1: retain_perturbed
#   2: real_authors_perturbed
#   3: ${split} (the forget split)
# When reordering split_list, reorder every other list the same way.

# Dataset path for each of the 4 datasets to be evaluated.
data_path: [locuslab/TOFU, locuslab/TOFU, locuslab/TOFU, locuslab/TOFU]

split_list:
  - world_facts_perturbed
  - retain_perturbed
  - real_authors_perturbed
  - ${split}

question_key: [question, question, question, question]
answer_key: [answer, answer, answer, answer]

# paraphrased_answer is requested only for the retain and forget splits
# (positions 1 and 3). Per the locuslab/TOFU dataset card, the world_facts and
# real_authors splits expose only question / answer / perturbed_answer, so
# they must use the plain answer field here.
base_answer_key: [answer, paraphrased_answer, answer, paraphrased_answer]
perturbed_answer_key: [perturbed_answer, perturbed_answer, perturbed_answer, perturbed_answer]

eval_task: [eval_real_world_wo_options, eval_retain, eval_real_author_wo_options, eval_forget]

# Older task list kept for reference. Note that its order
# (eval_log, real_author, real_world, forget) differs from split_list above.
# eval_task: [eval_log, eval_real_author_wo_options, eval_real_world_wo_options, eval_log_forget]

# Text-generation limits.
# NOTE(review): presumably max_length is a token budget and max_new_tokens is
# left unset (null) so that max_length applies — confirm against the consumer.
generation:
  max_length: 200
  max_new_tokens: null

# NOTE(review): presumably stores the generated strings alongside the metrics — confirm.
save_generated_text: true

# Interpolated into ckpt_result, save_dir and retain_result as "ds_size${ds_size}".
ds_size: 300

# Run-control flags; exact semantics are defined by the evaluation script.
overwrite: false
use_pretrained: false

batch_size: 30
reinitialize_weights: false

use_flash_attention_2: true

