# Launch forget.py under torchrun with 2 worker processes per node, selecting
# config forget.yaml and passing the key=value overrides listed below
# (presumably Hydra-style overrides -- confirm against forget.py).
# Reads shell variables set outside this command: master_port, JOB_ID, MODEL_PATH.
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (e.g. a model path with a space) stays one single argument.
torchrun --nproc_per_node=2 --master_port="$master_port" \
  forget.py --config-name=forget.yaml \
  save_job_name="$JOB_ID" \
  forget_loss=idk \
  num_epochs=10 \
  split=forget10 \
  retain_set=retain90 \
  gradient_accumulation_steps=16 \
  batch_size=1 \
  npo_coeff=0.1375 \
  beta=0.1 \
  grad_diff_coeff=1 \
  model_family=llama3.1 \
  model_path="$MODEL_PATH" \
  LoRA.r=8 \
  lr=1e-4

# Launch forget_rwku.py under torchrun with 8 worker processes per node,
# selecting config forget.yaml and passing the key=value overrides listed below
# (presumably Hydra-style overrides -- confirm against forget_rwku.py).
# Reads shell variables set outside this command: master_port, JOB_ID, SPLIT,
# RETAIN_SET, MODEL_PATH.
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (e.g. a model path with a space) stays one single argument.
torchrun --nproc_per_node=8 --master_port="$master_port" \
  forget_rwku.py --config-name=forget.yaml \
  save_job_name="$JOB_ID" \
  forget_loss=mix_retain_npo_grad_diff \
  num_epochs=10 \
  split="$SPLIT" \
  retain_set="$RETAIN_SET" \
  gradient_accumulation_steps=4 \
  batch_size=1 \
  npo_coeff=10 \
  beta=0.1 \
  model_family=llama3.1 \
  model_path="$MODEL_PATH" \
  grad_diff_coeff=1 \
  trigger=true \
  trigger_file_path=data_then_0.33 \
  lr=1.4e-5 \
  mix_retain_coeff=20
