#!/usr/bin/env bash
#
# Runs forget.py through torchrun once per forget_loss setting, sequentially,
# using the forget_base.yaml config on the forget05 split with the
# llama3.2-1b model family.
#
# Usage: run from the directory that contains forget.py.
#
# Optional environment:
#   FORGET_GPU          value exported as CUDA_VISIBLE_DEVICES (default: 1)
#   FORGET_MASTER_PORT  value passed to torchrun --master_port (default: 27393)
#
# `set -e` is intentionally not enabled: the runs are independent, so a
# failure in one must not prevent the remaining ones from being launched.
set -u

#######################################
# Launch a single forget.py run with the shared settings.
# Globals:   FORGET_GPU (read), FORGET_MASTER_PORT (read)
# Arguments: $1   - value for the forget_loss override
#            $2.. - extra key=value overrides appended after weight_decay
#                   (beta, npo_coeff and, for some losses, gamma)
# Outputs:   a diagnostic on stderr when the run fails
# Returns:   the exit status of torchrun
#######################################
run_forget() {
  local forget_loss=${1:?run_forget: forget_loss is required}
  shift
  local gpu=${FORGET_GPU:-1}
  local port=${FORGET_MASTER_PORT:-27393}
  local status=0

  # The assignment prefix scopes CUDA_VISIBLE_DEVICES to this one command.
  CUDA_VISIBLE_DEVICES="$gpu" torchrun \
    --nproc_per_node=1 \
    --master_port="$port" \
    forget.py \
    --config-name=forget_base.yaml \
    split=forget05 \
    model_family=llama3.2-1b \
    batch_size=4 \
    gradient_accumulation_steps=8 \
    "forget_loss=${forget_loss}" \
    weight_decay=0.01 \
    "$@" || status=$?

  if ((status != 0)); then
    printf 'run failed: forget_loss=%s (exit status %d)\n' \
      "$forget_loss" "$status" >&2
  fi
  return "$status"
}

#######################################
# Run the whole sweep in a fixed order.
# Outputs:   diagnostics on stderr for failed preflight checks or runs
# Returns:   0 if every run succeeded, 127 if torchrun is unavailable,
#            1 if forget.py is missing or at least one run failed
#######################################
main() {
  # Fail once, up front, instead of once per run.
  if ! command -v torchrun >/dev/null 2>&1; then
    printf 'torchrun not found on PATH\n' >&2
    return 127
  fi
  if [[ ! -f forget.py ]]; then
    printf 'forget.py not found in %s; run this script from its directory\n' \
      "$PWD" >&2
    return 1
  fi

  local failures=0

  # One line per run: <forget_loss> <per-loss overrides...>
  run_forget grad_diff beta=1.0 npo_coeff=1.0 || failures=$((failures + 1))
  run_forget grad_ascent beta=1.0 npo_coeff=1.0 || failures=$((failures + 1))
  run_forget npo_grad_diff beta=0.1 npo_coeff=0.5 || failures=$((failures + 1))
  run_forget dpo_grad_diff beta=0.1 npo_coeff=0.5 || failures=$((failures + 1))
  run_forget simnpo_grad_diff beta=2.0 npo_coeff=0.2 gamma=0.0 || failures=$((failures + 1))
  run_forget wgd beta=1.0 npo_coeff=1.0 gamma=0.0 || failures=$((failures + 1))
  run_forget satimp_gd beta=5.0 npo_coeff=1.0 gamma=0.1 || failures=$((failures + 1))

  if ((failures > 0)); then
    printf '%d of 7 runs failed\n' "$failures" >&2
    return 1
  fi
  return 0
}

main