#!/usr/bin/env bash
# The script relies on bash arrays, so it must be run by bash rather than a
# plain POSIX sh.

# Base value for torchrun's --master_port; the sweep adds a per-data-mode
# offset on top of it.
master_port=45159

# export CUDA_VISIBLE_DEVICES=1,0

# Forwarded to the training script as model_train.model_name and
# model_train.model_path.
# NOTE(review): the name says "llama-2" while the path points at a Mistral
# checkpoint -- confirm this pairing is intentional.
model_name=llama-2
model_path="mistralai/Mistral-7B-Instruct-v0.2"


# Ours: data-mode variants to sweep over, in launch order.
# Variants that are currently switched off:
#   forget_more_retain_perturb, forget_more_retain, forget_retain_perturb,
#   forget_more
data_modes=()
data_modes+=("forget_retain")
data_modes+=("forget")

# Space-separated key=value overrides that are added to every training
# command. One override per line so individual settings are easy to edit.
#
# Alternative output/log locations kept from the commented-out variant:
#   OUTPUTMODELDIR=trained_models/hf-ours-Lora-wmdp
#   BASELOGDIR=hf-outputs_lightning_tune-wmdp
COMMON="lightning.trainer.devices=2"
COMMON+=" data.batch_size=8"
COMMON+=" gradient_accumulation_steps=2"
COMMON+=" model_train.num_layer=8"
COMMON+=" model_train.Lora.r=32"
COMMON+=" model_train.weight_decay=0.01"
COMMON+=" OUTPUTMODELDIR=trained_models2/hf-ours-Lora-copyright"
COMMON+=" BASELOGDIR=hf-outputs_lightning_tune-copyright"

# Learning rates for the outer loop of the sweep, in launch order.
# Currently switched off: 5e-5, 1e-4, 3e-3
lrs=(5e-4 1e-3)

#######################################
# Run the training sweep: one torchrun job per (learning rate, data mode)
# pair, strictly one after another.
# Globals (read): lrs, data_modes, master_port, model_name, model_path, COMMON
# Arguments: none
# Outputs: a warning on stderr for every run that exits non-zero, plus a
#          summary of all failed runs once the sweep is over.
# Returns: 0 always; a failed run never aborts the remaining runs.
#######################################
run_sweep() {
    local lr i data_mode run_id
    # GPUs exposed to each job. Earlier setups used "1,0" and a single "3".
    local gpu_ids="3,0"
    local -a common_args=() failed_runs=()

    # Split COMMON on whitespace into separate arguments. Unlike an unquoted
    # $COMMON expansion, this never performs pathname expansion on a word.
    read -r -a common_args <<< "$COMMON"

    for lr in "${lrs[@]}"; do
        for i in "${!data_modes[@]}"; do
            data_mode=${data_modes[$i]}
            run_id="lr=$lr data_mode=$data_mode"

            # Port = base + 100 * data-mode index. The same ports recur for
            # every learning rate; torchrun runs in the foreground here, so
            # no two jobs from this loop are alive at the same time.
            # --nproc_per_node should stay in step with the number of ids in
            # gpu_ids (and presumably with lightning.trainer.devices in COMMON).
            if ! CUDA_VISIBLE_DEVICES="$gpu_ids" torchrun \
                --nproc_per_node=2 \
                --master_port="$((master_port + i * 100))" \
                scripts/hf_forget_train.py \
                project="harry-ours-hf" \
                data=harry \
                lr="$lr" \
                model_train=remember_uniform \
                model_train.model_name="$model_name" \
                model_train.model_path="$model_path" \
                data_mode="$data_mode" \
                "${common_args[@]}"; then
                printf 'warning: training run failed (%s)\n' "$run_id" >&2
                failed_runs+=("$run_id")
            fi

            # Short pause before the next launch.
            sleep 2
        done
    done

    if (( ${#failed_runs[@]} > 0 )); then
        printf '%d run(s) failed:\n' "${#failed_runs[@]}" >&2
        printf '  %s\n' "${failed_runs[@]}" >&2
    fi
    return 0
}

run_sweep