# Work out which GPUs this script may use.
#
# Globals written:
#   CVD      - array with one entry per usable device
#   NUM_GPUS - number of entries in CVD
# Globals read:
#   CUDA_VISIBLE_DEVICES - comma-separated device list; when non-empty it is
#                          used as-is, otherwise devices are enumerated with
#                          `nvidia-smi -L` and numbered from 0.
# Outputs:
#   A warning on stderr when no device could be determined. The function never
#   aborts the script; NUM_GPUS is simply left at 0 in that case.
detect_gpus() {
    local gpu_count=0 idx

    CVD=()
    if [ -n "${CUDA_VISIBLE_DEVICES:-}" ]; then
        # Commas become blanks and `read -a` does the splitting. Unlike an
        # unquoted array assignment, this never applies pathname expansion.
        IFS=$' \t\n' read -r -a CVD <<< "${CUDA_VISIBLE_DEVICES//,/ }"
    elif command -v nvidia-smi > /dev/null 2>&1; then
        # Count only lines of the form "GPU <n>: ..."; any other output line
        # (sub-device entries, diagnostic messages) must not become a GPU.
        gpu_count=$(nvidia-smi -L | grep -c '^GPU ')
        for ((idx = 0; idx < ${gpu_count:-0}; idx++)); do
            CVD+=("$idx")
        done
    fi
    NUM_GPUS=${#CVD[@]}

    if [ "$NUM_GPUS" -eq 0 ]; then
        echo "warning: no GPUs detected (CUDA_VISIBLE_DEVICES is empty and nvidia-smi reported none)" >&2
    fi
}

detect_gpus

# Assemble the command prefix shared by every training run and store it in
# the global string $cmd (launcher, then the common trainer options).
#
# Globals read:
#   NUM_GPUS          - "1" selects plain `python ./train_peft.py`; any other
#                       value selects `accelerate launch` with
#                       --num_processes set to NUM_GPUS.
#   MAIN_PROCESS_PORT - optional; value passed as --main_process_port.
#                       Defaults to 3142, so existing invocations are
#                       unchanged, while concurrent launches on one host can
#                       pick different ports.
# Globals written:
#   cmd - the launcher and shared options joined by single spaces.
build_base_cmd() {
    local IFS=' ' # "${array[*]}" joins on the first character of IFS
    local -a launcher train_opts

    if [ "$NUM_GPUS" = "1" ]; then
        launcher=(python ./train_peft.py)
    else
        launcher=(
            accelerate launch
            --config_file=default_config.yaml
            --num_processes "$NUM_GPUS"
            --main_process_port "${MAIN_PROCESS_PORT:-3142}"
            ./train_peft.py
        )
    fi

    # Options every run shares, one per line.
    train_opts=(
        --add_lora
        --shuffle_dataset
        --lora_r 8
        --lora_alpha 16
        --num_train_epochs 10
        --per_device_train_batch_size 64
        --logging_strategy=epoch
        --logging_steps 1
        --use_wandb
        --torch_empty_cache_steps 5
    )

    cmd="${launcher[*]} ${train_opts[*]}"
}

build_base_cmd

# Tofu
# Training sweep over the WaterDrum-TOFU dataset. Command lines are kept in
# bash arrays and executed directly (no string concatenation + eval), so each
# argument reaches the trainer exactly as written here.
old_dataset_name="Glow-AI/WaterDrum-TOFU"
new_dataset_name="dataset/Glow-AI/WaterDrum-TOFU"
prompt_field="question"
response_field="answer"

# Print a progress label on stdout, then run the remaining arguments as one
# command. A failing command is reported on stderr and does not stop the
# caller, so the rest of the sweep still runs.
#   $1    - label to print
#   $2... - command and its arguments
launch_run() {
    local label=$1
    shift
    echo "$label"
    "$@" || echo "warning: run '$label' exited with status $?" >&2
}

# Launch every TOFU training run.
#
# Globals read:
#   cmd - launcher plus shared options as one whitespace-separated string
#         (built earlier in the script)
#   old_dataset_name, new_dataset_name, prompt_field, response_field
# Returns:
#   1 if $cmd is empty (nothing to launch), 0 otherwise.
#
# For each subset prefix ("" and "unwatermarked_"), seed, forget percentage and
# split (full / retain) this starts three runs: one plain run and one run per
# duplicate kind (semantic, exact).
run_tofu_sweep() {
    local IFS=$' \t\n'
    local -a base_cmd=() tofu_args=() shared_args=() run_args=()
    local subset dataset_name seed forget_pct split dup subset_tag out_root

    # Split the prefix string into words once; no globbing or quote
    # processing is applied to it.
    read -r -a base_cmd <<< "${cmd:-}"
    if [ "${#base_cmd[@]}" -eq 0 ]; then
        echo "error: \$cmd is empty; no training command to launch" >&2
        return 1
    fi

    tofu_args=(
        --model_name=meta-llama/Llama-2-7b-chat-hf
        "--dataset_prompt_field=$prompt_field"
        --forget_dataset_split=forget
        "--forget_dataset_prompt_field=$prompt_field"
        "--duplicate_dataset_prompt_field=$prompt_field"
        --wandb_project=train_WaterDrum_TOFU
        --eval_on_subsets
    )

    for subset in "" "unwatermarked_"; do
        # The "unwatermarked_" subsets are read from $old_dataset_name, all
        # others from $new_dataset_name.
        if [ "$subset" = "unwatermarked_" ]; then
            dataset_name=$old_dataset_name
        else
            dataset_name=$new_dataset_name
        fi

        for seed in 41; do
            for forget_pct in "10"; do #"05" "01"
                subset_tag="${subset}forget_${forget_pct}"
                out_root="trained_models/seed_${seed}/${subset_tag}"

                # Train full, retrained model
                for split in full retain; do
                    # Arguments common to the plain run and both duplicate runs.
                    shared_args=(
                        "${base_cmd[@]}"
                        "${tofu_args[@]}"
                        "--dataset_name=$dataset_name"
                        "--forget_dataset_name=$dataset_name"
                        "--duplicate_dataset_name=$dataset_name"
                        "--dataset_subset=$subset_tag"
                        --dataset_split=retain
                        "--forget_dataset_subset=$subset_tag"
                        "--duplicate_dataset_subset=$subset_tag"
                        --shuffle_seed "$seed"
                        --seed "$seed"
                        "--dataset_response_field=$response_field"
                        "--forget_dataset_response_field=$response_field"
                        "--duplicate_dataset_response_field=$response_field"
                    )

                    run_args=(
                        "${shared_args[@]}"
                        "--wandb_run_name=${seed}_${subset}${forget_pct}_${split}"
                        "--output_dir=$out_root/$split"
                        "--final_model_output_dir=$out_root/$split"
                    )
                    if [ "$split" = "full" ]; then
                        run_args+=(--add_forget_to_retain)
                    fi
                    launch_run "$subset $seed $forget_pct $split" "${run_args[@]}"

                    for dup in semantic exact; do
                        run_args=(
                            "${shared_args[@]}"
                            "--duplicate_dataset_split=${dup}_duplicate"
                            "--wandb_run_name=${seed}_${subset}${forget_pct}_${split}_${dup}dup"
                            "--output_dir=$out_root/${split}_${dup}dup"
                            "--final_model_output_dir=$out_root/${split}_${dup}dup"
                            --add_duplicate_to_retain
                        )
                        if [ "$split" = "full" ]; then
                            # NOTE(review): the plain "full" run above passes
                            # --add_forget_to_retain while this one passes
                            # --add_forget_to_train. Kept as found; confirm
                            # against train_peft.py which spelling it accepts.
                            run_args+=(--add_forget_to_train)
                        fi
                        launch_run "$subset $seed $forget_pct $split $dup" "${run_args[@]}"
                    done
                done
            done
        done
    done
}

run_tofu_sweep