#!/bin/bash

# ---------------------------------------------------------------------------
# Shared settings used by every job submission further down in this script.
# ---------------------------------------------------------------------------

# Identifier of this batch of runs; it is embedded in the Slurm log file names.
runid=5

# Size suffix interpolated into the proxy reward model name and checkpoint paths.
model_size='small'

# Gold model name and the two gold-ensemble checkpoints under the shared
# proxy-models directory.
gold_model='microsoft/deberta-v3-large'
proxy_models_dir='/data/proxy_models'
gold_checkpoint_path_1="${proxy_models_dir}/gold_ensemble/deberta_large_gold1.pkl"
gold_checkpoint_path_2="${proxy_models_dir}/gold_ensemble/deberta_large_gold2.pkl"

# Attack settings. They are forwarded positionally to pez_attack.sh and most of
# them are also embedded in the output file names.
# NOTE(review): exact semantics are defined by pez_attack.sh, not visible here.
N=1000
num_steps=200
eval_steps=10
num_optim_tokens=4
batch_size=20
lr=0.5
mode='prepend'
one_prompt='False'

######## ROBUST ##########
# Submits one Slurm job that runs pez_attack.sh with the proxy checkpoint
# stored under proxy_robust/. Uses the shared settings defined at the top of
# this script (model_size, mode, N, num_steps, eval_steps, num_optim_tokens,
# batch_size, one_prompt, lr, runid, gold_* paths).
robust_type="pez"
reward_model="microsoft/deberta-v3-${model_size}"
reward_checkpoint_path="${proxy_models_dir}/proxy_robust/deberta-v3_${model_size}_gm_labeled_${robust_type}.pkl"
# NOTE(review): relative path, so it resolves against the directory the job
# runs in; this script does not create the directory.
output_file="../data/final_runs/${robust_type}_on_pez/${model_size}_proxyrobust_${robust_type}_${mode}_${N}N_${num_steps}sattack_${eval_steps}seval_${num_optim_tokens}tokens_fixed.npy"
input_file="/data/private_models/misc/wb_data/worst_generations_llama_7b.json"

# Earlier submission variant (explicit partition, different log name), kept for reference:
# sbatch --gpus=1 --partition=xx --time=10:00:00 --cpus-per-gpu=8 --output="slurm_redteam_wb_${model_size}_${robust_type}robust_type_pez_${mode}_run${runid}-%j.out" pez_attack.sh $reward_model $reward_checkpoint_path $gold_model $gold_checkpoint_path_1 $gold_checkpoint_path_2 $N $num_steps $eval_steps $num_optim_tokens $batch_size $one_prompt $output_file $input_file $lr $mode

# The 15 positional arguments of pez_attack.sh, in the order it expects.
# Held in an array and expanded quoted so that an empty or whitespace-containing
# value can never drop out or split and silently shift the later arguments.
attack_args=(
  "$reward_model"
  "$reward_checkpoint_path"
  "$gold_model"
  "$gold_checkpoint_path_1"
  "$gold_checkpoint_path_2"
  "$N"
  "$num_steps"
  "$eval_steps"
  "$num_optim_tokens"
  "$batch_size"
  "$one_prompt"
  "$output_file"
  "$input_file"
  "$lr"
  "$mode"
)

# %j in --output is replaced by Slurm with the job ID.
sbatch \
  --gpus=1 \
  --time=10:00:00 \
  --cpus-per-gpu=8 \
  --output="slurm_redteam_wb_${model_size}_${robust_type}_on_pez_${mode}_run${runid}_${lr}lr-%j.out" \
  pez_attack.sh "${attack_args[@]}"


######## SAM ROBUST ##########
# robust_type="sam"
# reward_model="microsoft/deberta-v3-${model_size}"
# reward_checkpoint_path="${proxy_models_dir}/final_proxy/deberta-v3-${model_size}_gm_labeled_5e-06_8_2_epoch2_sam1.pkl"
# output_file="../data/final_runs/${robust_type}_on_pez/${model_size}_proxyrobust_${robust_type}_${mode}_${N}N_${num_steps}sattack_${eval_steps}seval_${num_optim_tokens}tokens.npy"
# input_file="/data/private_models/xx_models/misc/wb_data/worst_generations_llama_7b.json"
# # sbatch --gpus=1 --partition=xx --time=10:00:00 --cpus-per-gpu=8 --output="slurm_redteam_wb_${model_size}_${robust_type}robust_type_pez_${mode}_run${runid}-%j.out" pez_attack.sh $reward_model $reward_checkpoint_path $gold_model $gold_checkpoint_path_1 $gold_checkpoint_path_2 $N $num_steps $eval_steps $num_optim_tokens $batch_size $one_prompt $output_file $input_file $lr $mode
# sbatch --gpus=1 --partition=xx --time=10:00:00 --cpus-per-gpu=8 --output="slurm_redteam_wb_${model_size}_${robust_type}_on_pez_${mode}_run${runid}-%j.out" pez_attack.sh $reward_model $reward_checkpoint_path $gold_model $gold_checkpoint_path_1 $gold_checkpoint_path_2 $N $num_steps $eval_steps $num_optim_tokens $batch_size $one_prompt $output_file $input_file $lr $mode

######## NON-ROBUST ##########
# Submits one Slurm job that runs pez_attack.sh with the proxy checkpoint
# stored under final_proxy/ (file name ending in sam0). Uses the shared
# settings defined at the top of this script.
reward_model="microsoft/deberta-v3-${model_size}"
# The original value ended in a stray space inside the quotes; it only worked
# because the unquoted expansion word-split the space away. Removed so the
# path is correct when expanded quoted.
reward_checkpoint_path="${proxy_models_dir}/final_proxy/deberta-v3-${model_size}_gm_labeled_5e-06_8_2_epoch2_sam0.pkl"
# NOTE(review): relative path, so it resolves against the directory the job
# runs in; this script does not create the directory.
output_file="../data/redteam_wb_attack/pez_${runid}/${model_size}_proxynonrobust_${mode}_${N}N_${num_steps}sattack_${eval_steps}seval_${num_optim_tokens}tokens.npy"
input_file="/data/private_models/misc/wb_data/worst_generations_llama_7b.json"

# The 15 positional arguments of pez_attack.sh, in the order it expects.
# Held in an array and expanded quoted so that an empty or whitespace-containing
# value can never drop out or split and silently shift the later arguments.
attack_args=(
  "$reward_model"
  "$reward_checkpoint_path"
  "$gold_model"
  "$gold_checkpoint_path_1"
  "$gold_checkpoint_path_2"
  "$N"
  "$num_steps"
  "$eval_steps"
  "$num_optim_tokens"
  "$batch_size"
  "$one_prompt"
  "$output_file"
  "$input_file"
  "$lr"
  "$mode"
)

# %j in --output is replaced by Slurm with the job ID.
sbatch \
  --gpus=1 \
  --time=10:00:00 \
  --cpus-per-gpu=8 \
  --output="slurm_redteam_wb_${model_size}_pez_${mode}_run${runid}-%j.out" \
  pez_attack.sh "${attack_args[@]}"

