#!/usr/bin/env bash
#
# Run plot.py once per reward model listed in `rewards`, for a single policy.
#
# Usage: <this script> <policy>
#   <policy>  passed through to plot.py as its `policy=` argument.
#
# Exit status: 0 when every plot.py run succeeded, 1 when at least one run
# failed, 2 when the policy argument is missing or the environment could not
# be prepared.
#
# NOTE(review): `set -eu` is intentionally not enabled here because the
# sourced environment scripts are not visible from this file and may not be
# written for strict mode -- confirm before turning it on. Critical steps are
# checked explicitly instead.
#
# Early exits use `return ... 2>/dev/null || exit ...` so the script stops
# cleanly both when executed and when sourced into another shell.

conda_setup=/u/audreyh/workspace/setup_conda.sh

# Validate the argument before touching the environment: without it every
# run would receive an empty `policy=` value.
policy=${1-}
if [[ -z "$policy" ]]; then
  printf 'usage: %s <policy>\n' "${BASH_SOURCE[0]##*/}" >&2
  return 2 2>/dev/null || exit 2
fi

if [[ ! -r "$conda_setup" ]]; then
  printf 'error: cannot read %s\n' "$conda_setup" >&2
  return 2 2>/dev/null || exit 2
fi
# shellcheck disable=SC1090  # path is machine-specific, not available to lint
source "$conda_setup"

if ! source activate rlhf; then
  printf 'error: could not activate environment "rlhf"\n' >&2
  return 2 2>/dev/null || exit 2
fi

# plot.py is invoked by relative path, so a failed cd must stop the script
# instead of running python from the wrong directory.
if ! cd code; then
  printf 'error: cannot change into directory "code"\n' >&2
  conda deactivate
  return 2 2>/dev/null || exit 2
fi

rewards=(
  "oasst-rm"
  "rm-gemma-2b"
  "grm-llama-3b"
  "eurus-rm-7b"
  "rm-mistral-7b"
  "beaver-7b"
  "armo-rm"
  "fsfairx-8b"
)

# Keep going after a failed run so the remaining reward models are still
# plotted; the failure is reported and reflected in the final status.
status=0
for reward in "${rewards[@]}"; do
  if ! python plot.py "policy=$policy" "reward=$reward"; then
    printf 'warning: plot.py failed for policy=%s reward=%s\n' \
      "$policy" "$reward" >&2
    status=1
  fi
done

# python plot.py

conda deactivate

return "$status" 2>/dev/null || exit "$status"