# source /u/audreyh/workspace/setup_conda.sh
# source activate rlhf
# Experiment driver: for every reward model listed in `rewards`, invoke
# rlhf.py once per method (piref, bon, rejection) and then invoke plot.py
# for the same task/policy/reward combination. All settings are passed to the
# Python scripts as key=value command-line arguments.
#
# The commands run from inside ./code (relative to the directory this script
# is started from). They are wrapped in a subshell so the caller's working
# directory is never modified -- even when ./code does not exist -- which
# replaces the previous unchecked `cd code` ... `cd ..` pair.
#
# Exit status of the block: 0 when every command succeeded, 1 when ./code
# could not be entered or when any Python invocation failed.

# Reward models to evaluate; uncomment entries to include them in the sweep.
rewards=(
    # "armo-rm"
    "oasst-rm"
    # "rm-gemma-2b"
    # "grm-llama-3b"
)

task="gsm8k"
policy="gemma-2-2b"
repeats=10

(
    # Abort before launching anything if the working directory is wrong;
    # otherwise the Python scripts would be looked up in the wrong place.
    cd code || {
        printf 'error: cannot enter directory "code" from %s\n' "$PWD" >&2
        exit 1
    }

    # Methods passed to rlhf.py, in the order they are run for each reward.
    methods=(piref bon rejection)
    status=0

    for reward in "${rewards[@]}"; do
        for method in "${methods[@]}"; do
            # A failed run is reported but does not stop the remaining runs.
            python rlhf.py use_multiprocessing=True "task=$task" "policy=$policy" \
                "reward=$reward" "method=$method" "repeats=$repeats" user=ah || {
                printf 'warning: rlhf.py failed (reward=%s, method=%s)\n' \
                    "$reward" "$method" >&2
                status=1
            }
        done

        # Plot once per reward, after all methods have been attempted.
        python plot.py "task=$task" "policy=$policy" "reward=$reward" \
            refresh_data=True user=ah || {
            printf 'warning: plot.py failed (reward=%s)\n' "$reward" >&2
            status=1
        }
    done

    exit "$status"
)
# conda deactivate
