
# Launch one learn_reward.pbrr run with env_to_run=tomato, level=7,
# reward_fun=proxy and exp_algo=ORPO.
#
# Environment variables (both optional):
#   PBRR_UNIQUE_ID  Identifier for this experiment. When unset or empty it
#                   defaults to this shell's PID ($$). Note that a PID is
#                   distinct among concurrently running processes, not random.
#   SEED            Value forwarded to the run as seed=<value>. Defaults to 1
#                   when unset or empty.

# Fill in the default only when the caller did not supply an ID, then export
# so the child process inherits it.
# NOTE(review): presumably learn_reward.pbrr reads PBRR_UNIQUE_ID from its
# environment -- confirm against that module.
: "${PBRR_UNIQUE_ID:=$$}"
export PBRR_UNIQUE_ID

SEED=${SEED:-1}

# seed is quoted so that a value containing whitespace or glob characters
# reaches the program as a single seed=... argument instead of being split.
python -m learn_reward.pbrr with \
	env_to_run=tomato \
	level=7 \
	reward_fun=proxy \
	exp_algo=ORPO \
	checkpoint_to_load_current_policy=None \
	seed="$SEED" \
	num_rollout_workers=10 \
	num_gpus=1 \
	num_training_iters=100


