# Sweep over seeds 1-5: one sequential train_ppo.py run per seed, with the
# --two_head flag set. Every flag other than --seed is the same on each run.
# A failing run does not stop the sweep; the next seed is still launched.
for seed in 1 2 3 4 5; do
    python train_ppo.py \
        --num_train_steps=25_000_000 \
        --intrinsic_reward=re3 \
        --env_id=procgen_AllMazeHard \
        --beta=0.1 \
        --two_head \
        --rwd_norm_type=minmax \
        --update_proportion=1.0 \
        --seed="$seed"
done

# Sweep over seeds 1-5: one sequential train_ppo.py run per seed, this time
# without the --two_head flag. Every flag other than --seed is the same on
# each run. A failing run does not stop the sweep; the next seed still runs.
for seed in 1 2 3 4 5; do
    python train_ppo.py \
        --num_train_steps=25_000_000 \
        --intrinsic_reward=re3 \
        --env_id=procgen_AllMazeHard \
        --beta=0.1 \
        --rwd_norm_type=minmax \
        --update_proportion=1.0 \
        --seed="$seed"
done