#!/bin/bash

# Seeds handed to train.py via --seed; every seed produces one run per environment.
seeds=(
  3831217417
  1756129809
  4075310593
  2904435568
  4115729820
  2726071262
  1865059290
  1408779145
  3716099507
  411100252
)

# Environment ids iterated by the main sweep (Swimmer-v4 is launched separately).
envs=(
  HalfCheetah-v4
  Ant-v4
  Hopper-v4
  Walker2d-v4
)

# Note: we use n_envs=gradient_steps=12 to make things 10x faster thanks to JIT compilation.
# The results are equivalent to n_envs=train_freq=gradient_steps=1 as we have a 1:1 ratio
# between gradient steps and collected data.
for env_id in ${envs[*]}; do
  for seed in ${seeds[*]}; do
   OMP_NUM_THREADS=1 python train.py --algo sac --env $env_id --seed $seed \
   --eval-freq 25000 --verbose 0 --n-eval-envs 5 --eval-episodes 20 \
   --log-interval 100 -c hyperparams/sac.py -param n_envs:12 gradient_steps:12 --vec-env subproc -P
  done
done

# Swimmer
for seed in ${seeds[*]}; do
 OMP_NUM_THREADS=1 python train.py --algo sac --env Swimmer-v4 --seed $seed \
 --eval-freq 25000 --verbose 0 --n-eval-envs 5 --eval-episodes 20 \
 --log-interval 100 -c hyperparams/sac.py -param n_envs:12 gradient_steps:12 gamma:0.9999 --vec-env subproc -P
done
