# Selects the environment package, the environment IDs, and the policy for this config.
[base]
# NOTE(review): presumably names the environment package/module the loader dispatches to; confirm against the consumer.
package = mujoco
# Space-separated list of eleven "-v4" environment IDs.
# NOTE(review): how the consumer splits or matches this list is not visible in this file.
env_name = HalfCheetah-v4 Hopper-v4 Swimmer-v4 Walker2d-v4 Ant-v4 Humanoid-v4 Reacher-v4 InvertedPendulum-v4 InvertedDoublePendulum-v4 Pusher-v4 HumanoidStandup-v4
# Name of the policy to use; it is resolved by the consuming code, which is not shown here.
policy_name = CleanRLPolicy
# Intentionally commented out, so no rnn_name key is defined in this section.
# NOTE(review): presumably uncommenting this enables a recurrent wrapper; verify before enabling.
# rnn_name = Recurrent

# Training hyperparameters. Per the original note, the values below are taken from
# https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_continuous_action.py
# Values such as True, None and 1_000_000 use Python-literal spelling.
# NOTE(review): presumably the consumer parses them as Python literals; keep this spelling unless the parser is confirmed.
[train]
# Reproducibility and device placement.
seed =  1
torch_deterministic = True
cpu_offload = False
device = cuda
# Overall training budget and optimizer step size.
total_timesteps = 1_000_000
learning_rate = 3e-4
anneal_lr = True
# Discounting / advantage-estimation settings.
gamma = 0.99
gae_lambda = 0.95
# Number of passes over each collected batch.
update_epochs = 10
norm_adv = True
# Clipping and loss-weight coefficients.
clip_coef = 0.2
clip_vloss = True
vf_coef = 0.5
vf_clip_coef = 0.2
max_grad_norm = 0.5
ent_coef = 0.0
# None here means no value is configured for target_kl.
target_kl = None

# Environment vectorization and data-collection settings.
num_envs = 1
num_workers = 1
# None here means no value is configured for env_batch_size.
env_batch_size = None
zero_copy = False
# Relative directory name; NOTE(review): presumably where run outputs/checkpoints are written, confirm.
data_dir = experiments
# NOTE(review): the unit of this interval (updates, epochs, or steps) is not visible here; confirm.
checkpoint_interval = 200
batch_size = 2048
# NOTE(review): the referenced CleanRL script appears to use 32 minibatches per 2048-sample batch
# (i.e. 64 samples each); confirm that a minibatch size of 32 is intended here.
minibatch_size = 32
bptt_horizon = 1
# compile is disabled; NOTE(review): compile_mode presumably only takes effect when compile = True, confirm.
compile = False
compile_mode = reduce-overhead