# Hydra defaults list. Entries are composed in the order listed, so a later
# entry overrides values set by an earlier one.
defaults:
  # Config `sac`, placed under the `algo` package.
  - sac@algo
  # Config `continuous_cartpole`, placed under the `env` package.
  - continuous_cartpole@env
  # This file's own keys are merged last, so they override the entries above.
  - _self_

# Hydra job naming and output directory layout.
hydra:
  job:
    # Reused in both directory templates via ${hydra.job.name}.
    name: continuous-cartpole-sac
  run:
    # Single runs: outputs/<job name>/<timestamp>
    dir: 'outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}'
  sweep:
    # Multirun sweeps: multirun/<job name>/<timestamp>
    dir: 'multirun/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}'

# Sampling setup. The "N iterations" comments in the algo and runner sections
# are derived from these sizes: batch_T * batch_B = 128 * 16 = 2048 steps.
# NOTE(review): `parallel` presumably selects a multi-process sampler --
# confirm against the training script.
parallel: true
batch_T: 128
batch_B: 16
device: "cpu"
# Overrides applied on top of the `sac` config loaded into `algo` by the
# defaults list.
algo:
  replay_length: 2560  # 20 iterations, i.e. 20 * batch_T (128)
  # NOTE(review): 10000 is not a multiple of batch_T * batch_B (2048), so this
  # threshold falls between the 4th and 5th iteration -- confirm how the
  # algorithm rounds it.
  learning_starts: 10000
  replay_ratio: 64
  target_update_tau: 0.01
# pi model: two hidden layers of width 64 with Tanh nonlinearity.
pi_model:
  hidden_sizes:
    - 64
    - 64
  hidden_nonlinearity: Tanh
# Q model: same layer sizes and nonlinearity as the pi model.
q_model:
  hidden_sizes:
    - 64
    - 64
  hidden_nonlinearity: Tanh
# Action distribution settings.
distribution:
  # NOTE(review): presumably makes evaluation use deterministic actions
  # instead of sampling -- confirm against the distribution implementation.
  deterministic_eval_mode: true
# Evaluation settings.
# NOTE(review): the limits below presumably cap steps per trajectory and the
# number of trajectories per evaluation -- confirm against the eval sampler.
eval:
  max_traj_length: 2000
  max_trajectories: 40
  n_eval_envs: 16
# Training-loop schedule, expressed in environment steps.
# One iteration = batch_T * batch_B = 128 * 16 = 2048 steps.
runner:
  n_steps: 204800  # 100 iterations (100 * 2048)
  log_interval_steps: 10240  # log every 5 iterations (5 * 2048)
  eval_interval_steps: 10240  # evaluate every 5 iterations (5 * 2048)
