# Launch metadata for this experiment: which script to run, how the experiment
# is named, and the resources requested per worker.
# NOTE(review): presumably consumed by a cluster job launcher -- confirm which
# tool reads this section.
meta_data:
  # Path of the script to run; looks relative to the repository root -- confirm.
  script_path: run_scripts/sac_exp_script.py
  exp_name: test_sac_ant_fixed
  description: Train an agent using Soft-Actor-Critic
  # NOTE(review): `variables.seed` lists two values; presumably one worker per
  # generated run -- confirm.
  num_workers: 2
  # Per-worker resource requests.
  num_gpu_per_worker: 1
  num_cpu_per_worker: 8
  mem_per_worker: 16gb
  # Comma-separated list kept as a single string.
  # NOTE(review): looks like scheduler partition names -- confirm.
  partitions: p100,max12hours
  # Comma-separated list kept as a single string.
  # NOTE(review): looks like hostnames of nodes to avoid -- confirm.
  node_exclusions: gpu048,gpu024,gpu025,gpu012,gpu027
# -----------------------------------------------------------------------------
# Values listed here are given as lists of alternatives.
# NOTE(review): presumably the launcher creates one run per listed value --
# confirm.
variables:
  seed:
    - 723894
    - 23789

# -----------------------------------------------------------------------------
# Settings given as single fixed values (in contrast to the lists under
# `variables`).
# NOTE(review): presumably applied unchanged to every generated run -- confirm.
constants:
  # NOTE(review): presumably the hidden-layer width and depth of the networks
  # built by the experiment script -- confirm.
  net_size: 256
  num_hidden_layers: 2

  # Training-loop schedule, evaluation, replay buffer and checkpoint settings.
  rl_alg_params:
    # NOTE(review): if every epoch runs `num_steps_per_epoch` steps, this is
    # 301 * 10000 = 3,010,000 steps per run -- confirm.
    num_epochs: 301
    num_steps_per_epoch: 10000
    # Both values are 1000.
    # NOTE(review): presumably one gradient step per environment step on
    # average -- confirm.
    num_steps_between_train_calls: 1000
    num_train_steps_per_train_call: 1000
    num_steps_per_eval: 10000
    max_path_length: 1000
    # NOTE(review): 0 presumably means no warm-up phase before the first train
    # call -- confirm the buffer holds at least `batch_size` samples by then.
    min_steps_before_training: 0

    eval_deterministic: true

    batch_size: 256
    replay_buffer_size: 1000000
    no_terminal: false
    wrap_absorbing: false

    # Checkpointing options; all optional artifacts are disabled here.
    save_best: false
    # NOTE(review): unit (presumably epochs) is not visible here -- confirm.
    freq_saving: 10
    save_replay_buffer: false
    save_environment: false
    save_algorithm: false

  # Soft Actor-Critic hyperparameters.
  sac_params:
    reward_scale: 5.0
    discount: 0.99
    soft_target_tau: 0.005
    # The policy, Q-function and value-function learning rates are all 3e-4.
    policy_lr: 0.0003
    qf_lr: 0.0003
    vf_lr: 0.0003
    policy_mean_reg_weight: 0.001
    policy_std_reg_weight: 0.001

  # Environment selection and seeding.
  env_specs:
    env_name: 'ant'
    # Empty mapping: no extra environment keyword arguments are supplied.
    env_kwargs: {}
    # Environment seeds are fixed here, separately from `variables.seed`.
    eval_env_seed: 78236
    training_env_seed: 24495
