# Configuration for the AutoQD algorithm (our method).
# `_target_` is the dotted path of the class this config describes.
_target_: src.algorithms.AutoQD

# Hydra defaults list: pull in the `cma_mae` option of the `qd` config group
# first, then apply this file (`_self_`) so that, on conflicting keys, the
# values written here take precedence.
defaults:
  - qd: cma_mae
  - _self_

# Settings for the agent (policy) object.
agent:
  # Alternative target noted by the original author: src.agents.MLPAgent
  _target_: src.agents.ToeplitzAgent
  # List of ints, possibly empty.
  # NOTE(review): presumably the hidden-layer widths — confirm.
  layers:
    - 128
    - 128
  # Either 'relu' or 'tanh'.
  activation: 'tanh'
  # Both sizes are interpolated from the `env` config node.
  state_dim: ${env.state_dim}
  action_dim: ${env.action_dim}

# Settings for the embedding object.
embedding:
  # NOTE(review): RFF presumably stands for random Fourier features — confirm.
  _target_: src.embeddings.RFF
  dim: 100  # NOTE(review): presumably the size of the embedding — confirm
  # Both sizes are interpolated from the `env` config node.
  state_dim: ${env.state_dim}
  action_dim: ${env.action_dim}
  # Explicitly unset here. NOTE(review): presumably the RFF class then falls
  # back to its own default or heuristic — confirm against src.embeddings.RFF.
  kernel_width: null
  normalize: true
  gamma: 0.999  # NOTE(review): looks like a discount factor — confirm

# NOTE(review): presumably the number of measure dimensions (the k in
# [-1, 1]^k mentioned under map_type) — confirm.
measures_dim: 4
measure_margin: 1.2
# Either a list or a single int (an int gives fixed periodic updates).
update_interval:
  - 20
  - 50
  - 100
  - 200
  - 300
total_iterations: 500
# Top fraction of solutions whose embeddings are considered when refining the
# measure map.
refine_frac: 1.0
# Available map_type options:
#   greedy:   greedily select anchors that maximize the distance to the
#             previous anchors
#   subspace: greedily select anchors that maximize the distance to the
#             affine subspace spanned by the previous anchors
#   pca:      use PCA to find the new coordinate system
#   wpca:     use weighted PCA (weighted by objectives) to find the new
#             coordinate system
#   cwpca:    use calibrated weighted PCA to find the new coordinate system;
#             calibration tries to spread the current solutions uniformly
#             over [-1, 1]^k
map_type: 'cwpca'
# Number of trajectories used for policy evaluation (estimating return and
# embeddings).
n_evals: 5
