# General
# -------
# Identifier of the environment to run.
env-name: 'AntPos-v1'

# Extra keyword arguments for the environment (e.g. the bounds used when
# sampling tasks).
# NOTE(review): how each key below is interpreted is defined by the
# environment itself, not by this file -- confirm against its constructor.
env-kwargs:
  low: -3.0
  high: 3.0
  normalization_scale: 10.0
  max_episode_steps: 100

# Discount factor (gamma).
gamma: 0.99

# Lambda coefficient of "Generalized Advantage Estimation" (GAE).
gae-lambda: 1.0

# Set to true to apply the first-order approximation of MAML.
first-order: false

# Policy network
# --------------
# One entry per hidden layer: the number of units in that layer.
hidden-sizes:
  - 64
  - 64

# Non-linearity applied after every hidden layer.
nonlinearity: 'tanh'

# Task-specific
# -------------
# How many trajectories are sampled for each task.
fast-batch-size: 20

# How many gradient steps the inner loop (fast adaptation) takes.
num-steps: 1

# Step size of every inner-loop (fast adaptation) gradient step.
fast-lr: 0.1

# Optimization
# ------------
# How many outer-loop updates to run (i.e. how many batches of tasks).
num-batches: 500

# How many tasks make up one batch of tasks.
meta-batch-size: 40

# TRPO-specific
# -------------
# Size of the trust region.
max-kl: 0.01

# Iteration count for Conjugate Gradient.
cg-iters: 10

# Damping value used in Conjugate Gradient.
# Written with a decimal point and a signed exponent, the form YAML 1.1
# loaders require in order to resolve the scalar as a float.
cg-damping: 1.0e-5

# Upper bound on the number of line-search steps.
ls-max-steps: 15

# Backtracking ratio used during the line search.
ls-backtrack-ratio: 0.8