---
# Top-level experiment settings.
# Several values below are Python source strings (expressions / lambdas).
# NOTE(review): presumably the consuming code evaluates them — confirm
# against the loader before changing their syntax.

# Run identification and logging.
group: GAIL
verbose: false

# Environment. env_type / env_id carry the same two values that appear as the
# first two arguments of the `env` expression below.
env_type: 'gym'
env_id: 'AntWall-v0'
env: "tools.environments.create('gym', 'AntWall-v0', normalize_states=False, time_limit=500)"

# Discounting and optimisation.
reward_gamma: 0.99
discount_factor: 0.99
learning_rate: 0.0005

# Expert data and normalisation.
expert_episodes: 100
window: 25
normalize_func: 'max'

# Cost definition and comparison metrics (Python lambda source).
# cost_condition is true when the first state component is <= -1.
cost_condition: 'lambda s, a: s[0] <= -1.'
cost_comparison: "lambda mc, c: __import__('tools').utils.mse(mc, c)"
accrual_comparison: "lambda ea, a: __import__('tools').utils.wasserstein_distance2d(ea.reshape(1, -1), a.reshape(1, -1))"

# Input formatting: both forms keep the state and drop the action.
input_format: 'lambda s, a: [*s]'
vector_input_format: 'lambda S, A: S'

# Reductions: states are cut to their first component (s[:1] / S[:, :, :1]);
# actions pass through unchanged.
state_reduction: 'lambda s: s[:1]'
vector_state_reduction: 'lambda S: S[:, :, :1]'
action_reduction: 'lambda a: a'
vector_action_reduction: 'lambda A: A'

# Miscellaneous run parameters.
# NOTE(review): meaning of `i` and `beta` is not visible in this file.
i: 1
beta: 0.1
plot_interval: 10

# PPO learner settings.
# NOTE(review): key names suggest these are forwarded to a PPO implementation;
# confirm the exact consumer before renaming anything.
PPO:
  # Policy and network architecture.
  policy_name: 'MlpPolicy'
  shared_layers: null
  policy_layers: [64, 64]
  reward_vf_layers: [64, 64]

  # Optimisation schedule.
  learning_rate: 0.0005
  n_steps: 4000
  n_epochs: 80
  batch_size: 64
  timesteps: 500000
  # eval_every: 2048

  # Return / advantage estimation (reward_gamma matches the top-level value).
  reward_gamma: 0.99
  reward_gae_lambda: 0.97

  # Loss terms and update constraints.
  clip_range: 0.2
  ent_coef: 0
  reward_vf_coef: 0.5
  max_grad_norm: 0.5
  target_kl: null

  # State-dependent exploration is switched off.
  use_sde: false
  sde_sample_freq: -1

# Discriminator settings.
# NOTE(review): semantics of individual keys are defined by the consuming
# code, which is not visible here.
DISC:
  disc_batch_size: null
  # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve it
  # as a float; a bare `1e-05` is loaded as the string '1e-05'.
  disc_eps: 1.0e-05
  disc_layers: [64, 64]
  disc_learning_rate: 0.0005
  # Key previously carried a stray trailing double quote (`disc_normalize"`),
  # which made it a different key from `disc_normalize`.
  disc_normalize: false
  disc_plot_every: 1
  clip_obs: 20
  use_cost_net: true
  learn_cost: true