# Run label and verbosity flag.
group: GAIL
verbose: false

# Environment selection. `env` is a Python call expression stored as a
# string; presumably the loader evaluates it to build the environment —
# TODO confirm against the consuming code.
env_type: driving
env_id: exiD
env: "tools.environments.create('driving', 'exiD', normalize_states=False, normalize_actions=False, time_limit=1000)"

# Discounting and step-size values (exact use is defined by the consumer).
reward_gamma: 0.99
discount_factor: 0.99
learning_rate: 0.0005

# Expert data amount, window size and normalization selector.
expert_episodes: 100
window: 25
normalize_func: 'max'

# Cost-related hooks. The first two are intentionally empty strings (not
# null); the third is a Python lambda source that calls tools.utils.mse.
cost_condition: ''
cost_comparison: ''
accrual_comparison: "lambda ea, a: __import__('tools').utils.mse(ea, a)"

# Input formatting, given as Python lambda source strings. The scalar form
# scales the first state entry by 1/10 and the first action entry by 1/5;
# the vector form applies the same scaling and concatenates on the last dim.
input_format: 'lambda s, a: [s[0]/10., a[0]/5.]'
vector_input_format: 'lambda S, A: torch.cat((S/10., A/5.), dim=-1)'

# State/action reductions; all four are identity lambdas here.
state_reduction: 'lambda s: s'
vector_state_reduction: 'lambda S: S'
action_reduction: 'lambda a: a'
vector_action_reduction: 'lambda A: A'

# Remaining scalar settings (meaning of `i` and `beta` is not visible in
# this file — see the consumer).
i: 2
beta: 0.1
plot_interval: 10

# PPO settings. Several key names (n_steps, n_epochs, clip_range, ent_coef,
# max_grad_norm, use_sde, sde_sample_freq, target_kl) match Stable-Baselines3
# PPO arguments — presumably forwarded there; TODO confirm in the loader.
PPO:
  policy_name: MlpPolicy
  learning_rate: 0.0005

  # Rollout / update schedule.
  n_steps: 1000
  n_epochs: 80
  batch_size: 64
  # eval_every: 2048
  timesteps: 250000

  # Return estimation and loss weighting.
  reward_gamma: 0.99
  reward_gae_lambda: 0.97
  clip_range: 0.2
  ent_coef: 0
  reward_vf_coef: 0.5
  max_grad_norm: 0.5
  target_kl: null

  # State-dependent exploration is switched off.
  use_sde: false
  sde_sample_freq: -1

  # Network layout: no shared layers; two 64-unit entries each for the
  # policy and the reward value function.
  shared_layers: null
  policy_layers: [64, 64]
  reward_vf_layers: [64, 64]

# Discriminator settings.
DISC:
  disc_batch_size: null
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-05` as a string rather than a float.
  disc_eps: 1.0e-05
  disc_layers: [64, 64]
  disc_learning_rate: 0.0005
  # Key previously carried a stray trailing double quote (`disc_normalize"`),
  # so lookups of `disc_normalize` could not find this entry.
  disc_normalize: false
  disc_plot_every: 1
  clip_obs: 20
  use_cost_net: true
  learn_cost: true