# Name of the environment this config selects; presumably resolved by the
# consumer's environment registry — verify against the loader.
env: mpe_formation

# Settings grouped under env_args; presumably forwarded to the environment
# named by `env` — confirm against the code that consumes this file.
env_args:
  n_agents: 4
  grid_size: 5
  episode_limit: 50
  # NOTE(review): gamma sits under env_args rather than at the top level —
  # confirm this is where the consumer reads it.
  gamma: 0.99
  # NOTE(review): the meaning and units of the *_gap values are not visible
  # here — TODO document once confirmed with the environment code.
  alpha_gap: 0.3
  dist_gap: 0.1

# Logging and evaluation schedule. All *_interval values below share the same
# value (20000); units are presumably environment timesteps — TODO confirm.
learner_log_interval: 20000
log_interval: 20000
runner_log_interval: 20000
# 10.05 million; presumably the total training budget in the same units as
# the intervals above — verify against the training loop.
t_max: 10050000
test_interval: 20000
test_nepisode: 24
# Canonical lowercase boolean (yamllint `truthy`); parses to the same value as
# `True` under YAML 1.1 and YAML 1.2 core-schema loaders.
test_greedy: true
