# Name of the environment this config applies to (presumably resolved by the
# consuming framework's environment registry — TODO confirm).
env: gather

# Settings grouped under `env_args`; presumably passed to the environment
# named by the top-level `env` key — confirm against the consuming code.
env_args:
  n_agents: 5
  episode_limit: 8
  # NOTE(review): height/width look like grid dimensions (3 x 5) — confirm
  # the unit and axis order with the environment implementation.
  map_height: 3
  map_width: 5
  # Reward-related values; their exact semantics are defined by the
  # environment implementation, not by this file.
  catch_reward: 10
  catch_fail_reward: -5
  other_reward: 5
  target_reward: 0.00  # parsed as the float 0.0
  # Canonical lowercase booleans: `true`/`false` resolve to booleans under
  # every YAML schema, whereas `True`/`False` are only recognised by some.
  obs_last_action: false
  state_last_action: true

# Logging cadence. All three intervals share the same value; the unit is not
# stated in this file (presumably environment timesteps — TODO confirm).
learner_log_interval: 10000
log_interval: 10000
runner_log_interval: 10000
# Overall run budget (assumed to be in the same unit as the intervals above —
# verify against the training loop).
t_max: 1005000
# Evaluation settings: how often to test and how many episodes per test.
test_interval: 10000
test_nepisode: 300
# Canonical lowercase boolean; `True` is not a boolean under every YAML schema.
test_greedy: true