# Name of the environment this configuration selects.
env: coordination_game_matching

# Settings grouped for the environment; presumably forwarded to the
# environment's constructor — confirm against the consumer of this file.
env_args:
  n_agents: 6  # presumably the number of agents in the game — verify
  n_actions: 3  # presumably the number of actions per agent — verify
  episode_limit: 1  # NOTE(review): looks like one-step episodes — confirm
  reward: 1  # NOTE(review): reward value; confirm how the env applies it

# Logging and evaluation schedule.
# NOTE(review): the *_interval values and t_max presumably count environment
# timesteps — confirm against the training loop that reads them.
learner_log_interval: 100
log_interval: 100
runner_log_interval: 100
t_max: 20000
test_interval: 100
test_nepisode: 32
# Lowercase `true` is the canonical YAML boolean; it is read as a boolean by
# both YAML 1.1 and YAML 1.2 loaders, unlike capitalised spellings.
test_greedy: true

