# Environment selection. The consumer of this file is not visible here;
# presumably `env` names the environment to instantiate -- confirm against
# the loader.
env: room7

# Settings grouped for the environment selected above (presumably forwarded
# to that environment's constructor -- TODO confirm). Booleans are written as
# canonical lowercase true/false so that every YAML 1.1 / 1.2 loader resolves
# them as booleans rather than strings.
env_args:
  n_agents: 2  # alternative value tried earlier: 8
  reward_win: 10
  size: 2
  obs_last_action: false
  state_last_action: false
  print_rew: false
  is_print: false
  print_steps: 1000

# Run-level settings: logging cadence, total run length, and test schedule.
# Units are not stated in this file (presumably environment timesteps for the
# *_interval and t_max keys -- TODO confirm against the consumer).
learner_log_interval: 10000
log_interval: 10000
runner_log_interval: 10000
t_max: 40100000  # alternative values tried earlier: 10050000, 201000
test_interval: 10000
test_nepisode: 300
# Canonical lowercase boolean so every YAML loader resolves it as a boolean.
test_greedy: true