# Name of the environment this configuration targets.
env: matrixgame3

# Arguments for the environment named above.
env_args:
  n_states: 3
  n_actions: 3
  # 3x3x3 integer table; every entry lies in 0..2, i.e. within n_states.
  # NOTE(review): presumably indexed [state][action_a][action_b] and giving
  # the next state index — confirm against the environment implementation.
  trans:
    - [[0, 0, 0], [0, 2, 1], [0, 1, 2]]
    - [[0, 1, 0], [1, 2, 1], [0, 1, 0]]
    - [[0, 0, 2], [0, 0, 2], [2, 2, 1]]
  # Previous reward table, kept for reference. The active `rewards` below
  # equals this table with negative entries clamped to 0 and then divided
  # by 10, so all active values fall in [0, 1].
  # rewards: [[[6,-2,-2],[-2,4,4],[-2,4,4]],[[6,0,6],[0,10,0],[6,0,6]],[[3,3,-3],[3,3,-3],[-3,-3,4]]]
  # 3x3x3 table with the same layout as `trans`.
  rewards:
    - [[0.6, 0, 0], [0, 0.4, 0.4], [0, 0.4, 0.4]]
    - [[0.6, 0, 0.6], [0, 1, 0], [0.6, 0, 0.6]]
    - [[0.3, 0.3, 0], [0.3, 0.3, 0], [0, 0, 0.4]]
  episode_limit: 10

# Run-level switches plus the evaluation/logging schedule.
# NOTE(review): the units of the *_interval keys and t_max are not visible
# in this file (presumably environment timesteps) — confirm with the consumer.
use_rnn: false
test_greedy: true
test_nepisode: 10
test_interval: 500
log_interval: 500
runner_log_interval: 100
learner_log_interval: 100
t_max: 105000

# Model size and observation switches.
# NOTE(review): presumably hidden_dim is the agent network's hidden width and
# the obs_* flags control what is appended to each observation — confirm.
hidden_dim: 16
obs_agent_id: true
obs_last_action: false

# NOTE(review): the meaning of random_output is not visible in this file —
# confirm against the code that reads it.
random_output: true