# Environment to run.
env: matrixgame

# Arguments for the environment selected above.
env_args:
  n_actions: 3
  # Active payoff matrix: 3 rows x 3 columns, matching n_actions above.
  # NOTE(review): presumably rows index one agent's action and columns the
  # other agent's action -- confirm against the environment implementation.
  rewards:
    - [2, 2, 0]
    - [2, 2, 0]
    - [0, 0, 3]
  # Alternative payoff matrices. To switch, replace the active `rewards`
  # value above with one of these (never leave two `rewards` keys active).
  # rewards: [[8, -12, -12], [-12, 0, 0], [-12, 0, 0]]
  # rewards: [[0.6, 0.6, 0], [0.6, 0.6, 0], [0, 0, 0.9]]
  # rewards: [[0.5, 0.5, 1], [0.5, 0.5, 1], [1, 1, 0]]
  # rewards: [[1, -12, -12], [-12, 0, 0], [-12, 0, 0]]
  # NOTE(review): presumably the maximum number of steps per episode.
  episode_limit: 10

# Run, evaluation and logging settings.
# Booleans use the canonical lowercase form (true/false) so they load the
# same way under YAML 1.1 and YAML 1.2 parsers.
use_rnn: false
test_greedy: true
test_nepisode: 10
# NOTE(review): the intervals and t_max below are presumably counted in
# environment timesteps -- confirm against the training loop.
test_interval: 500
log_interval: 500
runner_log_interval: 100
learner_log_interval: 100
t_max: 105000

# Network size and observation options.
# NOTE(review): obs_agent_id / obs_last_action presumably toggle extra
# inputs appended to each agent's observation -- confirm against the consumer.
hidden_dim: 4
obs_agent_id: true
obs_last_action: false

# NOTE(review): semantics of this flag are not visible in this file.
random_output: true