---
# Environment name.
# NOTE(review): presumably a key into the framework's environment registry --
# confirm against the loader.
env: k_coloring2

# Settings for the environment named above.
# NOTE(review): presumably forwarded to the environment constructor as keyword
# arguments -- confirm. Several keys below (obs_*, reward_*, replay_*) look
# generic rather than specific to this environment; verify that k_coloring2
# actually reads them before relying on their values.
env_args:
  # Quoted, so this loads as the string "7" rather than the integer 7.
  difficulty: "7"

  # Observation / state switches.
  obs_instead_of_state: false
  obs_last_action: false
  obs_pathing_grid: false
  obs_terrain_height: false

  # Reward settings.
  reward_death_value: 10
  reward_defeat: 0
  reward_negative_scale: 0.5
  reward_only_positive: true
  reward_scale: true
  reward_scale_rate: 20
  reward_sparse: false
  reward_win: 300

  # Replay output; both are empty strings here.
  replay_dir: ""
  replay_prefix: ""

  state_last_action: true
  debug: false

  # Problem size and episode settings.
  n_agents: 10
  grid_size: 20
  sight_range: 3
  random_start: true
  n_actions: 3
  episode_limit: 50
  full_obs: false
  use_one_hot: false
  is_print: false
  explicit_credit_assignment: false

  # Suggested non-zero value: 1. / (3. * n_agents * n_actions), e.g. 0.02.
  # With n_agents: 10 and n_actions: 3 as set in this file, that formula
  # evaluates to 1/90, i.e. about 0.011.
  # NOTE(review): 0.0 presumably disables the change entirely -- confirm.
  prob_change: 0.0

# Logging cadence and total run length.
# NOTE(review): the *_interval values and t_max are presumably counted in
# environment timesteps -- confirm against the runner.
learner_log_interval: 10000
log_interval: 10000
runner_log_interval: 10000
t_max: 10050000

# Evaluation settings.
test_interval: 10000
test_nepisode: 300
test_greedy: true

# Console printing.
print_rew: false
print_steps: 1000
