# Name of the environment this configuration targets.
env: 'sc2_v2'

# Environment arguments (presumably forwarded to the environment when it is
# constructed -- confirm against the code that loads this file).
env_args:
  # Disabled option, kept for reference. If true, the full field of view is
  # split into four 90-degree sectors (instead of twelve 30-degree sectors),
  # each corresponding to one of `move north, south, east, west`.
  # change_fov_with_move: false

  # ---------------- begin: options new compared to v1 ----------------
  capability_config:
    n_units: -1  # NOTE: modified during env init
    n_enemies: -1  # NOTE: modified during env init
    team_gen:
      dist_type: 'weighted_teams'
      # Presumably each entry in `weights` pairs with the unit type at the
      # same position in `unit_types`; the three weights sum to 1.0.
      unit_types:
        - 'stalker'
        - 'zealot'
        - 'colossus'
      weights:
        - 0.45
        - 0.45
        - 0.1
      exception_unit_types:
        - 'colossus'
      observe: true

    start_positions:
      dist_type: 'surrounded_and_reflect'
      p: 0.5
      map_x: 32
      map_y: 32

  map_name: 'ANY'  # NOTE: modified during env init
  obs_own_pos: true
  obs_starcraft: true
  # Our goal is not to design more efficient exploration algorithms, so the
  # agents keep a full-circle field of view and attack, as in SMAC v1.
  # Alternative setting, kept for reference:
  # conic_fov: true
  conic_fov: false
  num_fov_actions: 12
  kill_unit_step_mul: 2
  fully_observable: false
  # ---------------- end: options new compared to v1 ----------------

  # Remaining environment options.
  continuing_episode: false
  difficulty: '7'  # quoted on purpose: the value is a string, not an integer
  game_version: null
  move_amount: 2
  obs_all_health: true
  obs_instead_of_state: false
  obs_last_action: false
  obs_own_health: true
  obs_pathing_grid: false
  obs_terrain_height: false
  obs_timestep_number: false
  reward_death_value: 10
  reward_defeat: 0
  reward_negative_scale: 0.5
  reward_only_positive: true
  reward_scale: true
  reward_scale_rate: 20
  reward_sparse: false
  reward_win: 200
  replay_dir: ''
  replay_prefix: ''
  state_last_action: true
  state_timestep_number: false
  step_mul: 8
  seed: null
  heuristic_ai: false
  heuristic_rest: false
  debug: false

# Evaluation settings.
test_greedy: true
test_nepisode: 32

# Offline data collection settings.
num_episodes_collected: 2000
offline_data_quality: 'medium'
offline_data_folder: 'dataset'
save_replay_buffer: true
stop_winrate: 0.0
stop_return: 0
max_size: 2000

# Offline dataset settings; used only for single-task offline training.
offline_bottom_data_path: ''
offline_max_buffer_size: 4000
offline_data_shuffle: false
offline_data_type: 'h5'

# Device setting for data (kept separate from the dataset options above).
data_device: 'cpu'

# --- RL settings ---
n_reuse_heads: 6
delta_m_thres: 0.9
ebm_noise_scale: 0.1
alpha_temp: 1

gamma: 0.99
offline_batch_size: 8
tune_all: false
standardise_returns: false
standardise_rewards: false
# Top-level numeric key; distinct from the boolean `env_args.reward_scale`.
reward_scale: 10

# Network sizes.
rnn_hidden_dim: 128
entity_embed_dim: 128
pa_hidden_dim: 128
pa_hidden_layer: 4

# Step budgets.
cont_train_steps: 20000
t_max: 20000

# Testing and logging intervals (all set to the same value).
test_interval: 250
log_interval: 250
runner_log_interval: 250
learner_log_interval: 250