# Environment identifier. Quoted like the other string values in this file;
# the parsed value is still the plain string sc2.
env: "sc2"

# Arguments nested under env_args. Presumably forwarded to the environment
# named by `env` -- TODO confirm against the code that loads this file.
env_args:
  # --- Scenario / game setup ---
  map_name: "ANY"
  game_version: null  # explicitly unset
  difficulty: "7"  # a quoted string, not an integer
  seed: null  # explicitly unset
  step_mul: 8
  move_amount: 2
  continuing_episode: False

  # --- Observation flags (obs_* keys) ---
  obs_all_health: True
  obs_own_health: True
  obs_last_action: False
  obs_instead_of_state: False
  obs_pathing_grid: False
  obs_terrain_height: False
  obs_timestep_number: False

  # --- State flags (state_* keys) ---
  state_last_action: True
  state_timestep_number: False

  # --- Reward settings (reward_* keys) ---
  reward_sparse: False
  reward_only_positive: True
  reward_death_value: 10
  reward_win: 200
  reward_defeat: 0
  reward_negative_scale: 0.5
  # NOTE(review): this reward_scale is a boolean nested under env_args; the
  # file also has a separate top-level numeric reward_scale key.
  reward_scale: True
  reward_scale_rate: 20

  # --- Replay output (both left as empty strings) ---
  replay_dir: ""
  replay_prefix: ""

  # --- Heuristic and debug switches (all disabled) ---
  heuristic_ai: False
  heuristic_rest: False
  debug: False

# Test-time settings (test_* keys).
test_nepisode: 32
test_greedy: True
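# Disabled alternative values, kept for reference only. Apart from
# save_model_interval, each of these keys is assigned an active value
# further down in this file.
# test_interval: 1000
# log_interval: 1000
# runner_log_interval: 1000
# learner_log_interval: 1000
# save_model_interval: 100000
# t_max: 40000 # 40000 training gradient steps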

# Offline dataset settings; used only for single-task offline.

# Where the data lives and how it is stored.
offline_data_folder: "dataset"
offline_bottom_data_path: ""  # left as an empty string
offline_data_type: "h5"

# Which data to use and how much of it.
offline_data_quality: "expert"
offline_max_buffer_size: 4000
offline_data_shuffle: False

# --- RL settings ---

# Batch size and data placement.
offline_batch_size: 16
data_device: "cpu"

# Standardisation switches (both disabled).
standardise_rewards: False
standardise_returns: False

# NOTE(review): top-level numeric reward_scale. This is a different key from
# the boolean reward_scale nested under env_args -- confirm which consumer
# reads which.
reward_scale: 10

tune_all: False

# --- Algorithm settings for COMAD ---
# The meaning of each hyperparameter is defined by the COMAD implementation,
# which is not part of this file.
alpha_temp: 1
ebm_noise_scale: 0.1
delta_m_thres: 0.3
n_reuse_heads: 8

# Step budgets. stage1_steps is half of t_max; cont_train_steps equals t_max.
t_max: 20000
stage1_steps: 10000
cont_train_steps: 20000

# Test and logging intervals; all four share the same value.
learner_log_interval: 250
runner_log_interval: 250
log_interval: 250
test_interval: 250