# ! Keep the values listed below powers of 2; other values may cause unexpected behavior.
# * batch_size_run, inner_loop_episodes, meta_update_times, save_population_episodes, episodes_per_teammate, save_BR_episodes: 2048

# ==== meta defaults, select sub config ====
exp_name: "default" # name of this experiment

# ==== stage 1 config ====
population_alg: "qmix" # presumably the algorithm used to train the population - TODO confirm
population_size: 5 # presumably the number of members in the population - TODO confirm
optimize_meta: True # Set 'False' for no diversity among population
# Keep inner_loop_episodes and meta_update_times powers of 2 (see the note at the top of this file).
inner_loop_episodes: 128 # runs of each "inner loop"
meta_update_times: 8 # number of gradient updates for each "meta update"
cur_episodes: 32 # number of trajectories used for estimating the loss

save_population: False
# Keep save_population_episodes a power of 2 (see the note at the top of this file).
save_population_episodes: 2048 # save population after this many episodes

# ==== stage 2 config ====
use_history: False # Whether to use history checkpoints to enhance population diversity. If set False, use the last checkpoint only
# Keep episodes_per_teammate a power of 2 (see the note at the top of this file).
episodes_per_teammate: 64

train_test_split: False # if set False, test on the training set
test_percent: 0.2 # presumably the fraction held out for testing when train_test_split is True - TODO confirm

save_BR: False
# Keep save_BR_episodes a power of 2 (see the note at the top of this file).
save_BR_episodes: 2048

# ==== stage 3 config ====
# NOTE(review): undocumented key; it is not in the power-of-2 list at the top of
# this file - confirm whether the same constraint applies.
points_per_teammate: 64

# --- pymarl options ---
runner: "episode" # Runs 1 env for an episode
mac: "basic" # Basic controller
env: "sc2" # Environment name
env_args: {} # Arguments for the environment
# Keep batch_size_run a power of 2 (see the note at the top of this file).
batch_size_run: 1 # Number of environments to run in parallel
test_nepisode: 1 # Number of episodes to test for
test_interval: 2000 # Test after {} timesteps have passed
test_greedy: True # Use greedy evaluation (if False, will set epsilon floor to 0)
log_interval: 2000 # Log summary of stats after every {} timesteps
runner_log_interval: 2000 # Log runner stats (not test stats) every {} timesteps
learner_log_interval: 2000 # Log training stats every {} timesteps
t_max: 10000 # Stop running after this many timesteps
use_cuda: True # Use gpu by default unless it isn't available
buffer_cpu_only: True # If true we won't keep all of the replay buffer in vram

# --- Logging options ---
use_tensorboard: True # ! (False) Log results to tensorboard; the "(False)" marker presumably records a previous default - TODO confirm
save_model: False # Save the models to disk
save_model_interval: 2000000 # Save models after this many timesteps
checkpoint_path: "" # Load a checkpoint from this path
evaluate: False # Evaluate model for test_nepisode episodes and quit (no training)
load_step: 0 # Load model trained on this many timesteps (0 to choose the max possible)
save_replay: False # Save the replay of the model loaded from checkpoint_path
local_results_path: "results" # Path for local results
local_saves_path: "saves" # Path for storing and loading pre-trained models

# --- RL hyperparameters ---
gamma: 0.99 # presumably the discount factor - TODO confirm
batch_size: 32 # Number of episodes to train on
buffer_size: 32 # Size of the replay buffer
lr: 0.0005 # Learning rate for agents
critic_lr: 0.0005 # Learning rate for critics
optim_alpha: 0.99 # RMSProp alpha
optim_eps: 0.00001 # RMSProp epsilon
grad_norm_clip: 10 # Reduce magnitude of gradients above this L2 norm

# --- Agent parameters ---
agent: "rnn" # Default rnn agent
rnn_hidden_dim: 64 # Size of the hidden state for the default rnn agent
obs_agent_id: True # Include the agent's one-hot id in the observation
obs_last_action: True # Include the agent's last action (one-hot) in the observation

# --- Experiment running params ---
repeat_id: 1 # presumably an index distinguishing repeated runs of the same experiment - TODO confirm
label: "default_label" # free-form label for the run; presumably used to tag outputs - TODO confirm
