# --- Defaults ---

# --- pymarl options ---
# General run settings: which algorithm/runner/controller/environment to use,
# how often to test and log, and where tensors live.
algo_name: "iql" # Algorithm name
runner: "episode" # Runs 1 env for an episode
mac: "dcntrl_mac" # Basic controller
env: sc2 # Environment name ("mod_act" is a commented-out alternative)
run_script: "run_default" # NOTE(review): presumably selects the run script to launch -- confirm against the loader
seed: 112358 # Seed value (presumably for random number generation -- confirm)
env_args: {}  # Arguments for the environment
batch_size_run: 1 # Number of environments to run in parallel
test_nepisode: 32 # Number of episodes to test for
test_interval: 10000 # Test after this many timesteps have passed
test_greedy: True # Use greedy evaluation (if False, will set epsilon floor to 0)
log_interval: 10000 # Log summary of stats after every this many timesteps
runner_log_interval: 10000 # Log runner stats (not test stats) every this many timesteps
learner_log_interval: 10000 # Log training stats every this many timesteps
t_max: 2050000 # Stop running after this many timesteps
use_cuda: True # Use GPU by default unless it isn't available
buffer_cpu_only: True # If true we won't keep all of the replay buffer in VRAM

# --- Logging options ---
# Settings for result logging, model saving, and loading/evaluating checkpoints.
use_tensorboard: True # Log results to tensorboard
save_model: True # Save the models to disk
save_model_interval: 2000000 # Save models after this many timesteps
# Example of a previously used checkpoint_path value:
# "/scratch/cluster/clw4542/marl_results/qmix_5v6/models/qmix_sc2_None_diff=7_act=epsilon-greedy_seed=112358_07-11-23-03-27"
checkpoint_path: "" # Path of the checkpoint to load a model from (empty here)
evaluate: False # Evaluate model for test_nepisode episodes and quit (no training)
transfer: False # NOTE(review): undocumented flag; presumably enables transfer from a loaded model -- confirm
load_step: 0  # Load model trained on this many timesteps (0 selects the max possible)
save_replay: False # Save the replay of the model loaded from checkpoint_path
local_results_path: "/scratch/cluster/clw4542/marl_results/iql_3m/" # NOTE(review): hard-coded absolute path, presumably where results are written -- override on other machines

# --- Arguments for the algorithm ---
action_selector: "epsilon_greedy" # Name of the action selector to use

# --- RL hyperparameters ---
gamma: 0.99 # Presumably the discount factor -- confirm against the learner
batch_size: 32 # Number of episodes to train on
buffer_size: 5000 # Size of the replay buffer
lr: 0.0005 # Learning rate for agents
critic_lr: 0.0005 # Learning rate for critics
optim_alpha: 0.99 # RMSProp alpha
optim_eps: 0.00001 # RMSProp epsilon
grad_norm_clip: 10 # Reduce magnitude of gradients above this L2 norm

# --- Agent parameters ---
agent: "rnn" # Default rnn agent
critic: null # No critic is configured in these defaults
rnn_hidden_dim: 64 # Size of hidden state for default rnn agent
obs_agent_id: True # NOTE(review): presumably includes the agent's id in its observation -- confirm
obs_last_action: True # NOTE(review): presumably includes the agent's last action in its observation -- confirm

# --- Experiment running params ---
repeat_id: 1 # NOTE(review): presumably an index distinguishing repeated runs -- confirm
label: "" # NOTE(review): presumably a free-form tag for the run (empty here) -- confirm