# --- Defaults ---
# --- pymarl options ---
# Core run settings: algorithm/runner/controller selection, environment,
# and how often to test and log (all intervals are in timesteps).
algo_name: "ippo"
runner: "ippo" # Runs 1 env for an episode
mac: "dcntrl_mac" # Basic controller
env: sc2 # Environment name (alternative value left by the author: "mod_act")
run_script: "run_ippo" # NOTE(review): presumably names the run script/module to launch - confirm
seed: 112358
env_args: {}  # Arguments for the environment
batch_size_run: 1 # Number of environments to run in parallel
test_nepisode: 128 # Number of episodes to test for (alternative value left by the author: 32)
test_interval: 50000 # Test after {} timesteps have passed
test_greedy: True # Use greedy evaluation (if False, will set epsilon floor to 0)
log_interval: 50000 # Log summary of stats after every {} timesteps
runner_log_interval: 50000 # Log runner stats (not test stats) every {} timesteps
learner_log_interval: 50000 # Log training stats every {} timesteps
t_max: 10050000 # Stop running after this many timesteps
use_cuda: True # Use the GPU by default unless it isn't available
buffer_cpu_only: True # If True, we won't keep all of the replay buffer in VRAM

# --- Logging options ---
# Result logging, model checkpointing/loading, and evaluation-only mode.
use_tensorboard: True # Log results to tensorboard
save_model: True # Save the models to disk
save_model_interval: 1000000 # Save models after this many timesteps
load_step: 0 # Load the model closest to the specified timestep (if 0, choose the max possible)
checkpoint_paths: [""] # The list must contain at least one entry, even if it is an empty string
  # e.g. "/scratch/cluster/clw4542/marl_results/ippo_5v6/models/ippo_sc2_saved-batchsize=1000_diff=7_seed=112358_01-14-11-52-27/"
evaluate:  False # Evaluate the model for test_nepisode episodes and quit (no training)
save_replay: False # Save the replay of the model loaded from checkpoint_paths
save_eval_traj: False # Save evaluation trajectories
local_results_path: "/scratch/cluster/clw4542/marl_results/ippo_5v6" # Path for local results

# --- GAIL params ---
# Reward source, discriminator options, and GAIL buffer/update sizes,
# followed by settings for saving agent batches as GAIL data.
rew_type: "mixed" # Options: [gail, env, mixed]; author's note: train ippo+gail
update_gail: True
gail_load_models: False # Be sure to specify gail_model_paths (presumably when this is True - confirm)
gail_state_discrim: True # If False, use state-action discriminators
gail_use_same_model: True
gail_obs_discr: False
gail_sil: False # Do self-imitation learning rather than loading external data

gail_rew_coef: 0.3 # Important to tune!
gail_epoch: 120 # Important to tune!
gail_buffer_size: 1024 # Number of episodes in the GAIL buffer (author's note: failure with 1023)
gail_batch_size: 64 # Number of episodes of data to update GAIL with. Must be less than the number of expert episodes for now.

save_agent_batches: False  # Log agent batches for GAIL data
save_agent_batchsize: 5005 # Number of episodes in a saved batch
save_agent_batches_interval: 1000000 # Save agent batches after this many timesteps

# --- GAIL ABLATION EXP PARAMS ---
# Controls how expert data is assigned to agents, and where it is loaded from.
gail_mask_ally_feats: False
gail_exp_eps: 1000
gail_exp_use_same_data_idxs: True # If True, each agent gets the same idx of expert data. Else, each agent gets offset expert data (see gail_learner.py)
gail_exp_use_single_seed: True # If True, each agent uses the same data path for expert data. Else, use a different seed of expert data per agent.
# If training GAIL, gail_data_paths must be specified. Make sure to include the timestep!
# gail_data_paths must be a list of length 1 or of length n_agents (1 GAIL data path per agent).

# Commented-out alternative data path:
# gail_data_paths: ["/fs/nexus-projects/Guided_MARL/manav_dm2_results/agents_batches/ippo_sc2__seed=112358_02-21-15-11-52/_eval"]
gail_data_paths: ["/fs/nexus-projects/Guided_MARL/dm2_demonstrations/ippo_5v6/ippo_sc2_saved-batchsize=5000-ts=5m-concurrent-demos-diff-policies-DATAONLY_seed=92814_01-17-22-42-49/_eval"]
gail_model_paths: [] # Empty here; the note on gail_load_models says to specify this

# --- Arguments for the algorithm ---
action_selector: "epsilon_greedy" # Not actually used for ippo

# --- IPPO 5v6 specific params ---
ppo_epoch: 10 # NOTE(review): presumably the number of PPO update epochs - confirm against the learner
clip_param: 0.05 # NOTE(review): presumably the PPO clipping range - confirm against the learner

# --- RL hyperparameters ---
# Discount, batch/buffer sizes (in episodes), and optimizer settings.
gamma: 0.99
batch_size: 255 # Number of episodes to train on (alternative value left by the author: 1023). TODO: rename to minibatch size
buffer_size: 256 # Size of the replay buffer (alternative value left by the author: 1024)
lr: 0.0005 # Learning rate for agents
critic_lr: 0.0005 # Learning rate for critics
optim_alpha: 0.99 # RMSProp alpha
optim_eps: 0.00001 # RMSProp epsilon
grad_norm_clip: 10 # Reduce magnitude of gradients above this L2 norm

# --- Agent parameters ---
# Agent/critic selection and network layer sizes.
agent: "ippo"
critic: "ippo"
rnn_hidden_dim: 64 # Size of hidden state for rnn layers
mlp_hidden_dim: 64 # Size of MLP layers
obs_agent_id: True # NOTE(review): presumably adds the agent id to each agent's observation - confirm
obs_last_action: True # NOTE(review): presumably adds the agent's last action to its observation - confirm

# --- Experiment running params ---
repeat_id: 1 # NOTE(review): presumably an index for repeated runs of the same config - confirm
label: "" # NOTE(review): presumably a free-form tag for the run; empty here - confirm