# General run settings: toolbox, logging, rendering, and device.
dl_toolbox: "torch"  # The deep learning toolbox. Choices: "torch", "mindspore", "tensorlayer".
project_name: "XuanCe_Benchmark"
logger: "tensorboard"  # Choices: "tensorboard", "wandb".
# Placeholder value; presumably only read when logger is "wandb" -- confirm.
wandb_user_name: "your_user_name"
render: True
render_mode: "rgb_array"  # Choices: "human", "rgb_array".
fps: 15
test_mode: False
device: "cuda:0"

# Algorithm and environment selection.
agent: "OWQMIX"  # Choices: "CWQMIX", "OWQMIX".
env_name: "StarCraft2"
env_id: "5m_vs_6m"  # presumably the StarCraft2 map/scenario name -- confirm
learner: "WQMIX_Learner"
policy: "Weighted_Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Subproc_StarCraft2"

# Recurrent settings for the Basic_RNN representation.
use_rnn: True
rnn: "GRU"
# NOTE(review): recurrent_layer_N and N_recurrent_layers (below) look like two
# names for the same setting -- confirm which one the loader reads and keep
# both values in sync until then.
recurrent_layer_N: 1
fc_hidden_sizes: [64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

# Network layer sizes and agent-level options.
representation_hidden_size: [64]
q_hidden_size: [64]  # The number of units in each hidden layer.
activation: "relu"
alpha: 0.1  # presumably the WQMIX weighting coefficient -- confirm
use_parameter_sharing: True
use_actions_mask: True

# Hidden-layer widths for the mixing-related networks.
hidden_dim_mixing_net: 32  # hidden units of mixing network
hidden_dim_hyper_net: 64  # hidden units of hyper network
# Hidden units of the "ff" mixing network; presumably the feed-forward mixer,
# as distinct from hidden_dim_mixing_net above -- confirm against the policy.
hidden_dim_ff_mix_net: 256

# Core training hyperparameters.
seed: 1
parallels: 8  # presumably the number of parallel environments -- confirm
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99  # discount factor
double_q: True  # use double Q-learning

# Exploration and training schedule.
# presumably the greedy parameter moves from start_greedy to end_greedy over
# decay_step_greedy steps -- confirm against the agent implementation.
start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 1000000
# NOTE(review): the comment below says "episodes" while neighbouring keys are
# named in steps -- confirm the unit the runner actually uses.
start_training: 1000  # start training after n episodes
running_steps: 10000000  # 10M
n_epochs: 8  # The number of training epochs after interaction.
sync_frequency: 200  # presumably how often the target network is synced -- confirm

# Gradient clipping (currently disabled).
use_grad_clip: False
grad_clip_norm: 0.5  # presumably only applied when use_grad_clip is True -- confirm

# Evaluation settings.
eval_interval: 100000  # presumably measured in environment steps -- confirm
test_episode: 16  # presumably the number of episodes per evaluation -- confirm

# Output directories, given as relative paths.
log_dir: "./logs/wqmix/"
model_dir: "./models/wqmix/"
