# Agent, environment, and runner selection.
agent: "DCG"  # Options: DCG, DCG_S
env_name: "StarCraft2"
env_id: "25m"
fps: 15
policy: "DCG_Policy"
representation: "Basic_RNN"
vectorize: "Subproc_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: false

# Recurrent settings for the Basic_RNN representation.
use_recurrent: true
rnn: "GRU"
# NOTE(review): recurrent_layer_N and N_recurrent_layers (below) both hold 1
# and look redundant -- confirm which key the consumer actually reads.
recurrent_layer_N: 1
fc_hidden_sizes: [64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

# Hidden-layer sizes and activation for the networks.
representation_hidden_size: [64]
q_hidden_size: [64]  # the units for each hidden layer
hidden_utility_dim: 64  # hidden units of the utility function
hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
hidden_bias_dim: [64]  # hidden units of the bias network with global states as input
activation: "relu"

# Coordination-graph payoff and message-passing settings.
low_rank_payoff: false  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: true  # message normalization during greedy action selection (Kok and Vlassis, 2006)

# Seed, parallelism, replay buffer, and optimisation hyperparameters.
seed: 1
parallels: 8
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99  # discount factor
double_q: true  # use double q learning

# Greedy schedule (start/end value and decay steps) and training cadence.
start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000  # start training after n episodes
running_steps: 5000000  # 5M
train_per_step: false  # true: train model per step; false: train model per episode
training_frequency: 1
sync_frequency: 200

# Gradient clipping: on/off switch and the norm value.
use_grad_clip: false
grad_clip_norm: 0.5

# Evaluation settings and output directories for logs and models.
eval_interval: 50000
test_episode: 16
log_dir: "./logs/dcg/"
model_dir: "./models/dcg/"
