# Algorithm and environment selection.
agent: "COMA"  # Name of the learning algorithm.
env_name: "mpe"  # Environment name.
env_id: "simple_spread_v3"  # Environment map (scenario) identifier.
continuous_action: false  # Whether the action space is continuous.
learner: "COMA_Learner"  # Name of the learner.
policy: "Categorical_COMA_Policy"  # Name of the policy.
representation: "Basic_MLP"  # Name of the representation.
representation_critic: "Basic_MLP"  # Name of the representation used for the critic.
vectorize: "DummyVecMultiAgentEnv"  # Method used to vectorize the environment.
runner: "MARL"  # Name of the runner.

# Recurrent representation and network initialization settings.
use_rnn: false  # Whether to use recurrent neural networks.
rnn: "GRU"  # Choice of recurrent network: "GRU" or "LSTM".
fc_hidden_sizes: [64]  # Fully connected layer sizes for the Basic_RNN representation.
recurrent_hidden_size: 64  # Hidden size of the recurrent networks.
N_recurrent_layers: 1  # Number of recurrent layers.
dropout: 0  # Probability of an element being zeroed; must be a number in [0, 1].
normalize: "LayerNorm"  # Normalization layer type.
initialize: "orthogonal"  # Network initializer.
gain: 0.01  # Gain value for network initialization.

# Layer widths: each list holds one entry per hidden layer.
representation_hidden_size: [128]  # Hidden units of the Basic_MLP representation networks.
actor_hidden_size: [128]  # Hidden units of the actor network.
critic_hidden_size: [128]  # Hidden units of the critic network.
activation: "relu"  # Activation function applied in every hidden layer.

# Training schedule and optimization settings.
seed: 1  # Random seed.
parallels: 10  # Number of environments run in parallel.
buffer_size: 250  # Total buffer size.
n_epochs: 10  # Number of epochs used to update the model.
n_minibatch: 1  # Number of minibatches.
learning_rate_actor: 0.0007  # Learning rate of the actor.
learning_rate_critic: 0.0007  # Learning rate of the critic.
start_greedy: 0.5  # Starting value of the greedy epsilon.
end_greedy: 0.01  # Final value of the greedy epsilon.
decay_step_greedy: 2500000  # Number of steps over which the greedy epsilon decays.
sync_frequency: 200  # Frequency of synchronizing the target networks.
running_steps: 10000000  # Total number of running steps.

# Multi-agent setup switches.
use_global_state: false  # Whether to use the global state in place of merged observations.
use_parameter_sharing: true  # Whether all agents' policies share parameters.
use_actions_mask: false  # Whether to use action masks.

# Return estimation and gradient handling.
gamma: 0.95  # Discount factor.
td_lambda: 0.1  # TD lambda.
use_advnorm: true  # Whether to normalize advantages.
use_gae: true  # Whether to use the GAE trick.
gae_lambda: 0.95  # GAE lambda, used when the GAE trick is enabled.
use_grad_clip: false  # Whether to clip the gradients of the networks.
grad_clip_norm: 10  # Maximum norm of the gradient.
clip_type: 1  # Gradient clipping for MindSpore: 0 = ms.ops.clip_by_value; 1 = ms.nn.ClipByNorm().

# Evaluation settings.
eval_interval: 100000  # Interval between two consecutive evaluations (presumably counted in steps — confirm against the runner).
test_episode: 5  # Number of episodes to run in each test period.

# Output locations (relative paths).
log_dir: "./logs/coma/"  # Log directory.
model_dir: "./models/coma/"  # Model directory.
