# ---- General run settings ----
# The deep learning toolbox. Choices: "torchAgent", "mindspore", "tensorlayer".
dl_toolbox: "torchAgent"
project_name: "XuanCe_Benchmark"
# Logging backend. Choices: "tensorboard", "wandb".
logger: "tensorboard"
# Placeholder value; presumably only consulted when logger is "wandb" — TODO confirm.
wandb_user_name: "your_user_name"
render: True
# Rendering mode. Choices: "human", "rgb_array".
render_mode: "rgb_array"
test_mode: False
# NOTE(review): looks like a PyTorch-style device string — confirm the accepted
# format for the other toolboxes.
device: "cuda:0"

# ---- Algorithm and environment selection ----
# NOTE(review): the string values below look like names resolved by the consuming
# framework; the set of valid names is not visible in this file — verify there.
agent: "MASAC"  # the multi-agent (MARL) learning algorithm
env_name: "mpe"
env_id: "simple_spread_v3"
continuous_action: True
policy: "Gaussian_MASAC_Policy"
representation: "Basic_Identical"
vectorize: "Dummy_Pettingzoo"
runner: "Pettingzoo_Runner"

# ---- Network architecture ----
# The units for each hidden layer of the representation; the list is empty here.
representation_hidden_size: []
# Presumably the units per hidden layer of the actor and critic networks,
# following the same convention as above — TODO confirm.
actor_hidden_size: [64, 64]
critic_hidden_size: [64, 64]
activation: "leaky_relu"
activation_action: "sigmoid"

# ---- Optimization hyperparameters ----
seed: 1
parallels: 16  # presumably the number of parallel environments — TODO confirm
buffer_size: 100000
batch_size: 256
lr_a: 0.01  # learning rate for actor
lr_c: 0.001  # learning rate for critic
gamma: 0.95  # discount factor
tau: 0.001  # soft update for target networks
# NOTE(review): a fixed alpha is set while use_automatic_entropy_tuning is True;
# confirm whether alpha is used as-is, as an initial value, or ignored in that case.
alpha: 0.01
use_automatic_entropy_tuning: True

# ---- Exploration noise and training schedule ----
# NOTE(review): start_noise/end_noise look like the endpoints of a noise schedule;
# confirm how (and whether) this agent consumes them.
start_noise: 1.0
end_noise: 0.01
sigma: 0.1  # random noise for continuous actions
# NOTE(review): the unit of start_training is stated as episodes below; verify it
# against the consumer, since running_steps is named in steps.
start_training: 1000  # start training after n episodes
running_steps: 10000000
train_per_step: False  # True: train model per step; False: train model per episode.
training_frequency: 1

# ---- Gradient clipping ----
use_grad_clip: True
# Presumably the norm threshold applied when use_grad_clip is True — TODO confirm.
grad_clip_norm: 0.5

# ---- Evaluation and output locations ----
# NOTE(review): the unit of eval_interval (steps vs. episodes) is not stated here — confirm.
eval_interval: 100000
test_episode: 5
# Relative paths; they resolve against the process working directory.
log_dir: "./logs/masac/"
model_dir: "./models/masac/"