# --- ADER specific parameters ---

rew_scale: 1

# Keys set to null carry no value in this config.
action_range: null
action_selector: null
agent: "mlp_gaussian"
obs_agent_id: true
obs_last_action: false
agent_output_type: null
batch_size_run: 4
batch_size: 1024
buffer_size: 1000000
buffer_warmup: 1000
discretize_actions: false
double_q: false
epsilon_decay_mode: null
epsilon_start: null
epsilon_finish: null
epsilon_anneal_time: null
exploration_mode: "gaussian"
# Number of steps for uniform-random action selection, before running the
# real policy. Helps exploration.
start_steps: 10000
# Disabled noise settings, kept commented out for reference:
# act_noise: 0.1  # Stddev for Gaussian exploration noise added to policy at training time.
# ou_theta: 0.15  # D
# ou_sigma: 0.2  # D
# ou_noise_scale: 0.3
# final_ou_noise_scale: 0.
gamma: 0.99
grad_norm_clip: 0.5
learner: "ader_continuous_learner"
learn_interval: 1
lr: 0.001
critic_lr: 0.001
td_lambda: 0.8
critic_train_reps: 1
# 0 corresponds to default Q, 1 is r + gamma*Q, etc.
q_nstep: 0
mac: "continuous_mac"
mixer: "qmix"
mixing_embed_dim: 64
skip_connections: false
gated: false
hypernet_layers: 2
hypernet_embed: 64
hyper_initialization_nonzeros: 0
# NOTE(review): this differs from `algname: "ader"` further down in this
# file -- confirm the run name is intentional.
name: "wsac_mamujoco"
n_runners: null
n_train: 1
optimizer: "adam"  # D
optimizer_epsilon: 0.01  # D
# Disabled, kept for reference:
# ou_stop_episode: 100  # training noise goes to zero after this episode
rnn_hidden_dim: 400
run_mode: null
runner: "parallel"
runner_scope: "transition"
target_update_interval: null
recurrent_critic: false
target_update_mode: "soft"
target_update_tau: 0.001
test_greedy: null
test_interval: 4000
test_nepisode: 10
testing_on: true
t_max: 2000000
save_model: false
save_model_interval: 10000
verbose: false
weight_decay: true
weight_decay_factor: 0.0001
env_args:
  # Critic adds last action internally.
  state_last_action: false
agent_return_logits: false
q_embed_dim: 1

burn_in_period: 100

# xi
adap_total_alpha_tau: 0.9
# Initial alpha, given as an exponent: -2 means alpha_init = e**-2.
adap_total_alpha_start: -2

contribution_temperature: -1000
contribution_temperature2: 1

algname: "ader"