# --- MAOSD3QN specific parameters ---

# Exploration settings. The selector is chosen by name; the three epsilon_*
# keys hold a start value (1.0), a finish value (0.05) and an anneal time.
# NOTE(review): presumably epsilon is annealed from start to finish over
# epsilon_anneal_time environment steps — confirm units and schedule shape
# in the selector implementation.
action_selector: "epsilon_soft_optimistic"
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 5000
# Runner chosen by name.
# NOTE(review): presumably collects whole episodes per rollout — confirm.
runner: "episode"

# Replay buffer capacity and per-update batch size.
# NOTE(review): the unit (episodes vs. single transitions) is not visible in
# this file — confirm against the buffer implementation.
buffer_size: 5000
batch_size: 128

# The key name allows two readings: a hard-update interval or a soft-update
# coefficient (tau).
# NOTE(review): 0.01 is presumably read as tau for soft target updates —
# confirm how the learner distinguishes the two cases.
target_update_interval_or_tau: 0.01

# Optimisation and network-size settings.
# lr: learning rate.
lr: 0.0005
# NOTE(review): presumably a multiplier applied to the agent loss; 1 would
# then mean "no rescaling" — confirm in the learner.
agent_loss_scaler: 1
# NOTE(review): presumably the number of steps before training starts;
# 0 would then disable warm-up — confirm.
warmup_steps: 0
# NOTE(review): presumably the width of the agent network's hidden layer(s)
# — confirm.
hidden_dim: 64

# Observation-construction flags: only obs_agent_id is enabled.
# NOTE(review): presumably these append the agent's id / its previous action
# to each agent's input; the meaning of obs_individual_obs is not evident
# from this file — confirm in the controller.
obs_agent_id: True
obs_last_action: False
obs_individual_obs: False

# Component wiring: agent, learner, multi-agent controller (mac) and critic
# are all selected by name.
# NOTE(review): the names are presumably looked up in registries elsewhere
# in the project — confirm each one is registered.
agent: "dueling_rnn"
# NOTE(review): "pi_logits" is a policy-style output type while the learner
# name suggests a Q-learning method — confirm this is intended.
agent_output_type: "pi_logits"
# NOTE(review): presumably selects how the dueling value/advantage streams
# are combined ("mean") — confirm in the agent.
v_type: "mean"
learner: "maosdqn_learner"
mac: "maosdqn_mac"
# Reward/return preprocessing switches: only reward standardisation is on.
standardise_rewards: True
translate_rewards: False
standardise_returns: False
# NOTE(review): use_rnn is False although the agent is named "dueling_rnn" —
# presumably the agent falls back to a feed-forward layer; confirm.
use_rnn: False
critic_type: "maosdqn_critic"

# Sampling settings.
# NOTE(review): "sars" presumably means (state, action, reward, next-state)
# transitions — confirm.
sample_type: "sars"
# TODO(review): the meaning of alpha and beta is not evident from this file;
# document what they scale and their valid ranges.
alpha: 50
beta: 10

# NOTE(review): sample_epsilon equals epsilon_finish (0.05); its exact role
# is not visible here — confirm.
sample_epsilon: 0.05
# NOTE(review): presumably False keeps test-time action selection
# non-greedy — confirm in the runner.
test_greedy: False

# Algorithm name for this configuration.
# NOTE(review): presumably used to label runs/logs — confirm.
name: "maosdqn"

# NOTE(review): presumably the total training budget in environment
# timesteps — confirm.
t_max: 205000
