# --- DAVE specific parameters ---

# Name of the action selector to use.
# NOTE(review): presumably resolved by name in code outside this file.
action_selector: "multinomial"

# Name of the rollout runner to use.
runner: "episode"

# Replay buffer capacity.
# NOTE(review): presumably counted in episodes, given runner is "episode"
# -- confirm against the buffer implementation.
buffer_size: 5000

# Update the target network every `target_update_interval` episodes.
target_update_interval: 200

# Train with the "dave_learner" learner on agent outputs of type "q".
agent_output_type: "q"
learner: "dave_learner"
# Name of the mixer to use.
mixer: "dave"
# NOTE(review): the three sizes below look like QMIX-style mixing-network
# settings (mixing embedding width, hypernetwork depth and hidden width)
# -- confirm against the "dave" mixer implementation.
mixing_embed_dim: 32
hypernet_layers: 2
hypernet_embed: 64

# Name of this algorithm configuration.
name: "dave"
# NOTE(review): presumably selects the multi-agent controller by name
# -- confirm.
mac: "dave_mac"
# NOTE(review): meaning of sample_num is not visible from this file
# (what is sampled, and where) -- confirm against the learner/mixer code.
sample_num: 100
# NOTE(review): presumably embedding widths for actions and for an encoder
# -- confirm which modules read these.
action_embed_dim: 4
encoder_embed_dim: 64
# NOTE(review): the anti_ego_* values look like a schedule going from
# anti_ego_start to anti_ego_end, with anti_ego_decay controlling its length
# -- confirm the schedule shape (linear/exponential) and the unit
# (environment steps vs. episodes).
anti_ego_start: 0.5
anti_ego_end: 0.0
anti_ego_decay: 500000