# --- COMA specific parameters ---

# Action selection and exploration schedule.
# Booleans and floats are written canonically (`true`/`false`, leading zero)
# so that every YAML loader infers the same types.
action_selector: "multinomial"
# Epsilon schedule endpoints and length (unit of `epsilon_anneal_time` is
# presumably environment steps — confirm in the consuming code).
epsilon_start: 0.8
epsilon_finish: 0.01
epsilon_anneal_time: 2000000
mask_before_softmax: false
use_centralized_V: true


# Rollout runner and batch sizing.
# NOTE(review): in PyMARL-style code the "episode" runner expects
# batch_size_run == 1 — confirm that 32 is intended with this runner.
runner: "episode"  # "parallel" or "episode"

# presumably the number of environments/episodes collected per run — confirm
batch_size_run: 32
buffer_size: 32
batch_size: 32 # Number of episodes to train on


# Update the target network every `target_update_interval` training steps.
target_update_interval: 200

# Learning rates: `lr` plus a separate rate for the critic (`critic_lr`).
lr: 0.0005
critic_lr: 0.001
# presumably the lambda of TD(lambda) returns — confirm in the learner
td_lambda: 1.0


# Agent output, learner and critic selection.
# NOTE(review): the file header says "COMA specific parameters", but `learner`
# and `name` below select "random_agent" / "random" — confirm which is intended.
agent_output_type: "pi_logits"
learner: "random_agent"
critic_q_fn: "ppo"
critic_train_mode: "seq"
critic_train_reps: 1
q_nstep: 0  # 0 corresponds to default Q, 1 is r + gamma*Q, etc
ctde: False
name: "random"

