# --- PG specific parameters ---

# Action selection and exploration schedule.
action_selector: "multinomial"
# Epsilon goes from epsilon_start to epsilon_finish over epsilon_anneal_time.
# NOTE(review): the unit of epsilon_anneal_time (presumably environment
# steps) is defined by the action selector, not here — confirm.
epsilon_start: 0.8
epsilon_finish: 0.01
epsilon_anneal_time: 2000000
mask_before_softmax: false
gain: 0.01
runner: "episode"

# Buffer and batch sizing; all three values are 32 in this config.
# NOTE(review): presumably buffer_size and batch_size count episodes and
# batch_size_run is the number of environments run in parallel — confirm
# against the runner/buffer code.
buffer_size: 32
batch_size_run: 32
batch_size: 32

# Update the target network once every `target_update_interval` training steps
target_update_interval: 200

# Learning rates: lr and critic_lr are set to the same value here.
# NOTE(review): lr is presumably the agent/policy optimiser step size — confirm.
lr: 0.0005
critic_lr: 0.0005
td_lambda: 1.0
# Boolean switches, all disabled in this config. Their effect is defined by
# the learner code, not by this file.
spike_variation: false
la_update: false
create_hessian_graph: false
naive_learner: false
iid_agents: false
# Integer-valued settings (kept as ints, not floats, to preserve their type).
hess_lr: 1
td_resolution: 1

# Size of hidden state for default rnn agent
rnn_hidden_dim: 64
# Include the agent's one_hot id in the observation
obs_agent_id: false
# Include the agent's last action (one_hot) in the observation
obs_last_action: false
# NOTE(review): the comment previously attached to this key was a verbatim
# copy of the rnn_hidden_dim one ("default rnn agent"). Going by the key name
# this is the hidden size used by the critic — confirm against the critic code.
critic_hidden_dim: 64
# 1e-14, written in exponent form so the magnitude is readable at a glance.
h_lr: 1.0e-14
eps_limit: 1
update_order: false
his_disc: false
only_critics: false
kick_in: 1

# Learner and critic selection.
# NOTE(review): this stanza was previously labelled "use COMA", but the
# learner and run name below are "pg_learner" / "pg"; only the critic Q and
# baseline functions are set to "coma". Confirm which label is intended.
agent_output_type: "pi_logits"
learner: "pg_learner"
critic_q_fn: "coma"
critic_baseline_fn: "coma"
critic_train_mode: "seq"
critic_train_reps: 1
q_nstep: 0  # 0 corresponds to default Q, 1 is r + gamma*Q, etc
name: "pg"
ppo: false

