# Experiment hyperparameters (flat key/value mapping).
#
# NOTE(review): the `!!python/tuple` tags below are PyYAML-specific and are
# rejected by `yaml.safe_load`; the consumer must use a loader that supports
# Python tags. Only load this file from a trusted source.
#
# NOTE(review): a bare number in a trailing comment (e.g. `# 32`) looks like
# an alternative, larger-scale value for that key which the active value
# replaces -- confirm against the original experiment setup before restoring.
# The frame counts quoted in the comments imply 4 frames per step.
num_steps: 50000000  # Equals 200M frames.
algorithm: "SA"
K: 5  # 32
moment_lr: 0.00005
prop_lr: 0.00005
memory_size: 100000
epi_memory_size: 10000
gamma: 0.99
multi_step: 1
update_interval: 4
target_update_interval: 1000  # 10000
start_steps: 100  # 50000
epsilon_train: 0.01
epsilon_eval: 0.001
epsilon_decay_steps: 250000  # Equals 1M frames.
double_q_learning: false
alea_comparison_measure: "wang"
alea_comparison_hyps: !!python/tuple [0.0]
epi_comparison_measure: "oneversusall"
epi_comparison_hyps: !!python/tuple [0., 1]
epistemic_method: "none"
num_ensembles: 5
use_per: false
log_interval: 1
eval_interval: 25000  # 250000
# The 500k-frame figure belongs to the alternative value 125000; the active
# value of 125 steps corresponds to 500 frames at the same 4 frames per step.
num_eval_steps: 125  # 125000 (equals 500k frames)
max_episode_steps: 2700  # 27000
# NOTE(review): the key is spelled `grad_cliping` (sic). It is left unchanged
# because the consumer presumably looks up this exact name -- confirm before
# renaming. The value is an explicit null, i.e. no value is set.
grad_cliping: null
loss: "MMD"
theta: !!python/tuple [0., 1., 0.]
gammas: !!python/tuple [1, 10]
