# Training and evaluation settings, stored as one flat mapping.
# Frame counts quoted in the comments follow this file's own ratio of
# 4 frames per step (50M steps = 200M frames).
# Notes of the form "alternative value: N" preserve values that were left
# commented out next to the active setting.
num_steps: 50000000  # Equals 200M frames.
batch_size: 32
N: 32
num_cosines: 64
ent_coef: 0  # Entropy loss can be used as a regularizer.
kappa: 1.0
quantile_lr: 5.0e-5
fraction_lr: 2.5e-9
memory_size: 1000000
gamma: 0.99
multi_step: 1
update_interval: 4
target_update_interval: 200  # alternative value: 10000
start_steps: 500
epsilon_train: 0.01
epsilon_eval: 0.001
epsilon_decay_steps: 250000  # Equals 1M frames.
double_q_learning: false
dueling_net: false
noisy_net: false
alea_comparison_measure: "wang"
# NOTE(review): !!python/tuple is a Python-specific tag. Safe loaders such as
# PyYAML's yaml.safe_load reject it, so this file must be read with a loader
# that supports the tag; only load it from a trusted source.
alea_comparison_hyps: !!python/tuple [0.0]
epi_comparison_measure: "oneversusall"
epi_comparison_hyps: !!python/tuple [0.0, 1]
epistemic_method: "ensemble"
num_ensembles: 10
use_per: false
log_interval: 1
eval_interval: 25000  # alternative value: 250000
# The 500k-frame figure belongs to the alternative value 125000, not to 125.
num_eval_steps: 125  # alternative value: 125000 (equals 500k frames)
max_episode_steps: 100  # alternative value: 27000
# Explicit null: no value is configured here.
# NOTE(review): "grad_cliping" looks like a misspelling of "grad_clipping";
# the key is kept as-is because the consumer presumably reads it by this name.
grad_cliping: null
