# Run identification and bookkeeping flags.
# NOTE(review): the meaning of each key is inferred from its name only; the
# code that consumes this file is not visible here — confirm before relying
# on these descriptions.
name: test

# Booleans use the canonical lowercase spelling (parses to the same value as
# the capitalised form, but is unambiguous across YAML 1.1 / 1.2 loaders).
# presumably: skip the run if results already exist / overwrite old results
# — TODO confirm
check_already_ran: false
overwrite: true

# presumably selects where data/results live (local disk vs. blob storage)
# — TODO confirm
local: true
blob: false

# Algorithm family selector.
# NOTE(review): the set of accepted values is not visible in this file.
alg: rl

# Environment and training-data selection.
# NOTE(review): descriptions below are inferred from key names — confirm
# against the consumer.
# env_id is a plain string (the "cartpole_" prefix keeps it from being read
# as a number).
env_type: bsuite
env_id: cartpole_0.0
# presumably dataset type / seed / size used for training — TODO confirm
train_type: uni
train_seed: 0
train_size: small

# presumably the input normalisation mode ("id" looks like identity, i.e. no
# normalisation) — TODO confirm
norm: id

# Settings for the "b_" component.
# NOTE(review): the "b_" prefix together with "train_bc" suggests a
# behaviour(-cloning) model, but that is inferred from names only — confirm
# against the consumer.
b_config: train_bc
# Network shape: architecture name, hidden width, number of layers
# (presumed meaning — TODO confirm).
b_network: mlp
b_width: 64
b_depth: 2
# presumably the number of training steps — TODO confirm
b_step: 10000
# presumably normalisation mode and learning rate — TODO confirm
b_norm: id
b_lr: 0.001

# Settings for the "unc_" component.
# NOTE(review): "unc" presumably stands for uncertainty (see the value
# "train_uncertainty" below); all per-key descriptions are inferred from
# names only — confirm against the consumer.
# Boolean uses the canonical lowercase spelling (same parsed value as the
# capitalised form).
wrap_unc: true
# NOTE(review): meaning of the single-letter value "s" is not visible here.
unc_type: s

unc_config: train_uncertainty
# Network shape: architecture name, hidden width, number of layers, feature
# dimension (presumed meaning — TODO confirm).
unc_network: mlp
unc_width: 256
unc_depth: 2
unc_feature_dim: 64
# presumably number of components / ensemble members — TODO confirm
unc_n_comp: 5
unc_noise_scale: 0.0
unc_prior_scale: 1.0
# presumably learning rate, training steps, normalisation mode and RNG seed
# — TODO confirm
unc_lr: 0.0001
unc_step: 10000
unc_norm: id
unc_seed: 0

# Learner / actor selection and the main network shape.
# NOTE(review): the accepted learner and actor names are defined by the
# consumer, which is not visible here.
learner: soft_spibb_qr
actor: greedy_spibb_qr
network: mlp
width: 256
depth: 2

# Algorithm hyper-parameters.
# NOTE(review): the role of each value (epsilon, tau, alpha, min_prob) depends
# on the selected learner — confirm against its implementation.
# Boolean uses the canonical lowercase spelling (same parsed value as the
# capitalised form).
use_heuristic: true
epsilon: 0.0
tau: 0.0
alpha: 0.0
min_prob: 0.0

# Training schedule.
# NOTE(review): 100001 is one more than a multiple of eval_period and
# ckpt_period; presumably this lets the last evaluation/checkpoint at step
# 100000 run — TODO confirm against the training loop.
train_steps: 100001

# presumably: evaluate / checkpoint every N steps, and episodes per
# evaluation — TODO confirm
eval_period: 10000
eval_episodes: 100
ckpt_period: 10000

# Optimisation settings (presumed meaning from key names — TODO confirm).
seed: 0
batch_size: 256
lr: 0.00003
target_update_period: 1000

# presumably the reward discount factor — TODO confirm
discount: 0.99


