# model learning
# Optimizer and network hyperparameters. Descriptions marked "presumably" are
# inferred from key names only; confirm against the code that reads this file.
# Keep the decimal point and signed exponent on the learning rates: YAML 1.1
# loaders such as PyYAML read a bare `1e-2` as a string, not a float.
lr_z: 1.0e-2  # presumably the learning rate for the log-partition (logZ) term
lr_policy: 1.0e-4  # presumably the learning rate for the policy network
# Lower/upper clip bounds for policy logits (symmetric around zero).
clip_policy_logit_min: -50.0
clip_policy_logit_max: 50.0
clip_grad_norm: 10.0
init_logz: false  # canonical lowercase boolean
offline_select: 'random'
sa_or_ssr: 'sa'
# NOTE(review): the sa_* sizes look tied to the 'sa' choice above; confirm
# whether they are ignored when sa_or_ssr is set to 'ssr'.
sa_hid_dim: 128
sa_n_layers: 2

# trainer
# Training-loop sizing. Meanings are read off the key names; verify against
# the trainer before relying on them.
num_active_learning_rounds: 25000
# Online and offline batches are configured with the same sample count.
num_samples_per_online_batch: 8
num_samples_per_offline_batch: 8

# One step per batch, and one online plus one offline batch per round.
num_steps_per_batch: 1
num_online_batches_per_round: 1
num_offline_batches_per_round: 1
# presumably a mixing weight in [0, 1] for the backward policy — TODO confirm
target_mix_backpolicy_weight: 0.25

# policy
# presumably the probability of taking an exploratory action — TODO confirm
explore_epsilon: 0.01

# guide
# Parallelism settings for the guide component.
parallelize: true  # canonical lowercase boolean
# presumably only consulted when parallelize is true — confirm against caller
num_guide_workers: 30

# experiment
# Run bookkeeping: replicate index and checkpoint output settings.
replicate: 0
# Relative path with a trailing slash; presumably resolved against the
# process working directory — verify where the trainer is launched from.
saved_models_dir: 'saved_models/'
# Checkpoint interval, counted in active-learning rounds per the key name.
save_every_x_active_rounds: 200

# monitor
# Monitoring cadence. The "fast" and "slow" intervals are presumably counted
# in active-learning rounds — TODO confirm the unit.
monitor_num_samples: 100
monitor_fast_every: 10
# Same interval (200) as save_every_x_active_rounds in the experiment stanza.
monitor_slow_every: 200

# experiment-specific settings
# Dataset paths are relative; presumably resolved against the working
# directory. The .pkl extension suggests pickle files — only load trusted ones.
all_dataset: 'datasets/tfbind8/tfbind8-exact-v0-all.pkl'
offline_dataset: 'datasets/tfbind8/offline_sample1000.pkl'
# presumably the fixed sequence length for the TFBind8 task — confirm
forced_stop_len: 8
Bsize: 1000
task: 'il'
ralpha: 0
beta: 2
flag: 'proxy'
# NOTE(review): the name ends in "_2_0.5" while beta is 2 and ralpha is 0 here.
# If the suffix is meant to encode beta and ralpha, one of them is stale.
run_name: 'tf8_1000_proxy_2_0.5'