# model learning
# Optimisation and network-size settings. The notes below are inferred from
# key names only; confirm against the code that consumes this file.
#
# Two separate learning rates. Keep the decimal point and the signed exponent:
# YAML 1.1 loaders such as PyYAML read forms like `1e-3` as strings.
lr_z: 1.0e-3
lr_policy: 1.0e-5
# Bounds are symmetric around zero (presumably a clamp on policy logits).
clip_policy_logit_min: -50.0
clip_policy_logit_max: 50.0
clip_grad_norm: 10.0
# Canonical lowercase boolean; parses to the same value as `False`.
init_logz: false
# String selectors; the set of accepted values is not visible here.
offline_select: 'random'
sa_or_ssr: 'sa'
# Presumably the hidden width and depth used when sa_or_ssr is 'sa' (confirm).
sa_hid_dim: 1024
sa_n_layers: 2

# trainer
# Training-loop sizing. Semantics are inferred from key names; confirm against
# the trainer implementation.
#
# Total number of active-learning rounds for the run.
num_active_learning_rounds: 25000
# Online and offline batches are configured with the same sample count (8).
num_samples_per_online_batch: 8
num_samples_per_offline_batch: 8
num_steps_per_batch: 1
# One online and one offline batch per round.
num_online_batches_per_round: 1
num_offline_batches_per_round: 1
# Value in [0, 1]; presumably a mixing weight, exact formula not visible here.
target_mix_backpolicy_weight: 0.5

# policy
# Presumably the probability of taking an exploratory action (confirm against
# the policy code).
explore_epsilon: 0.05

# guide
# Canonical lowercase boolean; parses to the same value as `True`.
parallelize: true
# Worker count; presumably only relevant when parallelize is true (confirm).
num_guide_workers: 30

# experiment
# Run bookkeeping. Semantics are inferred from key names; confirm against the
# experiment runner.
#
# Replicate index of this run (presumably also used for seeding; verify).
replicate: 0
# Relative directory path, written with a trailing slash.
saved_models_dir: 'saved_models/'
# Checkpoint interval, presumably counted in active-learning rounds.
save_every_x_active_rounds: 100

# monitor
# Monitoring cadence. Two intervals are configured: a "fast" one (10) and a
# "slow" one (200). The unit is presumably active-learning rounds (confirm).
monitor_num_samples: 100
monitor_fast_every: 10
monitor_slow_every: 200

# experiment-specific settings
# Dataset paths and task parameters for this particular run. Several values
# below appear to mirror each other; keep them in sync when editing.
#
# Relative paths under datasets/sehstr/.
blocks_file: 'datasets/sehstr/block_18.json'
# The "stop6" in this file name matches forced_stop_len below.
all_dataset: 'datasets/sehstr/block_18_stop6.pkl'
# The "sample1000" in this file name matches Bsize below.
offline_dataset: 'datasets/sehstr/offline_sample1000.pkl'
forced_stop_len: 6
Bsize: 1000
# String selectors; the set of accepted values is not visible here.
task: 'il'
flag: 'proxy'
ralpha: 0.5
beta: 2
# Appears to be composed as seh_<Bsize>_<flag>_<beta>_<ralpha>; quoted so it
# always loads as a string.
run_name: 'seh_1000_proxy_2_0.5'