---
# Experiment configuration (run_ID "paper_run", env_id "Reacher-v5").
#
# NOTE(review): keys are grouped below by shared name prefix/theme only.
# The meaning of each key is defined by the code that consumes this file,
# which is not visible here -- confirm against that code before relying on
# any section heading. Key names and values are unchanged.

# --- Run identity, output and logging ---
run_ID: paper_run
run_dir: exps/Reacher/paper_run
seed: 42
save_results: true
use_wandb: false
verbose: []
loaded_run_behaviour: null

# --- Which variants to run (baseline / bridge) ---
run_baseline: true
run_bridge: true
baseline_or_bridge: null

# --- Environment ---
env_id: Reacher-v5
episode_length: 50
initial_pos_noise: null
norm_obs: false

# --- Counts (N_* keys) ---
N_experiments: 20
N_iterations: 200
N_offline_trajs: 20
N_rollouts: 10
N_confset_size: 100
N_confset_dilution: 400

# --- bc_* keys (presumably behavioural cloning -- TODO confirm) ---
bc_loss: log-loss
bc_print_evals: true
n_bc_epochs: 100

# --- confset_* keys ---
confset_base: bcnoise
confset_dilution: bignoise
confset_noise: 0.01

# --- expert_in_* flags ---
expert_in_candidates: true
expert_in_confset: false
expert_in_eval: false

# --- Embeddings and offline trajectories ---
embedding_name: avg_sa
n_embedding_samples: 200
fresh_embeddings: false
fresh_offline_trajs: false

# --- filter_pi_t_* / gamma keys ---
filter_pi_t_yesno: true
filter_pi_t_gamma: 10.0
gamma_debug_mode: false

# --- w_* keys ---
w_trainfunc: mle
w_epochs: 100
w_initialization: uniform
w_regularization: null
w_sigmoid_slope: 1
project_w: false
retrain_w_from_scratch: false

# --- Policy selection and evaluation ---
which_policy_selection: max_uncertainty
which_eval_space: pi_zero
ucb_beta: 1

# --- Plotting ---
plot_logy: true
plot_scores: false
plot_slim: true
which_plot_subopt:
  - cumulative_regret
  - raw_reward

# --- Ungrouped (theme not evident from the key name alone) ---
V_init: small
W: 10
hidden_dim: null
radius: 0.65
exclude_outliers: false
