# Flat run configuration: a single top-level mapping whose values are all
# scalars, except `verbose`, which is an empty list.
# Keys appear in ASCII-sorted order (uppercase before lowercase).
# NOTE(review): that ordering suggests this file was written by a tool; confirm
# before hand-editing, since a regenerated dump would drop these comments.
# The meaning of each key is defined by the consuming code, which is not
# visible here; notes below that go beyond the literal values are hedged.
#
# Integer settings sharing the `N_` prefix. Presumably sizes / repetition
# counts -- inferred from the names only; TODO confirm against the consumer.
N_confset_size: 200
N_experiments: 30
N_iterations: 25
N_offline_trajs: 2
N_rollouts: 10
# NOTE(review): single-letter key; its meaning cannot be determined from this file.
W: 1
baseline_search_space: all_policies
# The offline and online delta settings carry the same value here.
delta_offline: 0.05
delta_online: 0.05
do_offline_BC: true
# Environment selection and its parameters. `env` is a plain string identifier;
# presumably it is resolved by name in the consumer -- verify.
env: StarMDP_with_random_flinging
# Presumably a probability in [0, 1] -- TODO confirm.
env_move_prob: 0.7
episode_length: 8
gamma_t_hardcoded_value: 0.2
# Explicit null: no value is set for this key in this configuration.
loaded_run_behaviour: null
# The offline and online epoch counts carry the same value here.
n_transition_model_epochs_offline: 5
n_transition_model_epochs_online: 5
# NOTE(review): presumably `offlineradius_override_value` is the radius used
# when the formula is `hardcode_radius` -- confirm in the consuming code.
offlineradius_formula: hardcode_radius
offlineradius_override_value: 0.9
online_confset_bonus_multiplier: 0.01
online_confset_recalc_phi: false
phi_name: id_short
# Plot-related switches; both are enabled.
plot_logy: true
plot_slim: true
replace_mle_with_optimal_policy_in_offline_confset: true
# String label for this run.
run_ID: paper_run
# Both `run_*` switches are off in this configuration.
run_baseline: false
run_bridge: false
save_results: true
use_true_T_in_online: false
# Empty list (not null or false). Presumably a list of verbosity topics/flags
# with none enabled -- TODO confirm the accepted entries.
verbose: []
# Settings sharing the `w_` prefix.
w_MLE_epochs: 10
w_initialization: uniform
w_sigmoid_slope: 10
# String-valued selectors. The hyphenated value below is a plain scalar and
# parses as a single string.
which_confset_construction_method: rejection-sampling-from-all
which_hellinger_calc: exact
which_plot_subopt: cumulative_regret
xi_formula: smaller_start
