# Flat experiment configuration: a single top-level mapping of scalar settings.
# Keys are kept in ASCII-sorted order (uppercase sorts before lowercase), which
# is why related options are not always adjacent. Keep that order when adding
# keys so diffs stay small.
# NOTE(review): the meanings below are inferred from key names only; the code
# that reads this file is not visible here -- confirm against the consumer.
#
# --- Size / count settings (all integers) ---
# presumably the number of candidates kept in the confidence set -- TODO confirm
N_confset_size: 1000
# presumably the number of independent experiment repetitions -- TODO confirm
N_experiments: 10
N_iterations: 25
# presumably the number of trajectories in the offline dataset -- TODO confirm
N_offline_trajs: 10
N_rollouts: 10
# NOTE(review): single-letter key; its meaning cannot be determined from this file.
W: 1
# Symbolic option name; the set of accepted values is defined by the consumer.
baseline_search_space: augmented_ball
# Offline and online delta are set to the same value here.
# presumably confidence/failure-probability levels -- TODO confirm
delta_offline: 0.05
delta_online: 0.05
do_offline_BC: true
# --- Environment settings ---
env: Gridworld
# presumably the probability that a chosen move succeeds -- verify in the env code
env_move_prob: 0.8
episode_length: 10
gamma_t_hardcoded_value: 0.15
# Explicit null: no value is configured for this option.
loaded_run_behaviour: null
# Offline and online transition-model epoch counts are set to the same value here.
n_transition_model_epochs_offline: 5
n_transition_model_epochs_online: 5
# NOTE(review): `hardcode_radius` presumably makes the consumer use
# `offlineradius_override_value` below instead of a computed radius -- confirm.
offlineradius_formula: hardcode_radius
offlineradius_override_value: 0.96
online_confset_bonus_multiplier: 0.008
online_confset_recalc_phi: false
phi_name: state_counts
# --- Plotting / output flags ---
# presumably toggles a logarithmic y-axis in plots -- TODO confirm
plot_logy: true
plot_slim: true
# Explicit null rather than true/false; how the consumer treats null (versus
# false) is not visible here -- verify before changing.
replace_mle_with_optimal_policy_in_offline_confset: null
# Free-form run identifier string.
run_ID: paper_run
# Both of these run switches are disabled in this configuration.
run_baseline: false
run_bridge: false
save_results: true
use_true_T_in_online: false
# Explicit empty list (not null); the entries it may hold are defined by the consumer.
verbose: []
# --- `w` settings ---
w_MLE_epochs: 10
w_initialization: uniform
w_sigmoid_slope: 10
# Unlike the other option values in this file, this one is hyphen-separated
# rather than snake_case; it must match the consumer's expected string exactly.
which_confset_construction_method: rejection-sampling-from-sample
which_hellinger_calc: exact
which_plot_subopt: cumulative_regret
xi_formula: smaller_start
