# Resources: hardware and acceleration switches.
# NOTE(review): how each switch is consumed is defined outside this file.
cuda: true
cudagraphs: true
compile: false

# Environment settings.
num_envs: 4
# NOTE(review): presumably selects a synchronous vector env — confirm.
sync_vec_env: true
action_repeat: 1
normalize_observations: false
capture_video: false

# Logging and measurement settings.
# NOTE(review): presumably the Weights & Biases project name — confirm.
wandb_project: "fugu"
wasserstein_two: false
# NOTE(review): the unit of this count is not stated in this file.
measure_burnin: 3

# Training mode settings.
num_timesteps: 10000000  # ten million
learning_starts: 0
# NOTE(review): units of the two eval_* values are not stated in this file.
eval_every: 10000
eval_steps: 10
deque_window: 20

# evaluation mode
# NOTE(review): presumably only read when the consumer runs in evaluation
# mode — confirm against the script that loads this file.
num_episodes: 10

# model
# NOTE(review): which networks this switch applies to is not stated here.
layer_norm: false

# Optimization: learning rates and gradient clipping.
# Exponent floats carry an explicit mantissa dot (3.0e-4, not 3e-4).
# YAML 1.1 loaders such as PyYAML only resolve exponent notation to a
# float when the mantissa contains a "."; without it the value loads as
# a string. The dotted form is a float under both YAML 1.1 and YAML 1.2.
actor_lr: 3.0e-4
clip_norm: 20.0  # only actor
qnets_lr: 1.0e-3
reward_lr: 1.0e-3

# Algorithm hyperparameters.
segment_len: 1
batch_size: 256
gamma: 0.99
rb_capacity: 4000000  # four million
polyak: 0.005

# Variant switches.
# NOTE(review): exact semantics are defined by the consumer — confirm.
prefer_td3_over_sac: false
bcq_style_targ_mix: false

# Actor / target-update settings.
actor_update_delay: 2
actor_noise_std: 0.1  # for TD3
targ_actor_smoothing: true
td3_std: 0.2
td3_c: 0.5
crit_targ_update_freq: 1

# alpha settings.
# NOTE(review): presumably the SAC entropy temperature — confirm.
alpha_init: 0.2
autotune: true
# Written as 1.0e-3 rather than 1e-3: YAML 1.1 loaders such as PyYAML
# load exponent notation without a mantissa dot as a string, not a float.
log_alpha_lr: 1.0e-3

# Method selection.
# NOTE(review): the set of accepted method names is defined by the consumer.
method: "ngt"
minimax_only: true

# NOTE(review): presumably read only by particular methods — confirm.
mmd_sigma: 1.0
num_bc_iters: 10000000  # ten million

# Network input and layer options.
# NOTE(review): which network these configure is not stated in this file.
input_mode: "sa"
activation: "leaky_0.05"
spectral_norm: true
dropout: false

# Regularisation options.
label_smooth: 0.1
ent_reg_scale: 0.0001
# Gradient-penalty group; the grad_pen switch itself is currently false.
grad_pen: false
grad_pen_scale: 10.0
grad_pen_targ: 1.0
one_sided_pen: true


# Output head and loss selection.
# NOTE(review): which model these apply to is not stated in this file.
ngt_loss: "huber"
out_size: 32
out_scale: 5
v2: false

temperature: 1.0
survivorship: false
# Percentile range: 10-90 when true, 05-95 when false.
tighter_percentile_range: true
# When true, wrap the final reward with symexp (temperature 1.0).
stretch_with_symexp: false

# Per-update proportions and adversarial ascent scale (all 1.0 here).
# NOTE(review): the meaning of the p_/e_ prefixes is not documented here.
advers_p_ascent_scale: 1.0
e_proportion_of_exp_per_update: 1.0
p_proportion_of_exp_per_update: 1.0

# hlgauss_* group.
# NOTE(review): presumably HL-Gauss histogram settings (value range, bin
# count, per-component sigma) — confirm against the consumer.
hlgauss_num_bins: 101
hlgauss_minmax_value: 10.0
hlgauss_sigma_p: 0.05
hlgauss_sigma_e: 0.15
hlgauss_sigma_i: 0.15
