---
# Algorithm selector, input/output dimensions, and value-learning settings.
# NOTE(review): the meanings suggested below are inferred from key names only;
# confirm against the code that loads this file.
algo: 'iql'  # presumably Implicit Q-Learning — confirm
state_dim: 41
h_state_dim: 41  # same value as state_dim; the "h_" prefix is not explained here
action_dim: 2
response_dim: 1
h_response_dim: 1  # same value as response_dim
max_buffer_size: 900000
discount: 0.9
tau: 0.001
expectile: 0.8

# Optimisation settings.
# NOTE(review): lr is presumably the learning rate; the role of alpha is not
# visible in this file — confirm against the training code.
lr: 0.00005
alpha: 2.5

# Evaluation and logging cadence.
# NOTE(review): the unit of the *_every values (steps vs. epochs) is not
# visible in this file — confirm against the training loop.
test_every: 300
test_n_user: 50
# Booleans use the canonical lowercase spelling so that every YAML loader
# (1.1 and 1.2 schemas alike) resolves them as booleans.
test_immediate: true
log_every: 300

# Training length and batch size.
# NOTE(review): presumably the number of epochs and the samples per batch —
# confirm against the training loop.
n_epoch: 20
batch_size: 3000

# Output switches.
# Booleans use the canonical lowercase spelling so that every YAML loader
# (1.1 and 1.2 schemas alike) resolves them as booleans.
# NOTE(review): what gets saved/logged and where is decided by the consuming
# code, not by this file.
save_model: true
use_tensorboard: true