# Algorithm selection, input/output dimensions, and optimisation scalars.
# NOTE(review): the code that consumes these keys is not visible from this
# file; any meaning noted below is inferred from key names only and must be
# confirmed against the consumer.
# Presumably selects Implicit Q-Learning — TODO confirm.
algo: "iql"
state_dim: 245
# NOTE(review): the "h_" prefix on the two keys below is not explained in
# this file — confirm what it distinguishes from the unprefixed dimensions.
h_state_dim: 178
action_dim: 8
response_dim: 8
h_response_dim: 2
# 3 million.
max_buffer_size: 3000000
discount: 0.9
# Presumably a soft target-update rate — verify against the trainer.
tau: 0.001
# 5e-5, written in plain decimal form.
lr: 0.00005
alpha: 2.5
expectile: 0.8

# Evaluation and logging settings.
# NOTE(review): the unit of the *_every intervals (steps, batches, epochs)
# is not shown in this file — confirm against the training loop.
test_every: 1000
test_n_user: 500
# Canonical lowercase boolean: `true` resolves to a boolean in every YAML
# schema that defines booleans, whereas capitalised `True` does not.
test_immediate: true
log_every: 1000

# Training-loop sizing.
# NOTE(review): presumably the number of passes over the data and the number
# of samples per update — confirm against the consumer.
n_epoch: 5
batch_size: 4096

# Output toggles, written as canonical lowercase booleans so they resolve to
# booleans regardless of which YAML schema the loader applies.
save_model: true
use_tensorboard: true