# Flat hyperparameter mapping for a single training run.
# Keys are grouped by topic based on their names only; the authoritative
# meaning of each key is defined by the consuming training script, which is
# not part of this file. Mapping key order carries no meaning in YAML.

# --- Run identity / bookkeeping ---------------------------------------------
project: CORL
# NOTE(review): the group string embeds the same id as `env` below; presumably
# the two should be kept in sync when the environment is changed - confirm.
group: cql-maze2d-umaze-dense-v1-multiseed-v0
name: CQL
# Explicit null (no value), as opposed to an empty string.
checkpoints_path: null
# Empty string (not null).
load_model: ''

# --- Environment, hardware and evaluation schedule --------------------------
env: maze2d-umaze-dense-v1
seed: 0
device: cuda
max_timesteps: 1000000
eval_freq: 5000
n_episodes: 100

# --- Data handling ----------------------------------------------------------
buffer_size: 10000000
batch_size: 256
normalize: true
normalize_reward: false
# Scale 1.0 and bias 0.0 look like identity values for a reward transform -
# TODO confirm how the consumer combines them.
reward_scale: 1.0
reward_bias: 0.0

# --- Optimisation -----------------------------------------------------------
discount: 0.99
# Parsed as a float (mantissa has a dot and the exponent is signed).
policy_lr: 3.0e-05
qf_lr: 0.0003
soft_target_update_rate: 0.005
target_update_period: 1
bc_steps: 0

# --- Network ----------------------------------------------------------------
orthogonal_init: true
q_n_hidden_layers: 3

# --- Entropy-related switches -----------------------------------------------
alpha_multiplier: 1.0
use_automatic_entropy_tuning: true
backup_entropy: false

# --- CQL-prefixed settings --------------------------------------------------
cql_alpha: 10.0
cql_n_actions: 10
cql_temp: 1.0
cql_importance_sample: true
cql_lagrange: true
cql_target_action_gap: 5.0
cql_max_target_backup: false
# `.inf` / `-.inf` are the YAML spellings of +/- infinity (floats, not
# strings). Presumably these bounds leave the clipped quantity unbounded -
# verify against the consumer.
cql_clip_diff_min: -.inf
cql_clip_diff_max: .inf
