# Flat hyperparameter mapping for one experiment run.
# Keys are grouped below by name only; a YAML mapping is unordered, so the
# grouping is purely for readability and does not affect the loaded values.
# NOTE(review): group headings are inferred from key names — confirm against
# the consuming training script.

# --- Run identification and bookkeeping ---
project: CORL
group: cql-pen-cloned-v1-multiseed-v0
name: CQL
seed: 0
device: cuda
# Explicit null (no value set), as opposed to an empty string.
checkpoints_path: null
# Empty string, not null.
load_model: ''

# --- Environment and evaluation ---
env: pen-cloned-v1
eval_freq: 5000
n_episodes: 10
is_sparse_reward: false

# --- Iteration counts, batching and buffer ---
offline_iterations: 1000000
online_iterations: 1000000
bc_steps: 0
batch_size: 256
buffer_size: 10000000
mixing_ratio: 0.5

# --- Normalisation and reward transform ---
normalize: false
normalize_reward: false
reward_scale: 1.0
reward_bias: 0.0

# --- Learning rates, target updates and network ---
discount: 0.99
policy_lr: 0.0001
qf_lr: 0.0003
soft_target_update_rate: 0.005
target_update_period: 1
q_n_hidden_layers: 3
orthogonal_init: true

# --- Entropy-related switches ---
alpha_multiplier: 1.0
use_automatic_entropy_tuning: true
backup_entropy: false

# --- CQL-prefixed settings ---
cql_alpha: 1.0
cql_alpha_online: 1.0
cql_n_actions: 10
cql_temp: 1.0
cql_importance_sample: true
cql_max_target_backup: true
cql_lagrange: false
cql_target_action_gap: 0.8
# `.inf` is YAML's float-infinity literal; it must stay unquoted to load as a
# float rather than the string ".inf".
cql_clip_diff_max: .inf
cql_clip_diff_min: -200