# Hyperparameter file for a run named CQL on antmaze-medium-play-v2.
# Keys are grouped by topic below; the consumer of this file is not visible
# here, so per-key semantics should be confirmed against the code that loads it.

# Run labels and seed.
project: CORL
group: cql-antmaze-medium-play-v2-multiseed-v0
name: CQL
seed: 0

# Environment and evaluation.
# NOTE(review): n_episodes presumably counts evaluation episodes — confirm.
env: antmaze-medium-play-v2
eval_freq: 50000
n_episodes: 100

# Device and model/checkpoint locations.
device: cuda
# Explicit null (no value), not the string "null".
checkpoints_path: null
# Explicit empty string.
load_model: ''

# Iteration counts, batch and buffer sizes.
offline_iterations: 1000000
online_iterations: 1000000
bc_steps: 0
batch_size: 256
buffer_size: 10000000

# Normalization flags and reward scale/bias.
normalize: false
normalize_reward: true
reward_scale: 10.0
reward_bias: -5.0

# Learning rates, discount, target updates, and network options.
policy_lr: 0.0001
qf_lr: 0.0003
discount: 0.99
soft_target_update_rate: 0.005
target_update_period: 1
orthogonal_init: true
q_n_hidden_layers: 5

# Entropy-related switches.
use_automatic_entropy_tuning: true
alpha_multiplier: 1.0
backup_entropy: false

# Settings carrying the cql_ prefix.
cql_alpha: 5.0
cql_alpha_online: 5.0
cql_n_actions: 10
cql_temp: 1.0
cql_lagrange: true
cql_target_action_gap: 0.8
cql_importance_sample: true
cql_max_target_backup: true
# Clip bounds: the minimum is the integer -200; .inf is YAML's float
# positive infinity, so the upper bound is a float, not a string.
cql_clip_diff_min: -200
cql_clip_diff_max: .inf
