# Flat hyperparameter mapping for a single run: algo CPQL on
# antmaze-medium-play-v0, seed 0, device cuda.
# NOTE(review): the code that reads this file is not visible from here. Any
# remark below about what a key *means* is inferred from its name only and is
# marked as such — confirm against the consumer before relying on it.
algo: CPQL
# presumably a multiplier applied to the entropy coefficient — TODO confirm
alpha_multiplier: 1.0
backup_entropy: false
batch_size: 256
# 0 here; presumably means no behaviour-cloning warm-up steps — verify
bc_steps: 0
buffer_size: 10000000
# explicit null: no checkpoint path is configured in this file
checkpoints_path: null
# cql_* keys: presumably options for the conservative Q-learning term — confirm.
# Unquoted .inf is read by YAML as a float (positive infinity), not as the
# string ".inf"; -200 is read as an integer.
cql_clip_diff_max: .inf
cql_clip_diff_min: -200
cql_importance_sample: true
cql_lagrange: true
cql_max_target_backup: true
# NOTE(review): cql_alpha sits out of the alphabetical order the neighbouring
# keys follow; harmless for a mapping, but worth knowing when diffing configs.
cql_alpha: 10.0
cql_n_actions: 10
cql_target_action_gap: 0.8
cql_temp: 1.0
device: cuda
discount: 0.99
env: antmaze-medium-play-v0
# eval_freq and n_episodes: units are not stated in this file; presumably
# "evaluate every 5000 training steps, over 100 episodes" — TODO confirm
eval_freq: 5000
# quoted empty string (a str, not null); presumably "do not load a model"
load_model: ''
max_timesteps: 1000000
n_episodes: 100
# normalize is off while normalize_reward is on; presumably the former refers
# to observations/states — verify against the consumer
normalize: false
normalize_reward: true
orthogonal_init: true
# separate learning rates: policy 1e-4, Q-function 3e-4
policy_lr: 0.0001
qf_lr: 0.0003
seed: 0
# presumably the Polyak/soft-update coefficient for target networks, applied
# every target_update_period update(s) — TODO confirm
soft_target_update_rate: 0.005
target_update_period: 1
# NOTE(review): from here on the keys are no longer alphabetical; presumably
# they were appended after the original set.
q_n_hidden_layers: 5
# presumably rewards are transformed as reward * reward_scale + reward_bias;
# the order of scale vs. bias is not visible here — verify
reward_scale: 10.0
reward_bias: -5.0
use_automatic_entropy_tuning: true
# presumably the n-step return horizon and a Peng's Q(lambda)-style mixing
# coefficient (key is spelled "lmbda" — keep as-is, it is the lookup name)
nstep: 5
peng_lmbda: 0.3