# Flat hyperparameter set for one training run: algo CPQL on env pen-human-v0.
# NOTE(review): the code that consumes this file is not visible from here, so
# the per-key notes below are inferred from key names and are marked as
# assumptions to confirm against the trainer.
algo: CPQL
alpha_multiplier: 1.0
backup_entropy: false
batch_size: 256
# 0 here; presumably the number of initial behaviour-cloning steps - TODO confirm.
bc_steps: 0
buffer_size: 10000000  # ten million
# null rather than a path; presumably means no checkpoints are written - confirm.
checkpoints_path: null
# Both bounds are YAML float infinities, so presumably no clipping is in
# effect with these values - verify how the trainer applies them.
cql_clip_diff_max: .inf
cql_clip_diff_min: -.inf
cql_importance_sample: true
cql_lagrange: false
cql_max_target_backup: false
# presumably the weight on the conservative (CQL) penalty term - TODO confirm.
cql_alpha: 10.0
cql_n_actions: 10
# NOTE(review): cql_lagrange is false in this file; presumably this gap is
# only read when it is true - confirm before tuning.
cql_target_action_gap: -1.0
cql_temp: 1.0
# NOTE(review): presumably needs a CUDA-capable GPU; check which device
# strings the consumer accepts before changing this for CPU-only hosts.
device: cuda
discount: 0.90  # parses as the float 0.9
env: pen-human-v0
# presumably evaluation runs every eval_freq training steps for n_episodes
# episodes each (see n_episodes) - TODO confirm units.
eval_freq: 5000
# Empty string, not null; presumably means "do not load a saved model" - confirm.
load_model: ''
max_timesteps: 1000000  # one million
n_episodes: 10
# The two normalize flags are independent keys: `normalize` is on while
# `normalize_reward` is off. What `normalize` applies to is not visible here.
normalize: true
normalize_reward: false
orthogonal_init: true
# Written in scientific notation (equal to 0.0001); qf_lr uses plain decimal.
policy_lr: 1.0e-04
qf_lr: 0.0003
seed: 0
# presumably the soft (Polyak) target-update coefficient, applied every
# target_update_period updates - TODO confirm.
soft_target_update_rate: 0.005
target_update_period: 1
q_n_hidden_layers: 3
# Scale 1.0 and bias 0.0; presumably an identity reward transform - confirm
# the order in which the trainer applies them.
reward_scale: 1.0
reward_bias: 0.0
use_automatic_entropy_tuning: true
# presumably the n-step return length and the lambda of the Q(lambda)-style
# backup - TODO confirm.
# NOTE(review): the key is spelled `lmbda`; presumably it matches the
# consumer's field name, so do not "correct" the spelling without checking.
nstep: 5
peng_lmbda: 0.5
