# Hydra-style defaults list: selects the `gpt2_plain` option from the `/gpt2`
# config group (the leading slash looks like an absolute group path).
# NOTE(review): where the selected config is merged depends on the consuming
# config tree, which is not visible from this file -- confirm.
defaults:
  - /gpt2: gpt2_plain

# Top-level settings for the `is_ilql` configuration.
# The code that consumes these keys is not visible from this file, so every
# meaning noted below is an assumption to confirm against that consumer.
name: is_ilql
# NOTE(review): left null here; presumably supplied by a parent config or an
# override -- confirm.
dataset: null
# Numeric hyperparameters.
alpha: 0.005  # presumably a target-network soft-update rate -- TODO confirm
gamma: 1.0  # presumably a discount factor -- TODO confirm
beta: 1.0  # presumably a policy/advantage temperature -- TODO confirm
transition_weight: 0.0
# Optional limits, all null in this config.
# NOTE(review): null presumably means "no clipping / no bound" -- confirm.
clip_weight: null
value_max: null
value_min: null
# Boolean switches, all disabled in this config.
detach_v: false
detach_q: false
detach_pi: false
double_q: false
# NOTE(review): "seperate" is a misspelling of "separate", but these key names
# are presumably looked up verbatim by the consumer -- do not rename them here
# without renaming them there as well.
seperate_policy: false
seperate_target: false
tau: 0.5  # presumably an expectile / quantile level -- TODO confirm
exp_weights: false
dm_margin: 0.0
advanced_mlp: false
cql_temp: 1.0  # presumably a temperature for a CQL-style loss term -- TODO confirm
K: 1
# Checkpoint-loading settings for this configuration.
load:
  # Same value as the top-level `name` key.
  name: is_ilql
  # No checkpoint path is set here.
  # NOTE(review): null presumably means "do not load a checkpoint" -- confirm.
  checkpoint_path: null
  # NOTE(review): presumably forwarded as the strictness flag when restoring
  # saved weights -- confirm against the loader.
  strict_load: true
