---
# Run-level settings for this experiment configuration.
experiment: pop_explore4_poptd
# NOTE(review): presumably a dataset/environment id resolved by the
# training code -- confirm against the consumer.
env: halfcheetah-random-v2

# Booleans are written in canonical lowercase form.
orthogonal_init: true
# NOTE(review): the units of the counters below (epochs, evaluation interval,
# trajectories per evaluation) are inferred from the key names -- confirm.
n_epochs: 1600
eval_period: 20
eval_n_trajs: 40


# Settings grouped under the `cql` key.
# NOTE(review): the list-valued entries below look like sweep axes that the
# launcher expands into separate runs -- confirm against the consumer.
cql:
  # Boolean switches, written in canonical lowercase form.
  use_pop: true
  cql_importance_sample: false
  pop_learn_g: true
  pop_backprop_dual_grad: true
  reweight_cql_loss: true
  dual_grad_clip: 1

  cql_min_q_weight:
    - 1.0
    - 0.4
    - 0.2
    - 0.1
  # Exponent-form floats carry an explicit decimal point and a signed
  # exponent. YAML 1.1 resolvers (e.g. PyYAML) load a bare `1e-2` as the
  # string "1e-2", while `1.0e-2` is a float under both YAML 1.1 and 1.2.
  dual_lr_gain:
    - 1.0e-2
    - 1.0e-3
    - 1.0e-4
  # Was `1e0`; written as a plain float for the same reason as above.
  g_lr_gain:
    - 1.0
  pop_rank:
    - 256

# Five consecutive seed values written as hexadecimal integer literals
# (0xCAFE0000 through 0xCAFE0004).
# NOTE(review): these rely on the loader resolving `0x...` scalars as
# integers; a loader that does not will yield strings -- confirm.
seed:
  - 0xCAFE0000
  - 0xCAFE0001
  - 0xCAFE0002
  - 0xCAFE0003
  - 0xCAFE0004