# Core learning hyperparameters.
# NOTE(review): the key names suggest an actor-critic learner with separate
# actor/critic learning rates and a discount factor `gamma`; confirm the exact
# meaning of each key (in particular `alpha`) against the consuming code.
alpha: 0.2
batch_size: 128
actor_lr: 0.0003
critic_lr: 0.0003  # same value as actor_lr
gamma: 0.99
# NOTE(review): written as a float (150000.0) although the name suggests a
# count; confirm the consumer accepts a float here.
max_epochs_since_update_decay_interval: 150000.0

# Model / environment dimensions.
# NOTE(review): the standard Gym HalfCheetah task (see `env` below) has a
# 6-dimensional action space, while action_dim is 3 here; confirm whether
# these values are overridden from the environment at runtime.
state_dim: 17
action_dim: 3
# NOTE(review): plural name but a single integer; presumably one width shared
# by all hidden layers - confirm.
hidden_sizes: 256
max_action: 1

# Boolean switch, currently disabled. Written as canonical lowercase `false`
# so it matches the other booleans in this file and parses as a boolean under
# every YAML schema.
temperature_opt: false

# Update-rule settings.
# NOTE(review): presumably `tau` is a soft target-update coefficient,
# `update_interval` a delayed-update period and `expl_noise` an exploration
# noise scale - confirm against the consuming code.
tau: 0.005
update_interval: 2
expl_noise: 0.2

# Evaluation and training schedule. All values are integer counts.
# NOTE(review): units are presumably environment steps (episodes for
# eval_episode) - TODO confirm.
eval_episode: 10
eval_freq: 10000
start_steps: 5000
max_step: 1000000
# NOTE(review): "tar" presumably abbreviates "target" (cf. tar_env_config);
# confirm what is counted between interactions.
tar_env_interact_freq: 10

# Compute device identifier handed to the consumer.
# NOTE(review): `cuda` presumably requires a GPU; confirm whether the consumer
# falls back to CPU when none is available.
device: cuda

# NOTE(review): presumably the checkpoint interval, in the same unit as
# max_step - confirm.
save_freq: 50000

# NOTE(review): the meaning of these keys is not evident from this file;
# document them once confirmed against the consuming code.
lam: 0.7
temp: 3.0
# Distinct from runner.experiment.proportion, which is set to 0.5; confirm
# which key each consumer actually reads.
proportion: 0.25
metric: 'cosine'

# Environment name. The same value is repeated in tar_env_config.env_name;
# keep the two in sync if that is what the consumer expects (TODO confirm).
env: HalfCheetah

# Runner settings, split into data-filtering options and experiment options.
runner:
  filter_data:
    seq_len: 20
    dir: data/costs_20  # relative path; resolved by the consumer
  experiment:
    seed: 123
    eta: 3.5
    grad_norm: 5.0
    # Same value as filter_data.seq_len (20).
    # NOTE(review): confirm whether the two must stay equal.
    K: 20
    exp_name: qt
    save_path: ./save/
    # max_iters * num_steps_per_iter = 500000 with the values below.
    max_iters: 500
    num_steps_per_iter: 1000
    # Feature flags: every boolean in this section is currently enabled.
    # NOTE(review): the effect of each flag is defined by the consuming code
    # and is not visible in this file.
    lr_decay: true
    early_stop: true
    k_rewards: true
    use_discount: true
    v_target: true
    use_mean_reduce: true
    relabel_adv: true
    rtg_no_q: true
    adv_scale: 2.0
    command_state_normalization: true
    adv_mean_reduce: true
    # Distinct from the top-level `proportion` key, which is set to 0.25.
    proportion: 0.5
    ot_filter: true
    ot_proportion: 0.5
    pi_reg: true
    pi_reg_weight: 0.5
    training_normalization: true
    rescale_reward: true
    use_weighted_qloss: true

# Configuration of the environment referred to as "tar" elsewhere in this
# file (presumably "target" - confirm).
tar_env_config:
  env_name: HalfCheetah
  env_targets:
    - 12000
    - 9000
    - 6000
  # Parameter overrides. The keys contain spaces and must be kept verbatim.
  # NOTE(review): the lower and upper limits hold the same single value
  # (0.01); confirm this is intended.
  param:
    bt jnt lower limit:
      - 0.01
    bt jnt upper limit:
      - 0.01
