# @package _global_
# Run identifier, built from the environment name and the seed.
name: train_${env.name}_${seed}

# Root directory interpolated into the model, log and data paths in this file.
# NOTE(review): no default is set, so presumably it is supplied as an
# override (e.g. path=/some/dir) — confirm.
path: null

# Hydra defaults list: picks one option for each of the beta, q, pi and
# baseline config groups.
defaults:
  - beta: bc
  - q: sarsa
  # Alternative option for the pi group, currently disabled:
  # - pi: reverse_kl
  - pi: exp_weight
  - baseline: value_sample

# which learners to train
# One flag per entry of the defaults list (beta, q, pi, baseline); only the
# baseline flag is off.
train_beta: true
train_q: true
train_pi: true
train_baseline: false

# Experiment tag; forms one directory level of every model_save_path below.
# NOTE(review): no default is set — presumably supplied as an override; confirm.
tag: null
# model params
model_tag: null  # the pretrained model tag
# Per-learner settings. Every load_path is null; q, pi and baseline save under
# ${path}/models/${tag}/ with a per-learner suffix. beta declares no
# model_save_path in this file.
beta:
  load_path: null
q:
  load_path: null
  model_save_path: ${path}/models/${tag}/${name}_q
pi:
  load_path: null
  model_save_path: ${path}/models/${tag}/${name}_pi
baseline:
  load_path: null
  model_save_path: ${path}/models/${tag}/${name}_baseline

# Resampling / weighting settings.
# NOTE(review): the meaning of each value is defined by the consuming code,
# which is not visible from this file.
resampling: uniform
topn: 100
weight_func: linear
weight_num: 3
# weight_path is a relative path containing the environment name and a literal
# "%s" placeholder — presumably filled in by the code that loads the weights;
# TODO confirm. The commented-out line is an alternative weight file pattern.
# weight_path: ../TD3_BC/weights/nstep_nstep_10_1000000_5_1000000_norm=1_scale=False_${env.name}_%s.npy
weight_path: ../TD3_BC/weights/1000000_5_1000000_scale=False_${env.name}_%s.npy
iter: 1
std: 2.0
eps: 0.1
eps_max: 0
weight_ensemble: mean
# train loop hyperparameters
# NOTE(review): values such as 5e5 have no decimal point and no exponent sign;
# some YAML 1.1 loaders read that form as a string rather than a float.
# Presumably the loader used here parses them as numbers — confirm, and check
# that consumers cast to int where a step count is required.
beta_steps: 5e5
baseline_steps: 0
steps: 1
q_steps: 2e6
pi_steps: 1e5

# Save / log / eval intervals (presumably counted in training steps — confirm).
# NOTE(review): pi_save_freq (1e6) is larger than pi_steps (1e5), so a periodic
# pi save would presumably never trigger during the pi phase — confirm intent.
beta_save_freq: 1e5
pi_save_freq: 1e6
q_save_freq: 1e6
log_freq: 2e3
eval_freq: 5e3

# Evaluation settings and log location.
eval_samples: 1000
eval_episodes: 10
# Logs go under the root path, in a directory named after the run.
log_dir: ${path}/logs/${name}

# seed is also interpolated into the run name.
seed: 0
# NOTE(review): presumably the compute device string handed to the training
# code — confirm.
device: cuda

# data parameters
# The dataset file is located under the root path and named after the
# environment.
data_path: ${path}/data/${env.name}.pt
env_type: d4rl
env:
  # env.name is also interpolated into name, data_path and weight_path.
  name: halfcheetah-medium-v2
discount: 0.99
# "???" is OmegaConf's marker for a mandatory value with no default.
# NOTE(review): presumably both dimensions are filled in at runtime once the
# environment/dataset is known — confirm.
state_dim: ???
action_dim: ???

# hydra parameters
# NOTE(review): the two entries below are ordinary top-level keys whose names
# contain a slash. Hydra normally selects config-group options through the
# defaults list, so presumably these do not change Hydra's logging setup as
# written — confirm, and move them into the defaults list if that was the
# intent.
hydra/hydra_logging: none
hydra/job_logging: none
hydra:
  # No output subdirectory is configured and the run directory is ".".
  output_subdir: null
  run:
    dir: .
  job:
    config:
      override_dirname:
        # The "name" override is left out of the generated override_dirname.
        exclude_keys:
          - name