# Hyperparameters for the algorithm named by `class`.
# NOTE(review): the groupings below are inferred from key names only; confirm
# them against the HindsightPreferenceLearning implementation.
algorithm:
  class: HindsightPreferenceLearning
  # Presumably the policy/value-learning settings — TODO confirm.
  expectile: 0.7
  beta: 0.3333
  max_exp_clip: 100.0
  discount: 0.99
  tau: 0.005
  # Sequence lengths. seq_len equals `segment_length` of the trajectory-mode
  # dataset entry further down (both 100); presumably they must stay in
  # sync — verify before changing either one.
  seq_len: 100
  future_len: 5
  # Latent/prior sizes. Bracketed inline values are presumably the candidates
  # the author considered.
  z_dim: 64 # [64, 128]
  prior_sample: 5 # [5, 10]
  # Step budgets; for reference, `trainer.total_steps` is 1000000.
  vae_steps: 250000 # tune
  reward_steps: 50000
  # Coefficients (presumably loss weights — confirm).
  kl_loss_coef: 5.0 # [0.5, 5.0]
  kl_balance_coef: 0.8
  reg_coef: 0.0001

# Run-level settings. `checkpoint` and `device` are left unset (null), so
# whatever default the consumer applies is used — confirm what that default is.
checkpoint: null
seed: 0
name: default
debug: false
device: null
# wandb logging (presumably Weights & Biases). Disabled here via
# `activate: false`; `entity` and `project` are unset.
wandb:
  activate: false
  entity: null
  project: null

# Environment selection. The same environment id is repeated in every
# `dataset` entry; keep them in sync when switching tasks.
# The three optional settings are unset. They are written as explicit `null`
# (the same parsed value as a bare `key:`) to match `checkpoint`, `device` and
# `processor`, and to show the empty value is intentional.
env: hopper-medium-expert-v2
env_kwargs: null
env_wrapper: null
env_wrapper_kwargs: null

# Optimizer settings. Only a `default` entry is defined (Adam, lr 0.0003).
# NOTE(review): presumably `default` applies to every network that has no
# entry of its own — confirm against the consumer.
optim:
  default:
    class: Adam
    lr: 0.0003

# Per-network architecture settings. Every sub-network that exposes
# `ortho_init` has it set to false. `reward`, `critic` and `value` all use
# class `Critic`; they differ only in `ensemble_size` (unset for `reward`).
network:
  # Only entry with num_layers/num_heads/dropout — presumably an
  # attention-based sequence encoder; TODO confirm.
  encoder:
    embed_dim: 128
    num_layers: 3
    num_heads: 4
    dropout: 0.1
  # Note: decoder embed_dim (64) equals `algorithm.z_dim` (64), not the
  # encoder's embed_dim (128) — verify whether these are meant to be coupled.
  decoder:
    embed_dim: 64
    hidden_dims: [256, 256] # shallower?
    ortho_init: false
  prior:
    hidden_dims: [256, 256]
    ortho_init: false
  reward:
    class: Critic
    hidden_dims: [256, 256, 256] # tune
    ortho_init: false
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256, 256]
    reparameterize: false
    conditioned_logstd: false
    # Bounds for the log standard deviation (per the key names).
    logstd_min: -5
    logstd_max: 2
    ortho_init: false
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256, 256]
    ortho_init: false
  value:
    class: Critic
    ensemble_size: 1
    hidden_dims: [256, 256, 256]
    ortho_init: false


# Three dataset entries, all for the same environment id as top-level `env`.
# There are also exactly three `dataloader` entries; presumably they pair up
# by position — confirm before adding or removing an entry.
dataset:
  # Trajectory-mode data; segment_length (100) equals `algorithm.seq_len`.
  - class: D4RLOfflineDataset
    env: hopper-medium-expert-v2
    batch_size: 64 # [64, 128]
    mode: trajectory
    segment_length: 100
    padding_mode: none
  # Comparison data in `human` mode (presumably human preference labels —
  # TODO confirm).
  - class: IPLComparisonOfflineDataset
    env: hopper-medium-expert-v2
    batch_size: 8
    mode: human
  # Transition-mode data with a much larger batch size (6400).
  - class: D4RLOfflineDataset
    env: hopper-medium-expert-v2
    batch_size: 6400
    mode: transition

# Three identical loader entries, matching the three `dataset` entries in
# count (presumably paired by position — confirm).
# `batch_size: null` per the original note below: the loader does not merge
# data along the batch axis; each dataset entry sets its own `batch_size`.
dataloader:
  - num_workers: 2
    batch_size: null
  - num_workers: 2
    batch_size: null
  - num_workers: 2
    batch_size: null
  # Disabled alternative kept for reference (not parsed):
  # num_workers: 0  # use the main thread to sample data
  # batch_size: null  # do not merge the data along batch axis

# Training-loop settings; the `*_freq` values are presumably measured in
# training steps — confirm.
trainer:
  env_freq: null
  total_steps: 1000000
  log_freq: 500
  profile_freq: 500
  # NOTE(review): the earlier inline comment here said "don't do eval", but a
  # non-null frequency of 10000 is configured. Confirm whether evaluation is
  # intended; the value is left unchanged.
  eval_freq: 10000

# Evaluation settings: the function to call and its arguments.
# `num_ep` is presumably the number of evaluation episodes — confirm.
eval:
  function: eval_offline
  num_ep: 10
  deterministic: true

# No schedulers are configured. Written as an explicit `null`, which parses
# to the same value as the bare `schedulers:` key.
schedulers: null

# No processor is configured.
processor: null
