# Hydra defaults list. `_self_` is listed first, so the values in this file
# are composed before the `model` and `env` config-group selections below,
# and those group configs can override anything set here.
defaults:
  - _self_
  - model: option
  - env: crafter

# NOTE(review): not consumed in this file; presumably requests deterministic
# CUDA behaviour in the training code — confirm against the consumer.
cuda_deterministic: false

# Logging / LLM-interpretation switches.
# NOTE(review): `wandb` presumably toggles Weights & Biases logging; the units
# of `llm_update_epoch` and `llm_update_decay` are not visible here.
wandb: false
llm_interpret: true
llm_update_epoch: 50
llm_update_decay: 0

# NOTE(review): the meaning of these two values cannot be determined from this
# file alone — document them where they are read.
debris: 10
smooth_gamma: 10

# Run control: seeding and checkpoint handling.
# NOTE(review): how `resume`, `load_options` and `checkpoint_path` interact is
# decided by the consuming code, not by this file.
seed: 0
resume: true
load_options: false
freeze_loaded_options: false
# Explicit null: no checkpoint path is configured by default.
checkpoint_path: null

eval: false

render: false
# `${env.name}` is interpolated from the `name` key of the composed env config.
render_path: './eval_${env.name}/'

# Optimisation schedule.
# NOTE(review): whether these counts are epochs, iterations or gradient steps
# is not visible in this file — confirm against the trainer.
batch_size: 128
max_iters: 80
warmup_steps: 2500 # 5000 (NOTE(review): looks like an alternative/previous value)

# Learning-rate decay settings (factor and step count as named by the keys).
lr_decay: 0.1
decay_steps: 100000

# Options config: sizes named by the keys below.
# NOTE(review): what each dimension applies to is defined by the model code.
option_dim: 128
codebook_dim: 16

parallel: false
savedir: checkpoints
# Left null here; to be filled in code.
savepath: null

# Left null here; to be filled in code.
method: null
# Use IQ-Learn objective instead of BC.
use_iq: false

# Optimiser step sizes and weight decay.
# NOTE(review): exponent literals without a decimal point (e.g. 1e-4) are
# resolved as floats by OmegaConf's YAML loader but as strings by a plain
# YAML 1.1 loader such as PyYAML — keep loading this file through
# Hydra/OmegaConf, or rewrite as 1.0e-4 if another loader is ever used.
learning_rate: 1e-4
lm_learning_rate: 1e-6
weight_decay: 1e-4
os_learning_rate: 1e-4

# Trainer settings.
trainer:
  # Left null here; to be filled in code.
  device: null
  state_il: false
  num_eval_episodes: 100
  eval_every: 1
  # Interpolated from the `K` key of the composed model config.
  K: ${model.K}

# Model-specific configuration. Null in this file; the `model` entry of the
# defaults list supplies the actual content.
model: null

# Env-specific configuration.
env:
  skip_words:
    - 'go'
    - 'to'
    - 'the'
    - 'a'
    - '[SEP]'

# Option configuration.
option_selector:
  # Null in this file. NOTE(review): presumably filled by another config or in
  # code — confirm.
  option_transformer: null

# Settings grouped under `iq`.
# NOTE(review): presumably consumed only when `use_iq` is enabled — confirm.
iq:
  alpha: 0.1
  div: chi
  loss: value
  gamma: 0.99
  # Don't use target updates.
  use_target: false

# Extra args
# Log every this many iterations.
log_interval: 1
# Save networks every this many iterations.
save_interval: 19
hydra_base_dir: ''
exp_name: ''
# Interpolated from the `name` key of the composed env config.
project_name: ${env.name}
