# Hydra defaults list. Entries are composed in the order written, so the order
# below is significant: `_self_` (this file) is listed first, followed by the
# selected config groups.
defaults:
  - _self_
  # Selected config-group options: agent -> potil, suite -> dmc.
  - agent: potil
  - suite: dmc
  # Swap Hydra's launcher for `submitit_local`; its settings are given under
  # `hydra.launcher` in this file.
  - override hydra/launcher: submitit_local

# Root directory of the project. Interpolated into `expert_dataset` and
# `bc_weight` in this file.
# NOTE(review): 'path_to_dir/GDIL' looks like a placeholder — set it to the
# real checkout location before running.
root_dir: 'path_to_dir/GDIL'

# Replay buffer, plus progress / truncation / sampling options.
# Options with no value are written as an explicit `null` (what an empty value
# parses to in YAML) so that "unset" is visibly intentional.
# NOTE(review): the meaning of the individual progress_* / adaptive_* options
# is defined by the consuming training code, which is not visible here.
replay_buffer_size: 150000
replay_buffer_num_workers: 1
nstep: 3
batch_size: 512  # 128 (presumably an alternative/earlier value — confirm)
progress_guide_start: 0.1
progress_guide_end: 1.0
progress_guide: false
progress_guide_len: 0
progress_truncate: null
mask_reward_by_progress: null
mask_reward_by_progress_offset: 1000000
adaptive_truncate: false
adaptive_truncate_alpha: null
adaptive_truncate_offset: null
expl_mode_offset: -1000000  # disabled by default
buffer_truncate_by_progress: false
biased_sampling: true
sample_by_length: false
ot_truncate_by_progress: false
adaptive_progress: false
adaptive_progress_threshold: -0.05
adaptive_progress_offset: 0
adaptive_progress_mode: expert_sum  # one of: expert, fixed, expert_sum
adaptive_progress_percentile: 1.0
adaptive_progress_num_traj: 500
ref_score_percentile: 10
agent_score_percentile: 50
max_progress_delta: 10000
reward_bound_smooth: false
oversample_timeout: null
# Miscellaneous run settings: seeding, device, logging/video output,
# checkpointing and discount options.
seed: 2
device: cuda
save_video: true
save_train_video: false
use_tb: true
test_debug: false
save_every_model: false
adaptive_discount: false
# Unset by default; written as explicit `null` (what an empty value parses to).
adaptive_discount_mode: null
adaptive_discount_paras: null

# Experiment identification.
# `obs_type` selects the observation type; the listed choices are
# 'pixels' and 'features'.
obs_type: 'pixels' # pixels, features
# Experiment name assembled by interpolation from the agent name, suite name,
# observation type, task name and seed. It is reused in the Hydra output
# directories configured in this file.
# NOTE(review): `task_name` is not defined in this file — presumably it comes
# from the selected suite config or a command-line override; confirm.
experiment: ${agent.name}_${suite.name}_${obs_type}_${task_name}_seed_${seed}

# Expert demonstrations.
# Number of demos; per-suite values noted by the author:
# 50 (openaigym), 10 (dmc), 1 (metaworld), 1 (particle), 1 (robotgym).
num_demos: 10 #50(openaigym), 10(dmc), 1(metaworld), 1(particle), 1(robotgym)
# Path to the pickled expert demos, resolved from `root_dir`, the suite name
# and `task_name` (the latter is not defined in this file).
expert_dataset: '${root_dir}/ROT/expert_demos/${suite.name}/${task_name}/expert_demos.pkl'

# Behaviour-cloning (BC) weight loading.
# NOTE(review): presumably when true, the checkpoint at `bc_weight` is loaded
# at startup — confirm against the training script.
load_bc: false

# Path to the BC checkpoint file (`bc.pt`), resolved from `root_dir`, the
# suite name, `obs_type` and `task_name`.
bc_weight: '${root_dir}/ROT/weights/${suite.name}_${obs_type}/${task_name}/bc.pt'

# Train with BC loss.
bc_regularize: false
# Weighting scheme for the BC term; the listed choices are 'linear' and
# 'qfilter'.
bc_weight_type: 'qfilter' # linear, qfilter

# Hydra runtime settings: output directories and launcher parameters.
# `${now:...}` inserts the launch date/time, so each run gets its own folder.
hydra:
  # Output directory for a single run: date folder, then time + experiment name.
  run:
    dir: ./exp_local/${now:%Y.%m.%d}/${now:%H%M%S}_${experiment}
  # Output directory for multirun sweeps; each job writes to a subdirectory
  # named after its job number.
  sweep:
    dir: ./exp_local/${now:%Y.%m.%d}/${now:%H%M%S}
    subdir: ${hydra.job.num}
  # Settings for the launcher selected in the defaults list (submitit_local).
  launcher:
    tasks_per_node: 1
    nodes: 1
    # NOTE(review): this folder lives under ./exp while run/sweep output goes
    # under ./exp_local — confirm the differing root is intentional.
    submitit_folder: ./exp/${now:%Y.%m.%d}/${now:%H%M%S}_${experiment}/.slurm
