# Hydra defaults list: the config-group options that are composed together
# with the keys declared in this file.
defaults:
  # `@_global_` is Hydra's package override: the selected `task` option is
  # merged at the root of the config instead of under a `task` key.
  - task@_global_: metaworld_window-open-v2
  # Selects the `drqv2` option of the `agent` config group.
  - agent: drqv2

# task settings
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# snapshot
save_snapshot: false
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 2
nstep: 3
batch_size: 256
# misc
# `seed` is interpolated into hydra.run.dir (as `seed${seed}`), so each seed
# writes to its own output directory.
seed: 1
device: cuda
save_video: true
save_train_video: false
use_tb: true
# experiment
experiment: exp
# agent
# Scientific notation is written with an explicit mantissa dot: YAML 1.1
# loaders such as PyYAML read a bare `1e-4` as a string, not a float.
lr: 1.0e-4
feature_dim: 50

gradient_update: 1

# reward learning
# `segment`, `reward_batch` and `max_feedback` are interpolated into
# hydra.run.dir (as `l${segment}`, `n${reward_batch}` and
# `max_feedback${max_feedback}`), so changing them changes the output path.
segment: 50
activation: tanh
num_unsup_frames: 0
num_interact: 30000
# Explicit mantissa dot so YAML 1.1 loaders such as PyYAML parse this as a
# float; a bare `3e-5` is read as a string by those loaders.
reward_lr: 3.0e-5
reward_batch: 10
reward_update: 50
feed_type: 0
reset_update: 100
topK: 5
ensemble_size: 3
max_feedback: 200
large_batch: 10
label_margin: 0.0
teacher_type: 0
teacher_noise: 0
teacher_margin: 0
teacher_thres: 0
# Canonical lowercase boolean (same value as the former `True`).
reward_stack: true
reward_max_episodes: 20
reward_capacity: 2000

# augmentation
# All four augmentation values are interpolated into hydra.run.dir
# (`aug_${aug_ratio}_${img_shift}_${time_shift}_${time_crop}`), so changing
# any of them changes the output directory.
img_shift: 0
time_shift: 5
time_crop: 5
aug_ratio: 5

# scheduling
reward_schedule: 0

# PLS
# The three tau values are interpolated into hydra.run.dir
# (`t${tau_min}_${tau_max}_${tau_delta}`).
tau_min: 1.0
tau_max: 1.0
tau_delta: 0.0

# CER
# Every key in this stanza is interpolated into hydra.run.dir
# (`res${rreset}_${qreset}_${init_k}_${increase_q}_${step_k}_${max_rr}`),
# so changing any of them changes the output directory.
max_rr: 4
init_k: 25
step_k: 0.9
increase_q: 5.0
rreset: 3
# Canonical lowercase boolean. It still loads as a boolean, so the path
# component produced by `${qreset}` should be unchanged — verify if the
# directory name is parsed downstream.
qreset: true

# Hydra runtime settings.
hydra:
  run:
    # Output directory for a single run. The path encodes the experiment's
    # hyperparameters in this order:
    #   <task_name>/seed<seed>/
    #   max_feedback<max_feedback>_n<reward_batch>_l<segment>/
    #   t<tau_min>_<tau_max>_<tau_delta>_aug_<aug_ratio>_<img_shift>_
    #   <time_shift>_<time_crop>_res<rreset>_<qreset>_<init_k>_<increase_q>_
    #   <step_k>_<max_rr>
    # NOTE(review): `task_name` is not defined in this file; presumably it is
    # supplied by the composed `task` config — confirm.
    dir: ./exp_PoLiCER/${task_name}/seed${seed}/max_feedback${max_feedback}_n${reward_batch}_l${segment}/t${tau_min}_${tau_max}_${tau_delta}_aug_${aug_ratio}_${img_shift}_${time_shift}_${time_crop}_res${rreset}_${qreset}_${init_k}_${increase_q}_${step_k}_${max_rr}
  