# Hydra defaults list: the configs that are composed into the final config.
# `_self_` comes first, so the entries after it are merged on top of this file
# and win on any key that both define.
defaults:
  - _self_
  # Load option `coffee_push` from the `task` config group and place its keys
  # in the root (`_global_`) package, next to the top-level keys of this file.
  - task@_global_: coffee_push
  # Swap Hydra's launcher for `submitit_local` (hydra-submitit-launcher
  # plugin); its options are set under `hydra.launcher` in this file.
  - override hydra/launcher: submitit_local

# task settings
# NOTE(review): the descriptions in this section are inferred from key names;
# the training code that reads them is not part of this file -- confirm there.
# presumably the number of consecutive frames stacked into one observation
frame_stack: 3
# presumably how many environment steps each chosen action is repeated for
action_repeat: 2
# presumably the reward discount factor (gamma)
discount: 0.99
# train settings
# presumably the number of frames collected before agent updates begin
num_seed_frames: 4000
# eval
# presumably: run an evaluation every 20000 frames, over 20 episodes each time
eval_every_frames: 20000
num_eval_episodes: 20
# snapshot
save_snapshot: true
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 1
# presumably the n-step return horizon
nstep: 3
batch_size: 256
# /dev/null is a device node, not a directory, so this path cannot exist as
# written; it looks like a scrubbed placeholder. Override it with the real
# demonstrations directory (presumably only needed when `demo` is true).
demo_path_prefix: /dev/null/SAMPO/mbrl/demonstrations
demo: false

# misc
seed: 1
# Interpolated into `agent.device`.
device: cuda
save_video: true
save_train_video: false
# Interpolated into `agent.use_tb`.
use_tb: true
# experiment
experiment: exp
# Prefix of the single-run output directory name (see `hydra.run.dir`).
exp_name: mbpo
# agent
# Interpolated into `agent.lr`. Written with an explicit mantissa dot: YAML 1.1
# loaders such as plain PyYAML only recognise exponent floats that contain a
# ".", and would load a bare `1e-4` as the string "1e-4".
lr: 1.0e-4
# Interpolated into `agent.feature_dim`.
feature_dim: 50

# metaworld settings
# NOTE(review): the descriptions below are inferred from key names; confirm
# against the environment wrapper and training loop that read them.
# presumably the name of the camera view used to render observations
camera: corner
# presumably the episode length limit, in steps
duration: 100
# presumably an extra reward added when the task is solved
succ_bonus: 10.0
# presumably how many agent updates are performed at each update point
agent_update_times: 2

# Constructor arguments for the RL agent. When this node is passed to
# `hydra.utils.instantiate`, Hydra imports the class named by `_target_` and
# passes the remaining keys as keyword arguments.
agent:
  _target_: drqv2.DrQV2Agent
  # `???` marks a mandatory value: it must be set before it is read, otherwise
  # OmegaConf raises an error (presumably filled in by the training script once
  # the environment specs are known).
  obs_shape: ??? # to be specified later
  action_shape: ??? # to be specified later
  # `${name}` values are interpolations of the top-level key `name`.
  device: ${device}
  lr: ${lr}
  critic_target_tau: 0.01
  update_every_steps: 1
  use_tb: ${use_tb}
  num_expl_steps: 2000
  hidden_dim: 1024
  feature_dim: ${feature_dim}
  # `stddev_schedule` has no top-level definition in this file; presumably it
  # is supplied by the task config merged into the root package -- verify.
  stddev_schedule: ${stddev_schedule}
  stddev_clip: 0.3
  beta: 0.0 # intrinsic bonus
  delay_steps: 1


# mbpo
# Settings for the model-based part of training.
# NOTE(review): the descriptions below are inferred from key names only; the
# training loop that reads them is not part of this file -- confirm there.
# presumably: every 200 steps, generate a batch of 32 model rollouts, each 10
# steps long
gen_every_steps: 200
gen_batch: 32
gen_horizon: 10
# presumably update intervals (in steps) for the generative model and for its
# tokenizer
update_gen_every_step: 10
update_tokenizer_every_step: 40
# presumably the number of model updates performed at each update point
update_gen_times: 1
# presumably the size of the initial model-training phase
init_update_gen_steps: 1000
init_gen_times: 20
# presumably the fraction of each agent batch drawn from real (not generated)
# data
real_ratio: 0.5
# presumably the step at which model-generated data starts being used; the
# value equals `num_seed_frames` (4000) -- confirm whether both use one unit
start_mbpo: 4000


# Settings for the pretrained world model (tokenizer + transformer) and for
# the optimizers that fine-tune it.
# NOTE(review): per-key meanings are inferred from key names; confirm against
# the code that builds the world model.
world_model:
  load_pretrained_model: true
  # The three /dev/null/... paths below cannot exist as written (/dev/null is a
  # device node, not a directory); they look like scrubbed placeholders and
  # must be overridden with the real config and checkpoint locations.
  config_name: /dev/null/SAMPO/configs/llama/config.json
  vqgan_type: ctx_vqgan
  pretrained_model_name_or_path: /dev/null/SAMPO/pretrained_models/SAMPO-oxe-64-act-free/tokenizer
  pretrained_transformer_path: /dev/null/SAMPO/pretrained_models/SAMPO-oxe-64-act-free/transformer
  load_internal_llm: true
  llama_attn_drop: 0.1
  fast_reward_predictor: false
  symlog: true

  # NOTE(review): segment_length (12) equals context_length (2) plus the
  # top-level gen_horizon (10); confirm whether that relation is required.
  context_length: 2
  segment_length: 12
  action_dim: 4

  # World-model batch size; separate from the top-level `batch_size` key.
  batch_size: 16
  selected_params: true
  # Tokenizer optimizer settings. Exponent floats carry an explicit mantissa
  # dot so YAML 1.1 loaders (e.g. plain PyYAML) parse them as floats rather
  # than strings; the numeric values are unchanged.
  tok_lr: 1.0e-4
  tok_wd: 0.0
  tok_beta1: 0.9
  tok_beta2: 0.999
  max_grad_norm: 1.0
  max_target_frames: 5

  # Dynamics-model optimizer settings.
  model_lr: 1.0e-4
  model_wd: 0.0
  embed_no_wd: true

  # Reward-predictor optimizer settings.
  reward_lr: 1.0e-4
  reward_wd: 1.0e-6
  reward_weight: 1.0

# Hydra runtime settings: output directories and the submitit launcher.
hydra:
  run:
    # Single-run output directory: date folder, then
    # <exp_name>_<time>_<command-line overrides>.
    dir: ./log_mbrl/${now:%Y.%m.%d}/${exp_name}_${now:%H%M%S}_${hydra.job.override_dirname}
  sweep:
    # Multirun output directory. It interpolates the top-level `experiment`
    # key; this file defines no `agent_cfg` node, so `${agent_cfg.experiment}`
    # could not be resolved from it.
    dir: ./exp/${now:%Y.%m.%d}/${now:%H%M}_${experiment}
    # One sub-directory per job, named by the job's index in the sweep.
    subdir: ${hydra.job.num}
  # Options for the `submitit_local` launcher chosen in the defaults list.
  launcher:
    # 4300 minutes is roughly 71.7 hours.
    timeout_min: 4300
    cpus_per_task: 10
    gpus_per_node: 1
    tasks_per_node: 1
    mem_gb: 160
    nodes: 1
    # NOTE(review): this path uses %H%M%S while `sweep.dir` uses %H%M, so the
    # submitit folder lands beside the sweep directory rather than inside it;
    # confirm whether that is intended.
    submitit_folder: ./exp/${now:%Y.%m.%d}/${now:%H%M%S}_${experiment}/.slurm
