# environment
# Environment and task-layout settings. How each key is consumed is decided by
# the training code, which is not visible in this file.
env: minigrid
task: MiniGrid-FourRooms-New
# Four entries, each naming the same task as `task`. `agent_poses`,
# `goal_poses` and `horizons` in this file are also four entries long.
# NOTE(review): presumably these lists are indexed in parallel (see
# `task_idx`) - confirm against the loader.
tasks: [MiniGrid-FourRooms-New,MiniGrid-FourRooms-New,MiniGrid-FourRooms-New,MiniGrid-FourRooms-New]
render_mode: null
action_repeat: 1
modality: 'state'
discount: 0.99
# `${action_repeat}` interpolates the `action_repeat` key above. At the YAML
# level both values below are plain strings of the form "<int>/<expr>".
# NOTE(review): assumes the config loader evaluates the division - confirm.
max_steps: 100/${action_repeat}
train_steps: 300000/${action_repeat}
# One two-element pair per entry; the values used here are 1 and 25, with
# `size: 27` set below.
# NOTE(review): presumably grid coordinates of the agent start - confirm the
# axis order against the environment wrapper.
agent_poses: [[1,1],[25,1],[1,25],[25,25]]
# Alternative goal placement, currently disabled:
# goal_poses: [[8, 1], [25, 8], [1, 22], [18, 24]]
goal_poses: [[8, 12], [17, 12], [12, 19], [18, 15]]
goal_radius: 12
dense_reward: false
# NOTE(review): presumably selects an entry of the four-element lists above -
# confirm.
task_idx: 0
size: 27
two_staged: false
# NOTE(review): presumably only relevant when `two_staged` is true - confirm.
secondary_reward: 0.3
put_blocks: true

# planning
# Planner hyperparameters.
# NOTE(review): the key names match a sampling-based (CEM/MPPI-style) planner
# with elites, temperature and momentum; the planner itself is not visible
# here - confirm semantics there before tuning.
iterations: 6
num_samples: 512
num_elites: 64
mixture_coef: 0.05
# Also referenced by `std_schedule` in the learning section via `${min_std}`.
min_std: 0.05
temperature: 0.5
momentum: 0.1
use_q: true

# learning
# Training and optimisation hyperparameters.
batch_size: 512
max_buffer_size: 100000
se_buffer_trajectories: 10
# Referenced by `horizon_schedule` below via `${horizon}`.
horizon: 10
# Four entries, all equal to `horizon` in this configuration.
# NOTE(review): presumably one horizon per entry of `tasks` - confirm.
horizons: [10, 10, 10, 10]
# Loss-term weights (names end in `_coef`).
reward_coef: 0.5
value_coef: 0.1
consistency_coef: 2
recon_coef: 1
kl_coef: 0.02
# NOTE(review): the meaning of `rho` and `kappa` is not visible in this file -
# confirm against the training code.
rho: 0.5
kappa: 0.1
# Written without a decimal point; some YAML 1.1 loaders read `1e-3` as a
# string rather than a float.
# NOTE(review): assumes the project's loader yields a float here - confirm.
lr: 1e-3
# Schedule strings with interpolated end values (`${min_std}`, `${horizon}`).
# NOTE(review): presumably `linear(start, end, steps)` is parsed by the
# training code into a linear ramp - confirm.
std_schedule: linear(0.5, ${min_std}, 50000)
horizon_schedule: linear(1, ${horizon}, 10000)
# NOTE(review): presumably prioritised-replay exponents - confirm.
per_alpha: 0.6
per_beta: 0.4
grad_clip_norm: 10
seed_steps: 5000
wm_freeze_steps: 0
update_freq: 40
# NOTE(review): the meaning of `tau`, `mu`, `phi` and `k` is not visible in
# this file - confirm against the training code.
tau: 0.01
mu: 0.0001
phi: 10000
save_freq: 50000
save_image_freq: 10000
k: 1
reviewer_reward_threshold: 0.9
sigmoid_threshold: 9.5
use_learner_reward: true
forgetting_coef: 1.0
unified_wm: true
reviewer_ratio: 0.5
cost_coef: 0.5
detach: true

# architecture
# Model-component switches and layer sizes. The encoder, reviewer and VAE are
# all switched off in this configuration.
use_encoder: false
use_reviewer: false
use_vae: false
update_vae: false
# Previous encoder width, kept for reference:
# enc_dim: 256
enc_dim: 64
mlp_dim: 512
latent_dim: 50
vae_enc_dim: 64
# NOTE(review): the key is spelled "gumble" (likely meant "Gumbel"); it is
# left unchanged because whatever reads this config must use the same name.
gumble_temp: 1.0
use_crelu: false
stochastic_vae: false
# NOTE(review): `lambda_ewc` and the `fisher_*` keys presumably configure an
# EWC-style regulariser - confirm against the training code.
lambda_ewc: 10  # Adjust as needed
fisher_batch_size: 128  # Adjust as needed
fisher_num_batches: 500  # Adjust as needed
vae_data_update_freq: 10

# wandb (insert your own)
# Logging is enabled here with a hard-coded project and entity; replace both
# with your own values, or set `use_wandb` to false.
use_wandb: true
wandb_project: wm-finetuning
wandb_entity: dmc-finetuning

# misc
# Run bookkeeping: seeding, evaluation cadence, checkpoints and debug output.
seed: 1
exp_name: default
eval_freq: 10000
eval_episodes: 10
save_video: false  # video cannot be saved in minigrid
save_model: true
# Unset (null) in this configuration, as are `se_buffer_path` and
# `pre_rollout_ckpt` below.
# NOTE(review): presumably a checkpoint path to load - confirm.
ckpt: null
se_buffer_path: null
load_policy: false
load_old: false
show_plots: false
eval_mode: false
train_learner: true
save_buffer: false
save_heatmaps: true
first_step_qr_only: false
reset_reviewer_env: true
pre_rollout_ckpt: null
pre_rollout_steps: 100
# NOTE(review): presumably toggles `std_schedule` / `horizon_schedule` from
# the learning section - confirm.
use_schedule: true