# Hydra defaults list: each `group: option` entry selects one config from that
# config group and composes it into this file.
# `_self_` marks the position at which this file's own keys are merged. Under
# Hydra's Defaults List rules, later entries override earlier ones, so with
# `_self_` listed first the group configs below win over values set in this
# file whenever the same key appears in both.
# NOTE(review): confirm that precedence is intended; move `_self_` to the end
# if the values in this file are meant to have the final say.
defaults:
  - _self_
  - STTransformer: SpaTemTransformer
  - Separate_Fusion: separate_encoder
  - world_model: Inverse_world_model
  - data: multidataset
  - optimizer: optimizer

# General training parameters
# Booleans are written as canonical lowercase `true`/`false`, matching
# `wandb.enabled` further down in this file.
batch_size: 8
val_batch_size: 4
seq_len: 16
num_workers: 8
pin_memory: true
shuffle_train: true

# `phase` is also interpolated into `wandb.name` via `${phase}`.
phase: 2
exp_name: "Dinov2-DiLA"
grad_accum_every: 1
# 300k steps. Written without `_` digit separators: those are only resolved as
# an integer by YAML 1.1 parsers; a YAML 1.2 core-schema parser would load
# `300_000` as a string.
num_train_steps: 300000
max_grad_norm: 6.0
# Step-based intervals for checkpointing and validation.
save_model_every: 1000
save_milestone_every: 5000
val_every_n_steps: 1000
num_val_batches_to_log: 4
num_val_samples_to_save: 6
# NOTE(review): presumably controls whether optimizer state is stored in
# milestone checkpoints — confirm against the trainer.
milestone_optim_state: true
batch_first: true
seed: 42
work_dir: "./checkpoints/Multi-Phase-Mixed"

# WandB logging
wandb:
  enabled: true
  project: "Abstract Relations"
  # `${phase}` is an OmegaConf interpolation of the top-level `phase` key, so
  # the run name tracks the configured training phase.
  name: "Mixed-dataset-phase${phase}"

# RAE settings: encoder selection, decoder paths, and latent-handling options.
# NOTE(review): "RAE" is not expanded anywhere in this file — confirm the
# meaning with the model code before documenting it further.
RAE:
  encoder_cls: "Dinov2withNorm"  # or "MAEwNorm"
  encoder_config_path: "facebook/dinov2-with-registers-base"
  encoder_input_size: 224
  encoder_params:
    # Same identifier as `encoder_config_path` above; keep the two in sync.
    dinov2_path: "facebook/dinov2-with-registers-base"
    normalize: true
  decoder_config_path: "configs/decoder/ViTXL"
  pretrained_decoder_path: "./pretrained/dinov2_decoder.pt"
  normalization_stat_path: "./pretrained/stat.pt"
  reshape_to_2d: true
  noise_tau: 0.0
  decoder_patch_size: 16

# Accelerator settings
accelerator:
  # NOTE(review): presumably the timeout (in seconds) handed to the
  # accelerator / distributed process-group setup — confirm against the
  # training script, and check that 60 s is long enough for slow steps such as
  # checkpoint saving or validation on a single rank.
  timeout_seconds: 60