# @package _global_
defaults:
  - /experiment/timeseries/base_corelogic.yaml

# 1 lz
# Learning-rate scheduler step counts, derived from other config values through
# the `eval` resolver (presumably a custom resolver registered by the training
# code — confirm) so they follow changes to epochs, batch size and device count.
scheduler:
  # max_epochs * steps_per_epoch / (batch_size * devices).
  # With the values set in this file: 40 * 35 / (1 * 1) = 1400.
  # (An earlier note on this line read "300000".)
  # NOTE(review): if `eval` is Python's eval, `/` yields a float — confirm the
  # scheduler accepts non-integer step counts.
  num_training_steps: ${eval:"${trainer.max_epochs} * ${dataset.steps_per_epoch} / ( ${loader.batch_size} * ${trainer.devices} )"}
  num_warmup_steps: ${eval:"${scheduler.num_training_steps} / 10"}  # one tenth of num_training_steps

# Trainer overrides for this experiment.
trainer:
  devices: 1  # also a divisor in scheduler.num_training_steps
  max_epochs: 40  # also a factor in scheduler.num_training_steps
  # Already set to gpu (the earlier "change to gpu if available later" note was
  # stale). Presumably needs an override on machines without a GPU — confirm.
  accelerator: gpu

# Encoder overrides. `_name_` is set to `set_encoder` (presumably a registry key
# resolved by the training code — confirm).
encoder:
  _name_: set_encoder
  chunk_size: 3
  architecture: MLP
  nr_attention_heads: 1
  feature_embedding_dim: 5
  # NOTE(review): the `n_` prefix reads like a count, but the value is a
  # boolean — confirm the encoder expects a flag here.
  n_attn_summary_statistics: True
  # Shared dropout rate: model.dropout, model.layer.kernel_dropout and
  # model.layer.set_mixing_dropout all interpolate this value.
  # Alternative value noted earlier: 0.
  dropout: 0.1
  debug: False  # model.layer.set_debug interpolates this value
# Dataset overrides for the CoreLogic loan data. Total: 142000 loans.
dataset:
  load_data: True
  # `${oc.env:BASE_PATH,}` reads the BASE_PATH environment variable; the empty
  # default after the comma presumably makes this resolve to
  # "/data/corelogic/..." when BASE_PATH is unset — confirm.
  # An earlier path noted here: "/data/mortgage_new2/".
  data_path: "${oc.env:BASE_PATH,}/data/corelogic/loan_data_top4_zip_52.npz"
  # Total number of features minus 2 (presumably the "52" in the file name is
  # the feature count, 52 - 2 = 50 — confirm).
  num_states: 50
  save_data: False
  max_to_sample: 4500
  nr_loans_to_sample: 2500  # earlier values noted: 500, 6000
  # Also a factor in scheduler.num_training_steps.
  # Earlier value noted: 40 (with the remark "sum to 60 000").
  steps_per_epoch: 35
  # NOTE(review): the original remark on val_split was "Let's not use these" —
  # presumably the fractional splits are superseded by the *_split_date keys
  # below; confirm which ones the data module actually reads.
  val_split: 0.1
  test_split: 0.3
  rolling_model: False
  sample_random_loan_index: True
  sample_random_time_index: True
  # Split boundaries; values look like YYYY-MM strings.
  val_split_date: "2009-06"
  test_split_date: "2009-12"
  dataset_config:
    database_size: 400000

# Model overrides. Every dropout value in this section follows encoder.dropout
# and set_debug follows encoder.debug, so those only need changing in `encoder`.
model:
  n_layers: 6  # earlier value noted: 1
  dropout: ${encoder.dropout}  # earlier value noted: 0
  d_model: 300  # earlier values noted: 32, 64
  layer:
    kernel_dropout: ${encoder.dropout}  # earlier value noted: 0
    weight_init: random
    use_set_mixing: True
    set_mixing_architecture: MLP
    nr_layers_with_set: 6  # earlier value noted: 1; currently equal to n_layers
    set_mixing_dropout: ${encoder.dropout}  # earlier value noted: 0
    set_feature_embedding_dim: 5
    set_debug: ${encoder.debug}
    set_nr_attn_heads: 1
    l_max: 50
    kernel_len: 30  # earlier value noted: 20
  # Mirrors model.layer.l_max at the model level.
  # NOTE(review): the double-underscore prefix presumably marks a key the
  # training code treats specially — confirm.
  __l_max: ${model.layer.l_max}

# Optimizer overrides (the optimizer type itself is not set in this section;
# presumably it comes from the base experiment config — confirm).
optimizer:
  lr: 0.003
  weight_decay: 0.05  # alternative value noted earlier: 0

# Data-loader overrides.
loader:
  # Also a divisor in scheduler.num_training_steps, so changing it changes the
  # derived number of training and warm-up steps.
  batch_size: 1

# Project and name recorded under the `wandb` key for this experiment.
# Both values are plain strings; no quoting is needed for either.
wandb:
  project: corelogic-mortgage
  name: corelogic_set-seq