# Model architecture: encoder, projection head, and the nested ELM settings.
model:
  model_name: ELM_MIL
  type: ResNet2
  n_classes: 2  # number of classes
  in_channels: 20  # number of EEG channels
  n_time_samples: 6000  # samples per epoch
  # Number of residual encoder blocks.
  # NOTE(review): 4 blocks and 4 pool sizes, but only 3 rows in each of the
  # two parameter lists below -- presumably the rows are not per-block
  # settings; confirm against the model code.
  encoder_blocks: 4
  # Associated parameters for the initial conv.
  encoder_conv1_params:
    - [4, 32, 1]
    - [8, 32, 1]
    - [16, 32, 1]
  # Associated parameters for the res blocks.
  encoder_res_params:
    - [4, 32, 1]
    - [8, 32, 1]
    - [16, 32, 1]
  encoder_pool_size: [4, 4, 4, 4]
  # NOTE(review): the *_dropout_p keys are documented as probabilities but are
  # set to a boolean here -- presumably `false` disables dropout; confirm.
  encoder_dropout_p: false
  res_dropout_p: false  # dropout prob for res blocks
  rep_dim: 96  # dimensionality of representation
  head_dims: [256]  # dimensionality of head (internal)
  head_out_dim: 32  # dimensionality of head (output)
  head_batch_norm: true  # batch-norm toggle for head
  head_dropout_p: false  # dropout prob for head
  convert_to_TF: false
  checkpoint_path: null
  pretrained_path: null
  ELM:
    eeg_proj_size: [512, 256]
    text_proj_size: [1024, 256]
    LM_freeze_layers: 12
    MIL_max_eeg_pairs: 32
    MIL_max_text_pairs: 8
    # Quoted so it is unambiguously the single string "x,y" (not a list).
    MIL_positive_sampling: 'x,y'
    LM_pretrained_url: ncbi/MedCPT-Query-Encoder
    custom_cache: /path/to/custom/models/cache/
    text_data_filename: path/to/reports.json
    text_prefix: ""
    text_sample_mode: paragraph  # one of: report, paragraph, sentence
    text_headings:
      - CORRELATION
      - CLINICAL CORRELATION
      - IMPRESSION
      - CONCLUSION
      - SUMMARY
      - DIAGNOSIS
      - INTERPRETATION

# Training / evaluation run settings.
training:
  target: [PAT]
  # SSL_PRE for pretraining, SSL_LIN for linear evaluation,
  # GEN_EMB for only generating embeddings.
  setting: [SSL_PRE]
  # Whether to do finetuning on the same subset as pretraining.
  finetune_on_subset: false
  subject_level_features: false
  adv_targets: null
  # Loss function to be minimized in 'setting'.
  loss_function: ELM_MIL_FrozenLM_Loss
  gamma: 0.5
  use_LARS: true  # whether to use LARS-optimizer. If false, Adam is used.
  # Whether to sample and process single channels or epochs.
  inference_type: epochs
  n_augmentations: 0  # only for SSL_PRE: amount of augmentations applied
  batch_size: 30  # per GPU
  num_epochs: 50
  patience: 999  # early stopping
  warmup_epochs: 4  # linear warmup
  model_save_path: /path/to/model/save/location/
  results_save_path: /path/to/result/save/location/
  do_test: false
  num_workers: 10  # per GPU
  amp: true  # whether to use autocasting (FP16 inference)
  n_cv_iter: 1
  n_outer_folds: [1]
  n_train_labels: "ALL"
  n_val_labels: 1
  n_test_labels: 1
  learning_rate: 0.06
  weight_decay: 0.0001
  T: 0.3  # temperature parameter for loss (ContraWR, CLIP)
  m: 0.996  # BYOL EMA parameter
  # What data to create embeddings for; subsample, all, or null.
  # NOTE(review): the configured value `test_indices` is not among the options
  # listed above -- confirm which values the consumer accepts.
  embed: test_indices
  embed_batch_size: 800
  random_seed: 0
  debug: false

# The directory given by `dataset.path` should contain:
#   ./data/your_dataset.h5
#   ./indices/[train_subsample]_indices.npy
# Optionally:
#   ./data/your_test_dataset.h5
#   ./indices/[test_subsample]_test_indices.npy

# Dataset location and subsample selection.
dataset:
  path: /path/to/dataset/TUEG/
  name: your_dataset.h5  # filename of the dataset
  test_name: null
  sfreq: 100  # EEG sampling frequency
  # Whether to load the entire dataset into RAM prior to training.
  preload: false
  train_subsample: pretrain
  val_subsample: null
  test_subsample: all_to_embed

# Candidate values per key.
# NOTE(review): presumably swept over in place of training.random_seed --
# confirm against the consumer.
grid:
  random_seed:
    - 0
    - 1
    - 2
    - 3
    - 4

# Example launch command (the trailing `&!` is zsh syntax: run in background and disown):
# nohup torchrun --nproc_per_node=1 --master_port 12429 new_main.py -f ResNet2_SSL.yaml > ./logs/name.log 2>&1 &!
  
