# @package _global_

# Hydra defaults list: picks the config-group option this experiment uses
# for each of the general, model, train and finetune groups.
defaults:
  - override /general: scratch
  - override /model: gpt
  - override /train: base
  - override /finetune: microsoft_dynamic


# Project and experiment identifiers for this run.
# NOTE(review): presumably consumed by logging / output naming - confirm.
project: TRACE_RECONSTRUCTION
experiment: finetune_microsoft_test_reproducibility

general:
  # Where the checkpoint should be saved. Left explicitly unset (null) in
  # this file; a bare `key:` parses to the same null value but reads as an
  # accidental omission.
  # NOTE(review): presumably supplied by a command-line override - confirm.
  checkpoint_path: null
  # train_time: '20250613_183701'

# Dataset and sequence settings for this experiment.
data:
  data_type: ids_data
  sequence_type: nuc
  observation_size: 10
  target_type: CPRED
  ground_truth_length: 110
  lower_bound_obs_size: 2
  block_size: 1500
  train_seed: 42

# Training-loop and optimizer settings for this fine-tuning run.
train:
  ddp: false
  eval_interval: 250
  log_interval: 10
  eval_iters: 250
  eval_only: false
  always_interval: 250
  always_save_checkpoint: true
  device: cuda:0
  gradient_accumulation_steps: 2
  batch_size: 26
  # Written with an explicit mantissa dot so that YAML 1.1 loaders
  # (e.g. plain PyYAML) resolve it as a float instead of the string "1e-5".
  learning_rate: 1.0e-5
  max_epochs: 100000  # placeholder, we train for fixed compute
  max_iters: 566047
  weight_decay: 0.1
  beta1: 0.9
  beta2: 0.95
  grad_clip: 1.0
  decay_lr: true
  warmup_iters: 0  # placeholder; set to 1% of max_iters in finetune
  min_lr: 0.0
  # Interpolated from train.max_iters, so the two values always match.
  lr_decay_iters: ${train.max_iters}
  # patience: if set, early stopping is used; otherwise it is set to a
  # large value


# Size and regularisation parameters passed under model.gpt_params.
model:
  gpt_params:
    n_layer: 12
    n_head: 8
    n_embd: 512
    dropout: 0.1
    bias: false

# Weights & Biases logging switch.
# NOTE(review): presumably enables W&B logging when true - confirm.
wandb:
  wandb_log: true