# Hydra defaults list: each entry replaces the option selected for a config
# group. The leading "/" makes the group path absolute.
# NOTE(review): the top-level keys further down mirror the overridden groups,
# which only lines up if this file is packaged at the config root. No
# global-package header directive is visible at the top of the file -- confirm.
defaults:
  - override /general: resume
  - override /data: ids_data
  - override /model: gpt
  - override /train: base

# Run identification: project name and experiment label.
# NOTE(review): presumably used for logging / output naming -- confirm
# against the consumer.
project: TRACE_RECONSTRUCTION
experiment: std_NESTED

# Settings layered on top of the `resume` option of the general group.
general:
  # Explicit null instead of a bare empty value: the parsed result is the
  # same, but it no longer reads like a forgotten path.
  # NOTE(review): presumably the checkpoint is then located via train_time --
  # confirm against the loader.
  checkpoint_path: null
  # Keep this quoted: YAML 1.1 loaders accept "_" inside integers, so the
  # bare form would be parsed as a number rather than a timestamp string.
  train_time: '20250506_172234'

# Data settings layered on top of the `ids_data` option of the data group.
data:
  sequence_type: nuc
  target_type: std_NESTED
  observation_size: 10
  ground_truth_length: 110
  lower_bound_obs_size: 2
  block_size: 4100
  # Test-set settings.
  test: true
  test_seed: 34721
  test_dataset_size: 50000
  # NOTE(review): the artifact name carries seed34721 / gl110 / ds50000, which
  # match test_seed, ground_truth_length and test_dataset_size above, but
  # bs3900 (presumably a block size) differs from block_size 4100 -- confirm
  # this is the intended artifact.
  test_artifact: std_NESTED_seed34721_gl110_bs3900_ds50000

# Training settings layered on top of the `base` option of the train group.
train:
  ddp: true
  # Evaluation, logging and checkpoint cadence.
  eval_interval: 500
  log_interval: 10
  eval_iters: 1000
  eval_only: false
  always_interval: 500
  always_save_checkpoint: true
  device: cuda:0
  # Batch settings.
  gradient_accumulation_steps: 16
  batch_size: 30
  # Optimizer settings.
  learning_rate: 0.0005477225575051661
  max_iters: 214889  # compute 1e20
  weight_decay: 0.1
  beta1: 0.9
  beta2: 0.95
  grad_clip: 1.0
  # Learning-rate schedule settings.
  decay_lr: true
  warmup_iters: 0
  min_lr: 0.0
  # Interpolation: resolves to the value of train.max_iters above.
  lr_decay_iters: ${train.max_iters}
  train_seed: 100

# Model settings layered on top of the `gpt` option of the model group.
model:
  gpt_params:
    n_layer: 12
    n_head: 8
    n_embd: 512
    # Left as the integer 0 so the parsed type is unchanged.
    dropout: 0
    bias: false

# Logging switch (presumably Weights & Biases, going by the key name).
wandb:
  wandb_log: true