# @package _global_

# python finetune.py exps=microsoft_data/mic_GPT_300M_LC
# CUDA_VISIBLE_DEVICES=x,y torchrun --standalone --nproc_per_node=2 finetune.py exps=microsoft_data/mic_GPT_300M_LC

# Hydra defaults list: for each config group below, replace the option chosen
# by the primary config with the one named here. The keys set further down in
# this file then refine the composed result.
defaults:
  - override /general: scratch
  - override /model: gpt
  - override /train: base
  - override /finetune: finetune_microsoft_LC

# Per-run output directory used by Hydra. It is built from the top-level
# `project` key and `general.run_dir`, relative to the launch directory.
hydra:
  run:
    dir: ../model_checkpoints/${project}/${general.run_dir}

# `project` is interpolated into hydra.run.dir as the first path component.
# NOTE(review): both keys are presumably also used as experiment-tracking
# labels by the training script; confirm against the consumer.
project: TRACE_RECONSTRUCTION
experiment: finetune_microsoft

# Run bookkeeping values used to build the output path.
general:
  # Launch timestamp from Hydra's `now` resolver, formatted YYYYmmdd_HHMMSS.
  now_str: ${now:%Y%m%d_%H%M%S}
  # Empty string here; presumably filled in elsewhere (TODO confirm).
  train_time: ''
  # Relative run directory (trailing slash included), consumed by hydra.run.dir.
  # Every `finetune.*` key and `model.model_type` referenced below is not
  # defined in this file; they must come from the composed config groups.
  run_dir: ${finetune.finetune_data_type}_${finetune.finetune_sequence_type}_${finetune.finetune_target_type}_observation_size_${finetune.finetune_observation_size}_ground_truth_${finetune.finetune_ground_truth_length}/${finetune.finetune_experiment}/train_run_${model.model_type}_${general.now_str}/

# Dataset description for this experiment.
# NOTE(review): general.run_dir is built from the `finetune.finetune_*` keys,
# not from these `data.*` keys; confirm the two sets are meant to agree.
data:
  data_type: ids_data
  sequence_type: nuc
  observation_size: 10
  target_type: CPRED
  ground_truth_length: 110
  # Presumably the minimum observation count, paired with observation_size
  # above as the maximum (TODO confirm against the data loader).
  lower_bound_obs_size: 6
  block_size: 1500

# Training-loop and optimizer settings layered on top of the `base` train group.
train:
  # NOTE(review): the header of this file shows a torchrun launch with two
  # processes; confirm whether `ddp` must be overridden to true for that mode.
  ddp: false
  eval_interval: 250
  log_interval: 10
  # Explicit null (no value set here); how null is interpreted is up to the
  # training script.
  eval_iters: null
  eval_only: false
  always_interval: 500
  always_save_checkpoint: true
  device: cuda:0
  # With batch_size 8 this presumably gives 16 samples per optimizer step per
  # process (TODO confirm how the training script accumulates).
  gradient_accumulation_steps: 2
  batch_size: 8
  # 1e-5 in plain decimal; some YAML 1.1 loaders read a bare `1e-5` as a string.
  learning_rate: 0.00001
  max_iters: 200000
  weight_decay: 0.2
  beta1: 0.9
  beta2: 0.95
  grad_clip: 1.0
  decay_lr: true
  warmup_iters: 500
  min_lr: 0.0
  # Tied to max_iters by interpolation, so the two values cannot drift apart.
  lr_decay_iters: ${train.max_iters}

# Architecture overrides for the `gpt` model group.
model:
  gpt_params:
    n_layer: 24
    n_head: 16
    # 1024 / 16 heads = 64 per head, assuming the embedding is split evenly
    # across heads (TODO confirm in the model code).
    n_embd: 1024
    dropout: 0.3
    bias: false

# Experiment-tracking switch; presumably enables Weights & Biases logging in
# the training script (TODO confirm against the consumer).
wandb:
  wandb_log: true