# Dataset CSV locations plus the question and index selection applied to them.
data:
  train_set_data_path: "medllm/data/combined/train.csv"
  val_set_data_path: "medllm/data/combined/val.csv"
  test_set_data_path: "medllm/data/combined/test.csv"
  # Questions to include from the template, accessible by dataset.
  questions:
    pretreatment:
      - "pretreatment"
    ontreatment:
      - "ontreatment"
  # NOTE(review): presumably the CSV column that holds the report text; confirm.
  report_text_col: "Report Text"
  # Index of data to begin from; used only for test and inference.
  start_idx: 0
  # Index of data to end at; used only for test and inference.
  # NOTE(review): null presumably means "no upper bound"; confirm in the loader.
  end_idx: null

# Model selection, quantization and LoRA settings.
model:
  model_name: "Meta-Llama-3.1-8B-Instruct"
  # Use 4-bit quantization to reduce memory usage.
  load_in_4bit: true
  # NOTE(review): presumably the LoRA rank; confirm against the training code.
  lora_r: 128
  # NOTE(review): how this value maps onto LoRA alpha is not visible here; confirm.
  lora_scaling: 2
  lora_dropout: 0.0

# Settings grouped under `trainer`: sequence length, schedule, seed and batching.
trainer:
  max_seq_length: 4096
  epochs: 20
  learning_rate: 0.0001
  seed: 0
  # NOTE(review): the effective batch per device is presumably
  # per_device_train_batch_size * gradient_accumulation_steps; confirm.
  per_device_train_batch_size: 8
  gradient_accumulation_steps: 1
  
# Generation settings grouped under `test`.
# NOTE(review): presumably forwarded to the model's generation call at
# test/inference time; confirm against the consumer of this config.
test:
  max_new_tokens: 1024
  top_p: 0.5
  temperature: 0.5

# Location of the templates YAML file.
templates_path: configs/combined/templates.yaml

# The path for the saved model if we train the model.
# NOTE(review): how exp_name combines with save_path is not visible here; confirm.
exp_name: "llama-3-1/extraction"
# NOTE(review): presumably true evaluates the base model without fine-tuning; confirm.
zero_shot: false
save_path: outputs/combined/
save_only_necessary_cols: true
verbose: false
save_response: true
use_test_set: true