---
# Run-level settings for a fine-tuning job.
# NOTE(review): key meanings below are inferred from their names; confirm
# against the script that loads this file.

# Base checkpoint to fine-tune (relative path).
base_model: models/Llama-2-7b-chat-hf
# Name given to the fine-tuned model.
new_model: llama_natural_question
# Directory for training output (presumably adapter checkpoints).
output_dir: llama_finetuned_adapters
# GPU index to expose. NOTE(review): this parses as an integer; if the
# consumer writes it into CUDA_VISIBLE_DEVICES it has to stringify it, and a
# multi-GPU value such as "0,1" must be quoted.
visible_devices: 0
# DeepSpeed is switched off, so no DeepSpeed config path is supplied.
use_deepspeed: false
path_to_ds_config: null

# Training data source.
dataset:
  # JSON Lines file holding the fine-tuning examples (relative path).
  path_to_dataset: dataset/natural_question/natural_question_finetune_llama.jsonl
  # Presumably shuffles the examples before training; confirm in the loader.
  shuffle: true

# Weights & Biases experiment-tracking identifiers.
# NOTE(review): presumably forwarded to wandb.init as project / run name;
# confirm in the training script.
wandb:
  project_name: Research
  run_name: llama-natural-question

# LoRA adapter hyperparameters.
# NOTE(review): the key names mirror peft.LoraConfig arguments (r, lora_alpha,
# lora_dropout, target_modules, modules_to_save); confirm the mapping in the
# training script.
peft:
  lora_r: 16
  # If this feeds peft.LoraConfig with default scaling, the effective adapter
  # scale is lora_alpha / lora_r = 2 -- TODO confirm.
  lora_alpha: 32
  lora_dropout: 0.05
  # Module names that receive adapters. k_proj is not listed;
  # NOTE(review): confirm that omission is intentional.
  target_modules:
    - q_proj
    - v_proj
    - o_proj
  # null: no additional modules are requested here.
  modules_to_save: null

# Trainer hyperparameters.
# NOTE(review): most keys match Hugging Face TrainingArguments field names and
# are presumably forwarded to it; confirm in the training script.
params:
  num_train_epochs: 2
  per_device_train_batch_size: 8
  gradient_accumulation_steps: 1
  optim: paged_adamw_32bit
  save_steps: 2000
  logging_steps: 25
  # Kept in decimal notation on purpose: YAML 1.1 loaders such as PyYAML read
  # an exponent form without a dot (e.g. 3e-5) as a string, not a float.
  learning_rate: 0.00003
  weight_decay: 0.01
  bf16: true
  max_grad_norm: 1.0  # gradient clipping
  # NOTE(review): in TrainingArguments, -1 means "no step cap", leaving
  # num_train_epochs in control -- confirm the consumer keeps that meaning.
  max_steps: -1
  # NOTE(review): both warmup settings are present. In Hugging Face
  # TrainingArguments a positive warmup_steps overrides warmup_ratio, so the
  # ratio would be ignored -- confirm which one is intended and neutralise
  # the other.
  warmup_ratio: 0.05
  warmup_steps: 200
  group_by_length: true
  lr_scheduler_type: cosine
  # NOTE(review): not a TrainingArguments field; presumably the maximum
  # tokenised sequence length used by the script -- confirm.
  max_length: 4096


