# Run identity.
experiment_name: llama3

# Inputs: tasks to run and where the dataset lives.
# NOTE(review): relative paths; the base directory they resolve against is
# decided by the consuming script, so confirm the expected working directory.
tasks:
  - pol
datapath: ../data/meditod/

# Outputs.
# NOTE(review): presumably the directory run artifacts are written to; confirm.
destpath: ./runs/llama3/

# Model selection and input-formatting options.
# Key semantics are defined by the consuming code, which is not visible here.
model:
  # Checkpoint identifier and loading options.
  wildcard: unsloth/llama-3-8b-instruct-bnb-4bit
  quantization: 4
  max_seq_length: 4096

  # Prompt / input formatting options.
  prompt_type: baseline
  note_type: json
  # NOTE(review): -1 looks like a "no limit" sentinel; confirm against loader.
  history_size: -1

# Training settings, grouped by concern. Key order carries no meaning.
train:
  # Reproducibility.
  seed: 44

  # Batching and numeric precision.
  per_device_train_batch_size: 16
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  group_by_length: true
  # NOTE(review): fp16 is off; which precision is used instead is decided by
  # the training script. Confirm it matches the target hardware.
  fp16: false

  # Optimisation schedule.
  learning_rate: 0.00005
  lr_scheduler: cosine
  warmup_ratio: 0.1
  num_epochs: 10

  # LoRA hyperparameters.
  lora_r: 32
  lora_alpha: 128

  # Evaluation, checkpointing and best-model selection.
  evaluation_strategy: epoch
  save_strategy: epoch
  # NOTE(review): both strategies above are "epoch"; confirm whether this
  # step interval is still read by the trainer in that mode.
  save_eval_steps: 1
  save_total_limit: 2
  metric_for_best_model: loss
  greater_is_better: false
  early_stopping_patience: 5
  # NOTE(review): opaque numeric selector; meaning is defined by the consuming
  # code. Document the allowed values once confirmed.
  metric_type: 1

# Settings for the dev split.
# NOTE(review): presumably consumed by the evaluation loop; confirm.
dev:
  per_device_eval_batch_size: 1

# Logging toggle.
# NOTE(review): presumably enables Weights & Biases tracking, which would
# need credentials in the run environment; confirm against the training script.
use_wandb: true
