# Model settings: the base checkpoint, how it is placed/loaded, and the
# configuration of the nested mask predictor.
model:
  # Model identifier in "org/name" form.
  # NOTE(review): presumably resolved via the Hugging Face Hub — confirm.
  base_model: meta-llama/Llama-3.1-8B-Instruct
  device: cuda
  torch_dtype: bfloat16
  # NOTE(review): flash_attention_2 presumably needs the flash-attn package and
  # a half-precision dtype such as the bfloat16 above — confirm on the target
  # machine.
  attn_implementation: flash_attention_2
  # NOTE(review): presumably turns off the KV cache for training — confirm
  # against the code that consumes this key.
  use_cache: false
  # false => the base model is not frozen.
  # NOTE(review): presumably its weights are updated together with the mask
  # predictor — verify this is intended for an 8B model.
  freeze_base_model: false
  mask_predictor:
    # NOTE(review): presumably the MLP intermediate width the predictor should
    # reduce to; semantics are defined by the consumer — confirm.
    target_intermediate_size: 3584

# Data settings: prompt text, length limits, and loader options.
data:
  # Folded block scalar (`>`): the parsed value is a single line that ends
  # with one trailing newline.
  # NOTE(review): if the consumer does not strip the prompt, `>-` would drop
  # that newline — confirm before changing, since it alters the runtime string.
  system_prompt: >
    You are roleplaying as the specified persona. Stay in character at all times.
  # NOTE(review): both limits are presumably measured in tokens, with
  # cutoff_len bounding prompt + response — confirm against the data pipeline.
  cutoff_len: 1024
  prompt_max_length: 512
  append_eos_to_response: true
  seed: 42
  # NOTE(review): presumably the number of data-loader worker processes —
  # confirm.
  num_workers: 2

# Training-loop settings: output location, optimization hyperparameters,
# logging/checkpoint cadence, and optional Weights & Biases reporting.
training:
  # Relative path; NOTE(review): presumably resolved against the process's
  # working directory — confirm.
  output_dir: outputs/mask_predictor_with_llm_persona
  num_epochs: 3
  batch_size: 2
  # NOTE(review): presumably yields an effective batch of
  # batch_size * grad_accumulation_steps = 16 per optimizer step (per device)
  # — confirm.
  grad_accumulation_steps: 8
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders (e.g. PyYAML) resolve it as a float rather than a string.
  learning_rate: 5.0e-5
  weight_decay: 0.01
  # NOTE(review): presumably the fraction of total training steps used for
  # learning-rate warmup — confirm.
  warmup_ratio: 0.03
  max_grad_norm: 1.0
  logging_steps: 10
  save_each_epoch: true
  wandb:
    # Reporting is switched off here.
    enable: false
    # NOTE(review): `project` and `run_name` below look like placeholder
    # values — set real names before enabling.
    project: project
    run_name: run_name
    entity: null
