# Identifier of the base model to load.
# NOTE(review): presumably a Hugging Face Hub repo id; the organisation is
# normally spelled "Qwen" (capital Q) — confirm the lowercase "qwen" resolves
# as intended.
pretrained_model_name_or_path: qwen/Qwen3-8B
# Short tag naming the model family; how it is consumed is not visible here.
model_family: qwen3-8b

# LoRA adapter settings.
# NOTE(review): the key names match the arguments of peft's LoraConfig, so this
# mapping is presumably forwarded to it — confirm against the loader.
lora:
    r: 8
    lora_alpha: 32
    lora_dropout: 0.05
    # `none` parses as the plain string "none", not as YAML null.
    bias: none
    task_type: CAUSAL_LM

# Top-level training / evaluation hyperparameters.
# NOTE(review): presumably the defaults used by every method unless one of the
# named sections further down overwrites a key — confirm against the loader.
train_batch_size: 8
eval_batch_size: 32
gradient_accumulation_steps: 1
learning_rate: 0.0001
weight_decay: 0
num_epochs: 15
generation_max_length: 200
max_seq_length: 512
load_in_4bit: false
# Quoted on purpose: a bare `no` is resolved to boolean false by YAML 1.1
# loaders (e.g. PyYAML), so the consumer would receive False instead of the
# string "no".
save_strategy: 'no'

# hyperparameters to overwrite 

# Settings for `finetune`: learning rate 0.00003 and a single epoch, versus
# the top-level values of 0.0001 and 15 epochs.
finetune:
    learning_rate: 0.00003
    num_epochs: 1

# Settings for `grad_ascent` (presumably gradient ascent — confirm): learning
# rate 0.00005 and a single epoch, versus the top-level 0.0001 and 15 epochs.
grad_ascent:
    learning_rate: 0.00005
    num_epochs: 1

# Settings for `grad_diff` (presumably gradient difference — confirm): learning
# rate 0.00005 and a single epoch, versus the top-level 0.0001 and 15 epochs.
grad_diff:
    learning_rate: 0.00005
    num_epochs: 1

# Settings for `scrub`; keys are listed learning_rate-first like the other
# per-method sections in this file.
scrub:
    learning_rate: 0.00005
    num_epochs: 1
    # NOTE(review): how num_total_epochs relates to num_epochs is decided by
    # the consuming code, which is not visible here — confirm.
    num_total_epochs: 3

# Settings for `npo`: learning rate 0.00005 and a single epoch, versus the
# top-level values of 0.0001 and 15 epochs.
npo:
    learning_rate: 0.00005
    num_epochs: 1

# Settings for `scr_newton`.
# NOTE(review): the meaning of each key is defined by the consuming code, which
# is not visible here; the notes below are assumptions to confirm.
scr_newton:
    # TODO confirm what M counts; the name alone does not say.
    M: 400
    # Presumably an outer loop of 2 steps, each running 5 inner steps — confirm.
    num_outer_steps: 2
    # Presumably sample counts for gradient and Hessian estimates — confirm.
    grad_sample_size: 4
    hess_sample_size: 2
    num_inner_steps: 5
    # Far larger than the 0.00003-0.0001 learning rates used elsewhere in this
    # file; presumably a step size for a different kind of update — confirm.
    learning_rate: 0.07
    sigma: 0.1
    forget_coeff: 0.5
    # Boolean flag; presumably enables TOFU-benchmark-specific handling — confirm.
    tofu: true