# Base checkpoint to load. The key name matches the Hugging Face
# `from_pretrained` argument, so this is presumably a Hub repo id or a local
# path — confirm against the loader.
# NOTE(review): the Hub organisation is spelled `Qwen`; verify that the
# lowercase `qwen/` prefix resolves as intended.
pretrained_model_name_or_path: qwen/Qwen3-8B
# Short model identifier; presumably used to select model-specific settings
# elsewhere in the project — TODO confirm.
model_family: qwen3-8b

# LoRA adapter settings. The key names mirror fields of PEFT's LoraConfig, so
# this mapping is presumably handed to it unchanged — confirm against the
# caller.
lora:
    task_type: CAUSAL_LM
    # Rank and alpha: lora_alpha / r = 4 with these values (the adapter scale,
    # if PEFT is what consumes them).
    r: 8
    lora_alpha: 32
    lora_dropout: 0.05
    # Plain string "none", not a YAML null.
    bias: none

# Default training/evaluation hyperparameters. The method-specific sections
# further down repeat some of these keys (learning_rate, num_epochs,
# train_batch_size) — presumably as per-method overrides; confirm against the
# config loader.
train_batch_size: 8
eval_batch_size: 32
gradient_accumulation_steps: 1
learning_rate: 0.00015
weight_decay: 0
num_epochs: 5
# Length limits — presumably generation_max_length caps generated output and
# max_seq_length caps tokenized inputs; verify in the consuming code.
generation_max_length: 200
max_seq_length: 512
load_in_4bit: false
# Quoted on purpose: YAML 1.1 loaders such as PyYAML resolve a bare `no` to
# boolean false, so the string "no" would never reach the consumer.
# NOTE(review): if any consumer tests this value for falsiness instead of
# comparing against "no", update that check accordingly.
save_strategy: "no"

# Settings for the `grad_ascent` method. Both keys also exist at the top level
# (learning_rate has the same value there, num_epochs is 5), so these are
# presumably per-method overrides — confirm against the config loader.
grad_ascent:
    learning_rate: 0.00015
    num_epochs: 1

# Settings for the `grad_diff` method. Both keys also exist at the top level
# (learning_rate has the same value there, num_epochs is 5), so these are
# presumably per-method overrides — confirm against the config loader.
grad_diff:
    learning_rate: 0.00015
    num_epochs: 2

# Settings for the `scrub` method. Besides the learning_rate / num_epochs pair
# used by the sibling sections, it carries an extra num_total_epochs value —
# how the two epoch counts interact is not visible here; check the consumer.
scrub:
    learning_rate: 0.00015
    num_epochs: 1
    num_total_epochs: 5

# Settings for the `finetune` method. Both keys also exist at the top level
# (learning_rate has the same value there, num_epochs is 5), so these are
# presumably per-method overrides — confirm against the config loader.
finetune:
    learning_rate: 0.00015
    num_epochs: 1

# Settings for the `idk` method. Only the batch size is listed, at half the
# top-level train_batch_size of 8; presumably every other value falls back to
# the top-level defaults — confirm against the config loader.
idk:
    train_batch_size: 4

# Settings for the `npo` method. The learning rate here (0.00005) is one third
# of the top-level 0.00015; both keys are presumably per-method overrides —
# confirm against the config loader.
npo:
    learning_rate: 0.00005
    num_epochs: 1

# Settings for the `scr_newton` method. The groupings and meanings noted below
# are inferred from key names only — verify against the implementation before
# tuning.
scr_newton:
    # Step counts.
    num_outer_steps: 2
    num_inner_steps: 5
    # Sample sizes — presumably for gradient and Hessian estimates.
    grad_sample_size: 4
    hess_sample_size: 2
    # Scalar knobs; learning_rate (0.1) is much larger than in the other
    # sections of this file.
    learning_rate: 0.1
    sigma: 0.1
    forget_coeff: 0.05
    M: 400
    # Boolean flag; presumably enables TOFU-specific handling — TODO confirm.
    tofu: true