{
    "output_dir": "/path/to/output_dir",
    "run_name": "Qwen2.5-3B-MATH-HARD-sft",
    "dataset_text_field": "text",
    "dataset_kwargs": {"add_special_tokens": false},
    "max_seq_length": 3000,
    "per_device_train_batch_size": 4,
    "learning_rate": 2e-5,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "num_train_epochs": 3,
    "bf16": true,
    "logging_steps": 1,
    "save_strategy": "epoch",
    "gradient_accumulation_steps": 8,
    "gradient_checkpointing": true,
    "report_to": "wandb"
}
