{
    "model": "unsloth/Qwen2.5-3B-Instruct",
    "training_file": "dataset_math/math_train.jsonl",
    "test_file": "dataset_math/math_eval.jsonl",
    "finetuned_model_id": "grpo/model",
    "max_seq_length": 3048,
    "load_in_4bit": true,
    "loss": "grpo",
    "is_peft": true,
    "target_modules": [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ],
    "lora_bias": "none",
    "r": 32,
    "lora_alpha": 64,
    "lora_dropout": 0.0,
    "use_rslora": true,
    "merge_before_push": true,
    "push_to_private": true,
    "epochs": 1,
    "max_steps": null,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
    "learning_rate": 5e-6,
    "logging_steps": 5,
    "optim": "adamw_8bit",
    "weight_decay": 0.1,
    "lr_scheduler_type": "cosine",
    "seed": 3407,
    "beta": 0,
    "save_steps": 5000,
    "output_dir": "./tmp/grpo_with_steering",
    "train_on_responses_only": true,
    "reward_model": "gpt-4.1-mini",
    "grader_type": "math_correct",
    "reasoning_grader_type": "none",
    "reward_coherence": false,
    "print_training": true,
    "define_assistant_reasoning": false,
    "evaluate_epoch": 3,
    "num_generations": 4,
    "rl_max_new_tokens": 32768,
    "max_prompt_length": 756,
    "rl_temperature": 0.9,
    "rl_top_p": 0.9,
    "enable_steering_during_training": false
}
