{
    "model": {
        "baseline_model": "Qwen/Qwen3-0.6B"
    },
    "training": {
        "learning_rate": 1e-4,
        "weight_decay": 0.01,
        "num_epochs": 3,
        "max_length": 2048,
        "device": "cuda",
        "scheduler_type": "linear",
        "warmup_ratio": 0.1,
        "max_grad_norm": 1.0,
        "per_device_train_batch_size": 4,
        "gradient_accumulation_steps": 4,
        "seed": 42,
        "lora": {
            "r": 16,
            "lora_alpha": 32,
            "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            "lora_dropout": 0.1,
            "bias": "none",
            "task_type": "CAUSAL_LM"
        }
    },
    "data": {
        "type": "MMLUChatDataset",
        "kwargs": {
            "split": "test",
            "num_samples": 1000
        },
        "train_ratio": 0.8
    },
    "output": {
        "output_dir": "outputs/baseline_lora",
        "eval_steps": 100,
        "save_steps": 500,
        "wandb_config": {
            "project": "baseline_lora_training",
            "run_name": "baseline_lora_run",
            "mode": "offline"
        }
    }
}
