{
    "model": {
        "base_model": "Qwen/Qwen3-0.6B",
        "teacher_model": "Qwen/Qwen3-4B",
        "include_response": false,
        "projector": {
            "type": "ExtendGatedProjector",
            "params": {
                "hidden_dim": 1024,
                "num_layers": 3,
                "dropout": 0.1,
                "activation": "gelu",
                "use_layer_norm": true,
                "init_type": "zero",
                "gate_type": "channelwise"
            }
        }
    },
    "training": {
        "learning_rate": 3e-4,
        "weight_decay": 0.01,
        "num_epochs": 1,
        "max_length": 32768,
        "device": "cuda",
        "scheduler_type": "linear",
        "warmup_ratio": 0.1,
        "max_grad_norm": 1.0,
        "per_device_train_batch_size": 4,
        "num_processes": 8,
        "freeze": ["teacher","base"]
    },
    "output": {
        "output_dir": "local/checkpoints",
        "save_steps": 10000,
        "eval_steps": 10000,
        "wandb_config": {
            "project": "Rosetta",
            "mode": "online",
            "run_name": "rosetta_gated_projector_layer3"
        }
    },
    "data": {
        "type": "MMLUChatDataset",
        "split": "test",
        "num_samples": null,
        "train_ratio": 0.99
    }
}