{
    "model": {
        "baseline_model": "Qwen/Qwen3-0.6B",
        "attn_implementation": "flash_attention_2"
    },
    "training": {
        "learning_rate": 1e-4,
        "weight_decay": 0.01,
        "num_epochs": 1,
        "max_length": 32768,
        "device": "cuda",
        "scheduler_type": "linear",
        "warmup_ratio": 0.1,
        "max_grad_norm": 1.0,
        "per_device_train_batch_size": 8,
        "num_processes": 8,
        "freeze": [],
        "gradient_accumulation_steps": 2,
        "seed": 42
    },
    "output": {
        "output_dir": "local/checkpoints/Qwen3-0.6B_Swahili_1.6M",
        "save_steps": 64,
        "eval_steps": 16,
        "wandb_config": {
            "project": "Rosetta",
            "mode": "online",
            "run_name": "baseline"
        }
    },
    "data": {
        "type": "AyaChatDataset",
        "kwargs": {
            "split": "train",
            "num_samples": 1600000,
            "max_word_count": 1536,
            "language": "swahili"
        },
        "train_ratio": 0.95
    }
} 