{
    "effective_batch_size": 512,
    "checkpoint_every_n_steps": 100,
    "num_warmup_steps": 3000,
    "batch_size": 128,
    "context_length": 256,
    "apply_nesim_every_n_steps": 1,
    "dataset_name": "openwebtext",
    "num_train_epochs": 1,
    "learning_rate": 0.0001
}