{
    "name": "bert_large_seq",
    "bert_token_file": "bert-large-uncased",
    "bert_model_file": "bert-large-uncased",
    "bert_model_config": {
        "vocab_size_or_config_json_file": 119547,
        "hidden_size": 1024,
        "num_hidden_layers": 24,
        "num_attention_heads": 16,
        "intermediate_size": 4096,
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.1,
        "attention_probs_dropout_prob": 0.1,
        "max_position_embeddings": 512,
        "type_vocab_size": 2,
        "initializer_range": 0.02
    },
    "data": {
        "flags": {
            "pretrain_dataset": true,
            "pretrain_type": "wiki_bc"
        },
        "mixed_seq_datasets": {
            "128": {
                "wiki_pretrain_dataset": "path to dataset",
                "bc_pretrain_dataset": "path to dataset"
            },
            "512": {
                "wiki_pretrain_dataset": "path to dataset",
                "bc_pretrain_dataset": "path to dataset"
            }
        }
    },
    "mixed_seq_training": {
        "128": {
            "num_epochs": 500,
            "warmup_proportion": 0.1,
            "learning_rate": 4e-4,
            "num_workers": 32,
            "async_worker": true,
            "decay_rate": 0.99,
            "decay_step": 520,
            "total_training_steps": 125000
        },
        "512": {
            "num_epochs": 160,
            "warmup_proportion": 0.02,
            "learning_rate": 1e-5,
            "num_workers": 4,
            "async_worker": true,
            "decay_rate": 0.90,
            "decay_step": 150,
            "total_training_steps": 7500
        }
    },
    "validation": {
        "path": "validation_set/"
    }
}
