{
    "name": "bing_bert_large",
    "model_config": {
        "vocab_size_or_config_json_file": 30528,
        "hidden_size": 1024,
        "num_hidden_layers": 24,
        "num_attention_heads": 16,
        "intermediate_size": 4096,
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.1,
        "attention_probs_dropout_prob": 0.1,
        "max_position_embeddings": 512,
        "attention_kernel": "linear",
        "feature_map": "1_plus_elu",
        "type_vocab_size": 2,
        "initializer_range": 0.02
    },
    "data": {
        "training": {
            "input_file": "sentence_512/c4_en/c4-train.00100-of-01024.json.gz",
            "max_seq_length": 128,
            "total_samples": 1048576
        },
        "validation": {
            "input_file": "sentence_512/c4_en/c4-train.00101-of-01024.json.gz",
            "max_seq_length": 128,
            "total_samples": 1048576
        },
        "test": {
            "input_file": "sentence_512/c4_en/c4-train.00102-of-01024.json.gz",
            "max_seq_length": 128,
            "total_samples": 1048576
        }
    },
    "training": {
        "num_epochs": 1600,
        "lr_scheduler_params": {
            "warmup_ratio": 0.03,
            "warmup_degree": 1,
            "degree": 1,
            "one_cycle_steps": 250000
        },
        "lr_schedule": "cosine",
        "lr_offset": 0.0,
        "learning_rate": 4e-4,
        "weight_decay": 0.0,
        "num_workers": 4
    }
}