{
    "name": "bing_bert_large_lamb_seq",
    "bert_token_file": "bert-large-uncased",
    "bert_model_file": "bert-large-uncased",
    "model_config": {
        "vocab_size_or_config_json_file": 264,
        "hidden_size": 128,
        "num_hidden_layers": 4,
        "num_attention_heads": 8,
        "intermediate_size": 1,
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.05,
        "attention_probs_dropout_prob": 0.05,
        "max_position_embeddings": 1024,
        "type_vocab_size": 2,
        "initializer_range": 0.2
    },
    "data": {
        "flags": {
            "pretrain_dataset": true,
            "pretrain_type": "wiki_bc"
        },
        "mixed_seq_datasets": {
            "128": {
                "pretrain_dataset": "data/sentence_128/final_data"
            },
            "512": {
                "pretrain_dataset": "data/sentence_512/final_data"
            },
            "1024": {
                "pretrain_dataset": "data/sentence_1024/final_data"
            },
            "16384": {
                "pretrain_dataset": "data/sentence_16384/final_data"
            }
        }
    },
    "mixed_seq_training": {
        "128": {
            "num_epochs": 1600,
            "warmup_proportion": 0.1,
            "learning_rate": 2.1e-4,
            "weight_decay": 0.0,
            "num_workers": 4,
            "async_worker": true,
            "decay_rate": 0.90,
            "decay_step": 250,
            "degree": 1.0,
            "one_cycle_steps": 125000
        },
        "512": {
            "num_epochs": 1600,
            "warmup_proportion": 0.02,
            "learning_rate": 2e-4,
            "num_workers": 4,
            "async_worker": true,
            "decay_rate": 0.90,
            "decay_step": 150,
            "one_cycle_steps": 125000
        },
        "1024": {
            "num_epochs": 1600,
            "warmup_proportion": 0.02,
            "learning_rate": 1e-3,
            "weight_decay": 0.1,
            "num_workers": 4,
            "async_worker": true,
            "decay_rate": 0.90,
            "decay_step": 150,
            "one_cycle_steps": 125000
        },
        "16384": {
            "num_epochs": 1600,
            "warmup_proportion": 0.02,
            "learning_rate": 1e-3,
            "weight_decay": 0.1,
            "num_workers": 4,
            "async_worker": true,
            "decay_rate": 0.90,
            "decay_step": 150,
            "one_cycle_steps": 125000
        }
    },
    "validation": {
        "path": "validation_set/"
    }
}
