{
    "task_name": "mention_memory",
    "model_config": {
        "encoder_config": {
            "dtype": "bfloat16",
            "vocab_size": 30522,
            "max_positions": 512,
            "max_length": 128,
            "hidden_size": 768,
            "intermediate_dim": 3072,
            "memory_key_dim": 128,
            "memory_value_dim": 512,
            "separate_memory_values": true,
            "memory_update_type": "additive",
            "memory_update_config": {},
            "same_passage_memory_policy": "disallow",
            "k_top_device": 2,
            "k_top_post_selection": 128,
            "rows": 128,
            "splits": 32,
            "num_attention_heads": 12,
            "num_initial_layers": 4,
            "num_final_layers": 8,
            "dropout_rate": 0.1,
            "final_k_top_device": 2,
            "final_splits": 32
        }
    },
    "seed": 0,
    "num_train_steps": 500000,
    "learning_rate": 0.0001,
    "warmup": true,
    "warmup_steps": 25000,
    "linear_decay": true,
    "decay_minimum_factor": 0,
    "weight_decay": 0.01,
    "weight_decay_exclude": [
        "layer_norm",
        "bias"
    ],
    "grad_clip": 1.0,
    "ignore_k_nans": 10,
    "per_device_batch_size": 32,
    "train_data": [
        {
            "patterns": [
                "<TRAIN DATA PATH>"
            ],
            "samples_per_example": 1
        }
    ],
    "eval_data": [
        {
            "patterns": "<EVAL DATA PATH>",
            "samples_per_example": 1
        }
    ],
    "save_checkpoints": true,
    "checkpoint_every_steps": 5000,
    "save_every_steps": 125000,
    "eval_every_steps": 5000,
    "num_eval_steps": 100,
    "mask_rate": 0.1,
    "mention_mask_rate": 0.2,
    "mlm_weight": 0.85,
    "el_im_weight": 0,
    "el_final_weight": 0.15,
    "max_mlm_targets": 30,
    "max_mention_targets": 10,
    "max_mentions": 24,
    "max_length_with_entity_tokens": 192,
    "coref_res_weight": 0,
    "coref_res_mode": "dot",
    "mtb_im_weight": 0,
    "mtb_final_weight": 0,
    "mtb_score_mode": "dot",
    "same_passage_weight": 0,
    "memory_reduction": 4,
    "memory_key_pattern": <PATH TO MEMORY KEY ENCODINGS>,
    "memory_value_pattern": <PATH TO MEMORY VALUE ENCODINGS>,
    "memory_id_pattern": <PATH TO MEMORY ENTITY IDs>
    "memory_entity_id_pattern": null,
    "memory_text_pattern": null,
    "memory_positions_pattern": null,
    "save_retrieval_every_steps": null,
    "memory_prop": null,
    "load_weights": <PATH TO PRE-TRAINED BATCH TOME-1>
}