{
    "model_impl": "retrieval",
    "logger_project": "llm-retrieval",

    "model_name": "pythia-160m-retr-32k_w_meta",
    "tokenizer_path": "/XXXX-36/XXXX-22/retrieval-pretrained-01/ret_meta_tokens",

    "data_config": {
        "train_data": [
            {
                "type": "hfds",
                "prefix": "",
                "weight": 1,
                "data_dir": "/fs/XXXX-37/llm-pretraining/datasets/raw/dolma_v1_6_sample/train"
            }
        ],
        "val_data": [
            {
                "type": "hfds",
                "prefix": "",
                "weight": 1,
                "data_dir": "/fs/XXXX-37/llm-pretraining/datasets/raw/dolma_v1_6_sample/train"
            }
        ]
    },
    
    "log_rank_zero_only": true,
    
    "track_memory": true,
    "derive_cost_basis": true,
    "stability_step": 500,
    "target_token_count": 395e9,
    
    "max_tokens": null,
    "max_steps": 131900,
    
    "save_step_interval": 1000,
    "eval_step_interval": 1000,
    "save_n_min_before_job_done": 3,

    "fabric_precision": "bf16-mixed",
    "fabric_strategy": "axonn_tp",
    "fabric": {
        "optimize_communication": false,
        "depth_tensor_parallel_size": 8
    },
    "attn_impl": "sdpa",

    "compile_model": false,
    "compile_model_max_autotune_no_cudagraphs": false,
    "pad_to_block_size": false,
    
    "block_size": 2048,
    "world_batch_size": 16,
    "micro_batch_size": 2,

    "suffix_is_prefix": true,
    "batch_prefix_and_suffix": true, 

    "negatives_cross_device": true,
    "negatives_cross_device_group_size": 2,

    "mask_full_ldiag": true,
    "k_random_pos_labels": 5,

    "warmup_steps": 4000,
    "cooldown_steps": 0,
    "lr_schedule": "cosine",
    "optim_config":{
        "lr": 6e-4,
        "weight_decay": 0.1,
        "betas": [0.9, 0.95],
        "eps": 1e-8
    },
    "min_lr": 6e-5
}