{
    "model_impl": "retrieval",
    "logger_project": "llm-retrieval",

    "model_name": "pythia-160m-retr-32k_w_meta",
    "tokenizer_path": "/XXXX-30/XXXX-29/XXXX-31/proj-shared/language_datasets/retrieval_staging/ret_meta_tokens",

    "pretrained_prefix_model": true,
    "pretrained_suffix_model": true,

    "data_config": "launch_configs/retrieval/nomic_supervised_src_separated_norm_weighted.json",
    "lockstep_sampling": "world_batch",
    "data_telemetry": 10,

    "add_bos": false,
    "add_eos": true,

    "log_rank_zero_only": true,

    "track_memory": true,

    "max_tokens": null,
    "max_steps": 9936,

    "save_step_interval": 1000,
    "eval_step_interval": 1000000,
    "validate_at_end": false,

    "fabric_precision": "bf16-mixed",
    "fabric_strategy": "axonn_tp",
    "fabric": {
        "optimize_communication": false,
        "depth_tensor_parallel_size": 8
    },
    "attn_impl": "rocm",

    "compile_model": false,
    "compile_model_max_autotune_no_cudagraphs": false,

    "block_size": 2048,
    "world_batch_size": 16,
    "micro_batch_size": 2,

    "suffix_is_prefix": true,

    "batch_prefix_and_suffix": true,
    "pad_to_block_size": true,

    "negatives_cross_device": false,
    "negatives_cross_device_group_size": null,

    "train_group_size": 8,
    "mean_pooling": true,

    "warmup_steps": 400,
    "lr_schedule": "cosine",
    "optim_config": {
        "lr": 3e-5,
        "weight_decay": 0.1,
        "betas": [0.9, 0.95],
        "eps": 1e-8
    },
    "min_lr": 3e-6
}