# Launch pretrain_umd/train_retrieval_w_anticausal.py through Slurm's srun.
#
# Required variables (expected to be set earlier in the script or exported
# by the caller):
#   NUM_NODES - value passed to `srun -N` (node count)
#   GPUS      - value passed to `srun -n` (task count; presumably one task
#               per GPU -- confirm against the job allocation)
#   ROOT_DIR  - base directory under which the --out_dir path is created
#
# NOTE(review): --out_dir is named "cosmopedia-..." while --run_name and the
# train/val data directories say "redpajama" -- confirm which is intended.

# Fail fast with a clear message instead of handing srun a mangled command
# line (e.g. `srun -N -n python ...`) or writing output under `/out/...`.
: "${NUM_NODES:?NUM_NODES must be set to the number of nodes for srun -N}"
: "${GPUS:?GPUS must be set to the number of tasks for srun -n}"
: "${ROOT_DIR:?ROOT_DIR must be set to the base output directory}"

srun -N "${NUM_NODES}" -n "${GPUS}" python pretrain_umd/train_retrieval_w_anticausal.py \
    --out_dir "${ROOT_DIR}/out/cosmopedia-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata" \
    --seed 1337 \
    --model_name tiny-llama-1.1b \
    --run_name redpajama-retrieval-dual-causal-llama-1.1b-bsz-1-ctx-2048-batch_negative_hfdata \
    --logger_name wandb \
    --compile_model False \
    --fabric_precision "bf16-mixed" \
    --world_batch_size 2 \
    --micro_batch_size 1 \
    --learning_rate "3e-3" \
    --block_size 2048 \
    --n_chunks 4 \
    --max_tokens "1_000_000_000_000" \
    --warmup_steps 2000 \
    --log_step_interval 1 \
    --eval_iters 100 \
    --save_and_eval_interval 2000 \
    --weight_decay "2e-2" \
    --beta1 0.9 \
    --beta2 0.999 \
    --grad_clip 1.0 \
    --decay_lr True \
    --min_lr "4e-5" \
    --data_telemetry False \
    --data_config /XXXX-36/XXXX-22/XXXX-40/launch_scripts/XXXX-22/sample_hfds_only.json \
    --train_data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/splitted_redpajama \
    --val_data_dir /fs/XXXX-37/llm-pretraining/llm-retrieval/data/splitted_redpajama
