# Launch GPT pretraining (pretrain_gpt.py) through torchrun, saving to and
# loading from checkpoints/<PROJECT_NAME>.
#
# Usage: <this script> [PROJECT_NAME]
#   PROJECT_NAME  Name of the sub-directory under checkpoints/; defaults to
#                 gpt2-small-openwebtext-adam-lr3e-3-wd0.05.
#
# Requires checkpoints/<PROJECT_NAME>/gpt_args to be readable: its contents
# are spliced into the pretrain_gpt.py command line as whitespace-separated
# arguments.
set -u # stop on unset variables

# NOTE(review): the reason for this setting is not visible in this script --
# confirm with the training code before changing it.
export CUDA_DEVICE_MAX_CONNECTIONS=1

# Set PROJECT_NAME from the first argument or use the default value if not provided
PROJECT_NAME=${1:-gpt2-small-openwebtext-adam-lr3e-3-wd0.05}
CHECKPOINT_PATH=checkpoints/$PROJECT_NAME
GPT_ARGS_FILE=$CHECKPOINT_PATH/gpt_args
VOCAB_FILE=tokenizer/vocab.json
MERGE_FILE=tokenizer/merges.txt
DATA_PATH=/data2/ymdong/openwebtext/openwebtext_text_document

# Fail fast when the argument file is missing. Without this check the launch
# would go ahead with an empty GPT_ARGS, because `set -e` is not enabled and a
# failed command substitution does not stop the script on its own.
if [ ! -r "$GPT_ARGS_FILE" ]; then
    printf 'error: cannot read model arguments file: %s\n' "$GPT_ARGS_FILE" >&2
    exit 1
fi
GPT_ARGS=$(cat "$GPT_ARGS_FILE") || {
    printf 'error: failed to read model arguments from: %s\n' "$GPT_ARGS_FILE" >&2
    exit 1
}

# GPT_ARGS is expanded unquoted on purpose so that it splits into separate
# arguments; globbing is switched off so that none of those words can be
# expanded into file names by accident.
set -f

# shellcheck disable=SC2086 # word splitting of GPT_ARGS is intentional
torchrun --nproc_per_node=8 --master_port=2346 \
    pretrain_gpt.py \
    --tensor-model-parallel-size 1 \
    --pipeline-model-parallel-size 1 \
    --recompute-activations \
    $GPT_ARGS \
    --vocab-file "$VOCAB_FILE" \
    --merge-file "$MERGE_FILE" \
    --save "$CHECKPOINT_PATH" \
    --load "$CHECKPOINT_PATH" \
    --data-path "$DATA_PATH"