#!/usr/bin/env bash
#
# Run 18 of the "small" pretraining pipeline:
#   1. create the dataset directory for this run,
#   2. run convert_binary.py on SFT17.txt, writing into that directory,
#   3. launch train_gpt2.py under torchrun, starting from the run-10 checkpoint.
#
# All relative paths (RESULT_DIR, convert_binary.py, train_gpt2.py) are resolved
# against the current working directory, so invoke this script from the
# directory that contains them.

# Abort on the first failing command or on use of an unset variable. Without
# this, a failed conversion step would still start the training job on a
# missing or partially written dataset directory.
# (pipefail is intentionally omitted: the script has no pipelines, and leaving
# it out keeps the script usable under a plain POSIX sh.)
set -eu

# RESULT_DIR keeps its trailing slash; paths below are built by concatenation.
RESULT_DIR="hallucinate_small/"
data_dir="${RESULT_DIR}pretrain_perturbed18"
log_dir="/data/temp_log18"

mkdir -p "$data_dir"

# Conversion step: input text file -> output directory.
# NOTE(review): --val_shard_size presumably sets the size of the validation
# shard; confirm the unit against convert_binary.py.
python convert_binary.py \
        -i "${RESULT_DIR}SFT17.txt" \
        -o "$data_dir" \
        --val_shard_size 10000000

# Training step: single-node (--standalone) launch with 8 worker processes.
# --load_checkpoint points at a state file saved by run 10.
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
        --input_folder "$data_dir" \
        --save_every 1000 \
        --val_loss_every 1000 \
        --run_name "xs_pretrain_small_18" \
        --warmup_ratio 0.05 \
        --warmdown_ratio 0.9 \
        --sequence_length 512 \
        --device_batch_size 16 \
        --num_epochs 4 \
        --weight_decay 0.1 \
        --load_checkpoint "/data/temp_log10/xs_pretrain_small_10/state_step046251.pt" \
        --learning_rate 0.0003 \
        --batch_size 128 \
        --bf16 \
        --model_size small \
        --output_dir "$log_dir"