#!/usr/bin/env bash
#
# Launch spiking_pretrain.py through torch.distributed.run with one process
# on one node, and mirror the combined stdout/stderr of the run into
# snn_training.log in the current directory.
#
# Usage: run from the directory that contains spiking_pretrain.py.
# Requires bash (for `set -o pipefail`).

# Without pipefail the exit status of the pipeline below is tee's, so a
# crashed training run would still make this script exit 0. With it, the
# launcher's non-zero status is propagated to the caller.
set -euo pipefail

# Endpoint override read by the Hugging Face Hub client; here it is pointed
# at the hf-mirror.com mirror instead of the default hub host.
export HF_ENDPOINT=https://hf-mirror.com

# NOTE: the `*` in the dataset paths is an unquoted glob expanded by the
# shell. Every matching directory is passed as its own argument after
# --dataset_name; if a pattern matches nothing, bash passes it through
# literally. NOTE(review): presumably `*` stands for a single per-user
# directory under /data -- confirm it matches exactly one path per dataset.
#
# `2>&1` merges stderr into stdout before the pipe so that tee captures both
# streams in the log while still echoing them to the terminal.
python -m torch.distributed.run \
    --nproc_per_node 1 --nnodes 1 --master_port 44144 \
    spiking_pretrain.py \
    --dataset_name \
        /data/*/datasets/STORIES \
        /data/*/datasets/bookcorpus \
        /data/*/datasets/cc_news \
        /data/*/datasets/openwebtext \
        /data/*/datasets/wikipedia \
    --model_name_or_path bert-base-uncased \
    --per_device_train_batch_size 64 \
    --per_device_eval_batch_size 64 \
    --learning_rate 2e-4 \
    --max_train_steps 800000 \
    --num_warmup_steps 5000 \
    --output_dir ./snn_base \
    --max_seq_length 128 \
    --checkpointing_steps 50000 \
    --preprocessing_num_workers 32 \
    --with_tracking \
    --report_to wandb 2>&1 | tee snn_training.log