
#CUDA_VISIBLE_DEVICES=0,3 deepspeed --include localhost:0,3 --master_port 29501 src/train/pretrain.py --deepspeed configs/ds_config.json \
#    --encoder_layers_num 4 \
#    --decoder_layers_num 1 \
#    --hidden_size 256 \
#    --intermediate_size 1024 \
#    --data_size 0.001 \
#    --eval_steps 1000


#CUDA_VISIBLE_DEVICES=0,1,2,3 deepspeed --include localhost:0,1,2,3  src/train/pretrain.py --deepspeed configs/ds_config.json \
#    --encoder_layers_num 6 \
#    --decoder_layers_num 1 \
#    --hidden_size 384 \
#    --intermediate_size 1536 \
#    --data_size 1.0 \
#    --eval_steps 5000

# Launch src/train/pretrain.py through the DeepSpeed launcher on local GPUs 1
# and 3, using configs/ds_config.json as the DeepSpeed configuration.
#
# GPU selection is done only via --include. DeepSpeed's documentation states
# that CUDA_VISIBLE_DEVICES cannot be used to choose devices for the launcher
# (use --include localhost:<ids> instead), so the former
# `CUDA_VISIBLE_DEVICES=1,3` prefix was dropped: combined with --include it is
# at best ignored and at worst makes the launcher see only two devices
# (slots 0,1) and reject slot 3.
#
# No model/data flags are passed here, so pretrain.py presumably falls back to
# its own defaults -- confirm against the script's argument parser.
deepspeed --include localhost:1,3 \
    src/train/pretrain.py \
    --deepspeed configs/ds_config.json

