#!/bin/sh
#
# Launch src/main_pretrain.py under torchrun on a single node, one worker
# process per GPU listed in gpu_ids.
#
# Usage: run from the repository root (all paths below are relative):
#   sh <this-script> [extra args forwarded to src/main_pretrain.py]
#
# Written for POSIX sh on purpose so it keeps working under `sh` as well
# as `bash`.
set -eu

# Single source of truth for the GPUs to use. The worker count passed to
# torchrun is derived from this list so the two can never drift apart.
gpu_ids="0,1,2,3,4,5,6,7"

# Number of comma-separated entries = number of commas + 1.
commas=$(printf '%s' "$gpu_ids" | tr -cd ',')
num_procs=$(( ${#commas} + 1 ))

# "$@" expands to nothing when the script is called without arguments, so
# the default invocation is exactly the original command line.
CUDA_VISIBLE_DEVICES="$gpu_ids" torchrun \
  --nproc_per_node="$num_procs" \
  src/main_pretrain.py \
  --base_config src/config/train_local/base.yaml \
  --deepspeed src/config/deepspeed/deepspeed_config_mistral.json \
  "$@"