#!/bin/bash
#
# Launches LLaMA-Factory's train_bash.py through the `deepspeed` launcher:
# --stage sft, --finetuning_type full, dataset m2w_16k_stop_2k, seed 2267.
#
# Usage: run from the directory that contains `hostfile` and
# `ds_config_3_xofl.json` (both are passed as relative paths).
#
# The script exits with the launcher's exit status so that a caller
# (scheduler, wrapper script, CI) can tell a failed run from a successful one.
#
# NOTE: `set -euo pipefail` is intentionally not enabled; ~/.bashrc and conda's
# activation hooks are outside this script's control and frequently reference
# unset variables or return non-zero.

# shellcheck source=/dev/null
source ~/.bashrc
# Keep going on failure (the launcher may still be on PATH), but make the
# problem visible instead of failing later with a confusing error.
conda activate agent_training \
  || printf 'warning: "conda activate agent_training" failed; continuing with the current environment\n' >&2

# Options consumed by the deepspeed launcher itself.
launcher_args=(
  --hostfile hostfile
  --include babel-1-27:0,1,2,3,4,5,6,7
  --master_port=9985
)

train_script=/home/bo/cais/deepseek_llamafactory/LLaMA-Factory/src/train_bash.py

# Options forwarded to train_bash.py, one per line so they diff cleanly.
train_args=(
  --deepspeed ds_config_3_xofl.json
  --stage sft
  --model_name_or_path /data/tir/projects/tir3/users/bo/ckpts/cllama/cllama/models--codellama--CodeLlama-7b-hf/snapshots/bc5283229e2fe411552f55c71657e97edf79066c
  --do_train
  --dataset m2w_16k_stop_2k
  --train_size 17400
  --shuffle False
  --dataset_dir /home/bo/cais/agent/data/code/
  --template llama2
  --finetuning_type full
  --output_dir /data/tir/projects/tir4/users/bo/ckpts/output_17k_m2w_16k_stop_2k_1e-5_b32s2267/
  --overwrite_output_dir True
  --cache_path /scratch/bo/m2w_16k_stop_2k/
  --per_device_train_batch_size 2
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 2
  --gradient_checkpointing True
  --lr_scheduler_type cosine
  --evaluation_strategy "steps"
  --save_strategy "epoch"
  --logging_steps "30"
  --save_total_limit 5
  --preprocessing_num_workers 16
  --learning_rate 1e-5
  --weight_decay 0.
  --warmup_ratio 0.03
  --num_train_epochs 6
  --plot_loss
  --bf16 True
  --cutoff_len 4096
  --report_to 'wandb'
  --flash_attn True
  --save_only_model
  --seed 2267
)

# The WANDB_* assignments are prefix assignments: they are placed in the
# environment of this one command only and do not persist in the shell.
WANDB__SERVICE_WAIT=500 \
WANDB_PROJECT=agent \
WANDB_ENTITY=vl001 \
WANDB_NAME=m2w_16k_stop_2k_b32s2267 \
  deepspeed "${launcher_args[@]}" "${train_script}" "${train_args[@]}"
# Capture immediately: any later command (including echo) overwrites $?.
rc=$?

echo "exit code: ${rc}"
# Without this the script's status would be that of the echo above (always 0).
exit "${rc}"